megfile 3.1.6__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. megfile/cli.py +12 -7
  2. megfile/config.py +34 -44
  3. megfile/fs.py +169 -11
  4. megfile/fs_path.py +183 -259
  5. megfile/hdfs.py +106 -5
  6. megfile/hdfs_path.py +34 -90
  7. megfile/http.py +50 -1
  8. megfile/http_path.py +27 -65
  9. megfile/interfaces.py +1 -8
  10. megfile/lib/base_prefetch_reader.py +62 -78
  11. megfile/lib/combine_reader.py +5 -0
  12. megfile/lib/glob.py +3 -6
  13. megfile/lib/hdfs_prefetch_reader.py +7 -7
  14. megfile/lib/http_prefetch_reader.py +6 -6
  15. megfile/lib/s3_buffered_writer.py +67 -64
  16. megfile/lib/s3_cached_handler.py +1 -2
  17. megfile/lib/s3_limited_seekable_writer.py +3 -7
  18. megfile/lib/s3_memory_handler.py +1 -2
  19. megfile/lib/s3_pipe_handler.py +1 -2
  20. megfile/lib/s3_prefetch_reader.py +15 -20
  21. megfile/lib/s3_share_cache_reader.py +8 -5
  22. megfile/pathlike.py +397 -401
  23. megfile/s3.py +118 -17
  24. megfile/s3_path.py +150 -224
  25. megfile/sftp.py +300 -10
  26. megfile/sftp_path.py +46 -322
  27. megfile/smart.py +33 -27
  28. megfile/smart_path.py +9 -14
  29. megfile/stdio.py +1 -1
  30. megfile/stdio_path.py +2 -2
  31. megfile/utils/__init__.py +11 -4
  32. megfile/version.py +1 -1
  33. {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/METADATA +7 -7
  34. megfile-4.0.0.dist-info/RECORD +52 -0
  35. {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/WHEEL +1 -1
  36. {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/top_level.txt +0 -2
  37. docs/conf.py +0 -65
  38. megfile-3.1.6.dist-info/RECORD +0 -55
  39. scripts/convert_results_to_sarif.py +0 -91
  40. scripts/generate_file.py +0 -344
  41. {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/LICENSE +0 -0
  42. {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/LICENSE.pyre +0 -0
  43. {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/entry_points.txt +0 -0
scripts/generate_file.py DELETED
@@ -1,344 +0,0 @@
1
- import importlib
2
- import re
3
-
4
# Per-protocol names of path-class methods for which no module-level wrapper
# is generated.  ``insert_class_method_lines`` looks the protocol up here and
# skips every method whose name is listed.  Protocols without an entry (for
# example "stdio") ignore nothing beyond private ``_``-prefixed methods.
ALL_IGNORE_FUNC_LIST = {
    "s3": [
        "open",
        "readlink",
        "iterdir",
        "is_mount",
        "is_socket",
        "is_fifo",
        "is_block_device",
        "is_char_device",
        "owner",
        "absolute",
        "rmdir",
        "glob",
        "iglob",
        "glob_stat",
        "rename",
        "cwd",
        "mkdir",
        "parts",
        "path_without_protocol",
        "path_with_protocol",
    ],
    "fs": [
        "open",
        "from_uri",
        "path_with_protocol",
        "joinpath",
        "readlink",
        "iterdir",
        "chmod",
        "group",
        "is_socket",
        "is_fifo",
        "is_block_device",
        "is_char_device",
        "rmdir",
        "owner",
        "absolute",
        "resolve",
        "cwd",
        "home",
        "glob",
        "iglob",
        "glob_stat",
        "rename",
        "parts",
        "root",
        "anchor",
        "drive",
        "replace",
        "hardlink_to",
        "mkdir",
        "utime",
    ],
    "http": ["open"],
    "sftp": [
        "path_without_protocol",
        "expanduser",
        "iterdir",
        "readlink",
        "cwd",
        "glob",
        "iglob",
        "glob_stat",
        "resolve",
        "relpath",
        "utime",
        "parts",
    ],
    "hdfs": [
        "iterdir",
        "absolute",
        "rmdir",
        "glob",
        "iglob",
        "glob_stat",
        "rename",
        "mkdir",
        "path_without_protocol",
        "path_with_protocol",
        "parts",
    ],
}
88
-
89
# Per-protocol import statements written at the top of the generated
# ``megfile/<protocol>.py`` module.  ``get_methods_from_path_file`` adds the
# ``from megfile.<protocol>_path import ...`` line after these.
ALL_IMPORT_LINES = {
    "s3": [
        "from typing import BinaryIO, Callable, Iterator, List, Optional, Tuple",
        "from megfile.interfaces import Access, FileEntry, PathLike, StatResult",
    ],
    "fs": [
        "from typing import BinaryIO, Callable, Iterator, List, Optional, Tuple",
        "from megfile.interfaces import Access, FileEntry, PathLike, StatResult",
    ],
    "http": ["from megfile.interfaces import PathLike, StatResult"],
    "stdio": [
        "from typing import IO, AnyStr, Optional",
        "from megfile.interfaces import PathLike",
    ],
    "sftp": [
        # One logical line, split only to respect the line-length limit; the
        # parentheses make it obvious that this is a single list element.
        (
            "from typing import IO, AnyStr, BinaryIO, Iterator, List, Tuple, "
            "Callable, Optional"
        ),
        "from megfile.interfaces import FileEntry, PathLike, StatResult",
    ],
    "hdfs": [
        "from typing import IO, AnyStr, BinaryIO, Iterator, List, Optional, Tuple",
        "from megfile.interfaces import FileEntry, PathLike, StatResult",
    ],
}
113
-
114
# Per-protocol renames applied when a path-class method becomes a module-level
# function: the generated name is ``<protocol>_<mapped name>``, and methods
# without an entry keep their own name.
ALL_FUNC_NAME_MAPPING = {
    "s3": {
        "is_dir": "isdir",
        "is_file": "isfile",
        "load": "load_from",
        "mkdir": "makedirs",
        "md5": "getmd5",
        "symlink_to": "symlink",
        "is_symlink": "islink",
        "save": "save_as",
    },
    "fs": {
        "is_dir": "isdir",
        "is_file": "isfile",
        "md5": "getmd5",
        "load": "load_from",
        "mkdir": "makedirs",
        "symlink_to": "symlink",
        "is_symlink": "islink",
        "is_mount": "ismount",
        "save": "save_as",
        "joinpath": "path_join",
        "is_absolute": "isabs",
        "replace": "move",
    },
    "http": {},
    "sftp": {
        "is_dir": "isdir",
        "is_file": "isfile",
        "md5": "getmd5",
        "load": "load_from",
        "mkdir": "makedirs",
        "symlink_to": "symlink",
        "is_symlink": "islink",
        "save": "save_as",
        "is_absolute": "isabs",
        "replace": "move",
    },
    "hdfs": {
        "is_dir": "isdir",
        "is_file": "isfile",
        "load": "load_from",
        "mkdir": "makedirs",
        "md5": "getmd5",
        "symlink_to": "symlink",
        "is_symlink": "islink",
        "save": "save_as",
    },
}

# Matches a bracketed span that contains no ":" (e.g. the ``[str, int]`` of a
# ``Dict[str, int]`` annotation).  It is removed from a parameter list before
# that list is split on "," so commas inside subscripts do not split it.
PARAMETER_PATTERN = re.compile(r"\[[^:]*\]")
164
-
165
-
166
def get_class_name(current_file_type: str):
    """Return the name of the path class for a protocol key.

    ``"fs"`` maps to ``"FSPath"``; every other key is passed through
    ``str.capitalize`` and suffixed with ``"Path"`` (``"s3"`` -> ``"S3Path"``).
    """
    # "fs" is the only key whose prefix is fully upper-cased.
    prefix = "FS" if current_file_type == "fs" else current_file_type.capitalize()
    return prefix + "Path"
170
-
171
-
172
def insert_class_method_lines(
    func_params: list, annotation_lines: list, current_file_type: str
):
    """Build the source of the module-level wrapper for one path-class method.

    :param func_params: Stripped source lines of the method signature, one
        entry per physical line; they are concatenated to recover the ``def``.
    :param annotation_lines: Docstring lines of the method, copied into the
        generated function.
    :param current_file_type: Protocol key such as ``"s3"`` or ``"fs"``.
    :returns: ``(wrapper_name, source_lines)``; ``(None, [])`` when there is
        no signature, or the method is private or listed in
        ``ALL_IGNORE_FUNC_LIST`` for this protocol.
    :raises ValueError: If the signature contains no ``(`` or no ``)``.
    """
    if not func_params:
        return None, []

    signature = "".join(func_params)
    func_name = (
        signature.strip().split("def ", maxsplit=1)[1].split("(", maxsplit=1)[0]
    )
    ignore_func_list = ALL_IGNORE_FUNC_LIST.get(current_file_type, [])
    if func_name.startswith("_") or func_name in ignore_func_list:
        # Nothing is generated for these, so skip the signature rewriting.
        return None, []

    # ``head`` is "def <name>", ``tail`` is everything after the first "(".
    head, tail = signature.split("(", maxsplit=1)

    # Drop subscripts first so commas inside e.g. ``Dict[str, int]`` do not
    # split a single parameter in two.
    params_text = PARAMETER_PATTERN.sub("", tail.split(")", maxsplit=1)[0])

    path_param_name = "path"
    current_params = []
    keyword_only = False
    for raw_param in params_text.split(","):
        if ":" in raw_param:
            param = raw_param.split(":", maxsplit=1)[0].strip()
        elif "=" in raw_param:
            param = raw_param.split("=", maxsplit=1)[0].strip()
        else:
            param = raw_param.strip()

        if param == "*":
            # Everything after a bare "*" must be forwarded by keyword.
            keyword_only = True
            continue
        if not param or param == "**kwargs":
            continue

        if "dst" in param:
            # A destination parameter implies that the wrapped path is the
            # source, so name it accordingly (dst_path -> src_path).
            path_param_name = param.replace("dst", "src")
        current_params.append(f"{param}={param}" if keyword_only else param)

    # Replace only the leading ``self`` parameter; a whole-word, single
    # substitution leaves names that merely contain "self" untouched.
    tail = re.sub(
        r"\bself\b", lambda _match: f"{path_param_name}: PathLike", tail, count=1
    )

    if func_name == "save":
        # The generated ``save_as`` takes the file object first and the path
        # second, so swap the first two parameters of the signature.
        inner, rest = tail.split(")", maxsplit=1)
        ordered = [param.strip() for param in inner.split(",")]
        ordered[0], ordered[1] = ordered[1], ordered[0]
        tail = f"{', '.join(ordered)}){rest}"

    func_name_mapping = ALL_FUNC_NAME_MAPPING.get(current_file_type, {})
    mapped_name = func_name_mapping.get(func_name, func_name)
    real_func_name = f"{current_file_type}_{mapped_name}"

    # ``head`` ends with the method name; swap exactly that occurrence so a
    # parameter or annotation containing the same text is not renamed too.
    new_head = head[: len(head) - len(func_name)] + real_func_name
    # ``**kwargs`` is not forwarded by the wrapper, so drop it from the
    # signature whether or not a space follows the comma.
    def_line = re.sub(r",\s*\*\*kwargs\b", "", f"{new_head}({tail}")
    func_content_lines = [def_line]

    path_param_documented = False
    for annotation_line in annotation_lines:
        if not path_param_documented and annotation_line.strip().startswith(":"):
            # Document the new path parameter ahead of the first field line.
            func_content_lines.append(f"    :param {path_param_name}: Given path")
            path_param_documented = True
        func_content_lines.append(annotation_line)

    class_name = get_class_name(current_file_type)
    # The first collected parameter is ``self``; it becomes the constructor
    # argument instead of being forwarded.
    call_args = ", ".join(current_params[1:])
    return_line = f"    return {class_name}({path_param_name}).{func_name}({call_args})"
    if class_name == "StdioPath":
        return_line += "  # pyre-ignore[6]"
    func_content_lines.append(return_line + "\n\n")
    return real_func_name, func_content_lines
266
-
267
-
268
def get_methods_from_path_file(current_file_type: str):
    """Collect everything needed to generate ``megfile/<type>.py``.

    Imports ``megfile.<type>_path`` to read its ``__all__`` and scans the text
    of ``megfile/<type>_path.py`` (relative to the working directory) for the
    methods of the protocol's path class, turning each one into a wrapper via
    ``insert_class_method_lines``.

    :param current_file_type: Protocol key such as ``"s3"`` or ``"fs"``.
    :returns: ``(import_lines, all_func_list, methods_content)``: the import
        statements, the exported names (module ``__all__`` plus generated
        wrappers) and the source lines of the generated wrappers.
    """
    module = importlib.import_module(f"megfile.{current_file_type}_path")
    # Copy: wrapper names are appended below and must not leak into the
    # imported module's own ``__all__``.
    all_func_list = list(module.__all__)
    # Copy: appending to the list stored in ALL_IMPORT_LINES would duplicate
    # the import line on every further call for the same protocol.
    import_lines = list(ALL_IMPORT_LINES.get(current_file_type, []))
    import_lines.append(
        f"from megfile.{current_file_type}_path import {', '.join(all_func_list)}"
    )
    methods_content = []

    def flush_method(func_params, annotation_lines):
        """Generate the wrapper for the method collected so far, if any."""
        func_name, func_content_lines = insert_class_method_lines(
            func_params, annotation_lines, current_file_type
        )
        if func_name:
            all_func_list.append(func_name)
        methods_content.extend(func_content_lines)

    def signature_complete(line):
        """Tell whether ``line`` (ignoring a trailing comment) ends with ":"."""
        return line.rsplit("#", maxsplit=1)[0].strip().endswith(":")

    class_header = f"class {get_class_name(current_file_type)}("
    body_indent = " " * 4
    in_class = False
    in_signature = False
    in_docstring = False
    func_params = []
    annotation_lines = []
    with open(f"megfile/{current_file_type}_path.py", "r") as f:
        for line in f:
            if line.strip().startswith(class_header):
                in_class = True
                continue
            if not in_class:
                continue
            if line.strip() and not line.startswith(body_indent):
                # First non-blank line at module level ends the class body.
                break

            if in_signature:
                # Continuation line of a multi-line ``def``.
                if signature_complete(line):
                    in_signature = False
                func_params.append(line.strip())
            elif "'''" in line or '"""' in line:
                # A line with a single triple quote opens or closes a
                # docstring; a one-line docstring leaves the state unchanged.
                if line.count("'''") <= 1 and line.count('"""') <= 1:
                    in_docstring = not in_docstring
                annotation_lines.append(line[4:].rstrip())
            elif in_docstring:
                annotation_lines.append(line[4:].rstrip())
            elif line.startswith(body_indent + "def "):
                # Trailing space so attributes such as ``default_x`` are not
                # mistaken for a method definition.
                in_signature = not signature_complete(line)
                # A new method starts: emit the previous one first.
                flush_method(func_params, annotation_lines)
                func_params = [line.strip()]
                annotation_lines = []

    # The last method of the class has no following ``def`` to trigger it.
    flush_method(func_params, annotation_lines)
    return import_lines, all_func_list, methods_content
319
-
320
-
321
def generate_file(current_file_type: str):
    """Write the generated ``megfile/<type>.py`` module for one protocol.

    The file consists of the import lines, an ``__all__`` list (without the
    protocol's own path class and without ``HttpsPath``) and the generated
    wrapper functions.

    :param current_file_type: Protocol key such as ``"s3"`` or ``"fs"``.
    """
    import_lines, all_func_list, methods_content = get_methods_from_path_file(
        current_file_type
    )
    # Names that are imported but deliberately not re-exported.
    hidden_names = (get_class_name(current_file_type), "HttpsPath")
    exported_names = [name for name in all_func_list if name not in hidden_names]

    # Every import line and wrapper line is preceded by a newline.
    chunks = [f"\n{import_line}" for import_line in import_lines]
    chunks.append("\n\n__all__ = [\n")
    chunks.extend(f"    '{name}',\n" for name in exported_names)
    chunks.append("]\n\n")
    chunks.extend(f"\n{content_line}" for content_line in methods_content)

    with open(f"megfile/{current_file_type}.py", "w") as f:
        f.writelines(chunks)
340
-
341
-
342
if __name__ == "__main__":
    # Regenerate the function-style module of every supported protocol.
    file_types = ["s3", "fs", "http", "stdio", "sftp", "hdfs"]
    for file_type in file_types:
        generate_file(file_type)