megfile 3.1.1__py3-none-any.whl → 3.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +2 -4
- megfile/__init__.py +394 -203
- megfile/cli.py +258 -238
- megfile/config.py +25 -21
- megfile/errors.py +124 -114
- megfile/fs.py +174 -140
- megfile/fs_path.py +462 -354
- megfile/hdfs.py +133 -101
- megfile/hdfs_path.py +290 -236
- megfile/http.py +15 -14
- megfile/http_path.py +111 -107
- megfile/interfaces.py +70 -65
- megfile/lib/base_prefetch_reader.py +84 -65
- megfile/lib/combine_reader.py +12 -12
- megfile/lib/compare.py +17 -13
- megfile/lib/compat.py +1 -5
- megfile/lib/fnmatch.py +29 -30
- megfile/lib/glob.py +46 -54
- megfile/lib/hdfs_prefetch_reader.py +40 -25
- megfile/lib/hdfs_tools.py +1 -3
- megfile/lib/http_prefetch_reader.py +69 -46
- megfile/lib/joinpath.py +5 -5
- megfile/lib/lazy_handler.py +7 -3
- megfile/lib/s3_buffered_writer.py +58 -51
- megfile/lib/s3_cached_handler.py +13 -14
- megfile/lib/s3_limited_seekable_writer.py +37 -28
- megfile/lib/s3_memory_handler.py +34 -30
- megfile/lib/s3_pipe_handler.py +24 -25
- megfile/lib/s3_prefetch_reader.py +71 -52
- megfile/lib/s3_share_cache_reader.py +37 -24
- megfile/lib/shadow_handler.py +7 -3
- megfile/lib/stdio_handler.py +9 -8
- megfile/lib/url.py +3 -3
- megfile/pathlike.py +259 -228
- megfile/s3.py +220 -153
- megfile/s3_path.py +977 -802
- megfile/sftp.py +190 -156
- megfile/sftp_path.py +540 -450
- megfile/smart.py +397 -330
- megfile/smart_path.py +100 -105
- megfile/stdio.py +10 -9
- megfile/stdio_path.py +32 -35
- megfile/utils/__init__.py +73 -54
- megfile/utils/mutex.py +11 -14
- megfile/version.py +1 -1
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
- megfile-3.1.2.dist-info/RECORD +55 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
- scripts/convert_results_to_sarif.py +45 -78
- scripts/generate_file.py +140 -64
- megfile-3.1.1.dist-info/RECORD +0 -55
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/utils/__init__.py
CHANGED
|
@@ -5,17 +5,26 @@ import os
|
|
|
5
5
|
import uuid
|
|
6
6
|
from copy import copy
|
|
7
7
|
from functools import wraps
|
|
8
|
-
from io import
|
|
8
|
+
from io import (
|
|
9
|
+
BufferedIOBase,
|
|
10
|
+
BufferedRandom,
|
|
11
|
+
BufferedReader,
|
|
12
|
+
BufferedWriter,
|
|
13
|
+
BytesIO,
|
|
14
|
+
StringIO,
|
|
15
|
+
TextIOBase,
|
|
16
|
+
TextIOWrapper,
|
|
17
|
+
)
|
|
9
18
|
from typing import IO, Callable, Optional
|
|
10
19
|
|
|
11
20
|
from megfile.utils.mutex import ProcessLocal, ThreadLocal
|
|
12
21
|
|
|
13
22
|
|
|
14
23
|
def get_content_size(fileobj: IO, *, intrusive: bool = False) -> int:
|
|
15
|
-
|
|
24
|
+
"""Get size of File-Like Object
|
|
16
25
|
|
|
17
26
|
The File-Like Object must be seekable, otherwise raise IOError
|
|
18
|
-
|
|
27
|
+
"""
|
|
19
28
|
if isinstance(fileobj, (BytesIO, StringIO)):
|
|
20
29
|
return len(fileobj.getvalue())
|
|
21
30
|
|
|
@@ -25,8 +34,8 @@ def get_content_size(fileobj: IO, *, intrusive: bool = False) -> int:
|
|
|
25
34
|
file = file.buffer
|
|
26
35
|
if isinstance(file, BufferedIOBase):
|
|
27
36
|
file = file.raw
|
|
28
|
-
if hasattr(file,
|
|
29
|
-
return getattr(file,
|
|
37
|
+
if hasattr(file, "_content_size"):
|
|
38
|
+
return getattr(file, "_content_size") # pyre-ignore[16]
|
|
30
39
|
|
|
31
40
|
offset = fileobj.tell()
|
|
32
41
|
if not is_seekable(fileobj) and is_writable(fileobj):
|
|
@@ -40,41 +49,41 @@ def get_content_size(fileobj: IO, *, intrusive: bool = False) -> int:
|
|
|
40
49
|
|
|
41
50
|
|
|
42
51
|
def is_seekable(fileobj: IO) -> bool:
|
|
43
|
-
|
|
44
|
-
if hasattr(fileobj,
|
|
52
|
+
"""Test if File-Like Object is seekable"""
|
|
53
|
+
if hasattr(fileobj, "seekable"):
|
|
45
54
|
try:
|
|
46
55
|
return fileobj.seekable()
|
|
47
56
|
except Exception:
|
|
48
57
|
return False
|
|
49
|
-
return hasattr(fileobj,
|
|
58
|
+
return hasattr(fileobj, "seek")
|
|
50
59
|
|
|
51
60
|
|
|
52
61
|
def is_readable(fileobj: IO) -> bool:
|
|
53
|
-
|
|
54
|
-
if hasattr(fileobj,
|
|
62
|
+
"""Test if File-Like Object is readable"""
|
|
63
|
+
if hasattr(fileobj, "readable"):
|
|
55
64
|
try:
|
|
56
65
|
return fileobj.readable()
|
|
57
66
|
except Exception:
|
|
58
67
|
return False
|
|
59
|
-
return hasattr(fileobj,
|
|
68
|
+
return hasattr(fileobj, "read")
|
|
60
69
|
|
|
61
70
|
|
|
62
71
|
def is_writable(fileobj: IO) -> bool:
|
|
63
|
-
|
|
64
|
-
if hasattr(fileobj,
|
|
72
|
+
"""Test if File-Like Object is writable"""
|
|
73
|
+
if hasattr(fileobj, "writable"):
|
|
65
74
|
try:
|
|
66
75
|
return fileobj.writable()
|
|
67
76
|
except Exception:
|
|
68
77
|
return False
|
|
69
|
-
return hasattr(fileobj,
|
|
78
|
+
return hasattr(fileobj, "write")
|
|
70
79
|
|
|
71
80
|
|
|
72
81
|
def _is_pickle(fileobj) -> bool:
|
|
73
|
-
|
|
74
|
-
if fileobj.name.endswith(
|
|
82
|
+
"""Test if File Object is pickle"""
|
|
83
|
+
if fileobj.name.endswith(".pkl") or fileobj.name.endswith(".pickle"):
|
|
75
84
|
return True
|
|
76
85
|
|
|
77
|
-
if
|
|
86
|
+
if "r" in fileobj.mode and "b" in fileobj.mode:
|
|
78
87
|
offset = fileobj.tell()
|
|
79
88
|
fileobj.seek(0)
|
|
80
89
|
data = fileobj.read(2)
|
|
@@ -90,29 +99,32 @@ def get_content_offset(start: Optional[int], stop: Optional[int], size: int):
|
|
|
90
99
|
if stop is None or stop < 0 or start < 0:
|
|
91
100
|
start, stop, _ = slice(start, stop).indices(size)
|
|
92
101
|
if stop < start:
|
|
93
|
-
raise ValueError(
|
|
102
|
+
raise ValueError("read length must be positive")
|
|
94
103
|
return start, stop
|
|
95
104
|
|
|
96
105
|
|
|
97
106
|
def get_name(fileobj, default=None):
|
|
98
|
-
return getattr(fileobj,
|
|
107
|
+
return getattr(fileobj, "name", default or repr(fileobj))
|
|
99
108
|
|
|
100
109
|
|
|
101
|
-
def get_mode(fileobj, default=
|
|
110
|
+
def get_mode(fileobj, default="r"):
|
|
102
111
|
if isinstance(fileobj, BytesIO):
|
|
103
|
-
return
|
|
112
|
+
return "rb+"
|
|
104
113
|
elif isinstance(fileobj, StringIO):
|
|
105
|
-
return
|
|
106
|
-
return getattr(fileobj,
|
|
114
|
+
return "r+"
|
|
115
|
+
return getattr(fileobj, "mode", default)
|
|
107
116
|
|
|
108
117
|
|
|
109
118
|
def shadow_copy(fileobj: IO, intrusive: bool = True, buffered: bool = False):
|
|
110
|
-
|
|
119
|
+
"""Create a File-Like Object, maintaining file pointer,
|
|
120
|
+
to avoid misunderstanding the position when read / write / seek.
|
|
111
121
|
|
|
112
|
-
:param intrusive: If is intrusive. If True, move file pointer to the original
|
|
122
|
+
:param intrusive: If is intrusive. If True, move file pointer to the original
|
|
123
|
+
position after every read / write / seek. If False, then not.
|
|
113
124
|
:param verbose: If True, print log when read / write / seek
|
|
114
|
-
|
|
125
|
+
"""
|
|
115
126
|
from megfile.lib.shadow_handler import ShadowHandler
|
|
127
|
+
|
|
116
128
|
result = ShadowHandler(fileobj, intrusive=intrusive)
|
|
117
129
|
mode = get_mode(fileobj)
|
|
118
130
|
if "b" in mode and (buffered or _is_pickle(result)):
|
|
@@ -125,22 +137,25 @@ def shadow_copy(fileobj: IO, intrusive: bool = True, buffered: bool = False):
|
|
|
125
137
|
return result
|
|
126
138
|
|
|
127
139
|
|
|
128
|
-
def lazy_open(
|
|
129
|
-
|
|
130
|
-
''' Create a File-Like Object, maintaining file pointer, to open a file in lazy mode
|
|
140
|
+
def lazy_open(path: str, mode: str, open_func: Optional[Callable] = None, **options):
|
|
141
|
+
"""Create a File-Like Object, maintaining file pointer, to open a file in lazy mode
|
|
131
142
|
|
|
132
|
-
:param intrusive: If is intrusive. If True, move file pointer to the original
|
|
143
|
+
:param intrusive: If is intrusive. If True, move file pointer to the original
|
|
144
|
+
position after every read / write / seek. If False, then not.
|
|
133
145
|
:param verbose: If True, print log when read / write / seek
|
|
134
|
-
|
|
146
|
+
"""
|
|
135
147
|
from megfile.lib.lazy_handler import LazyHandler
|
|
148
|
+
|
|
136
149
|
if open_func is None:
|
|
137
150
|
from megfile.smart import smart_open
|
|
151
|
+
|
|
138
152
|
open_func = smart_open
|
|
139
153
|
return LazyHandler(path, mode, open_func=open_func, **options)
|
|
140
154
|
|
|
141
155
|
|
|
142
156
|
def patch_rlimit():
|
|
143
157
|
import resource
|
|
158
|
+
|
|
144
159
|
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
|
|
145
160
|
resource.setrlimit(resource.RLIMIT_NOFILE, (hard, hard))
|
|
146
161
|
|
|
@@ -151,36 +166,38 @@ process_local = ProcessLocal()
|
|
|
151
166
|
|
|
152
167
|
def combine(file_objects, name):
|
|
153
168
|
from megfile.lib.combine_reader import CombineReader
|
|
169
|
+
|
|
154
170
|
return CombineReader(file_objects, name)
|
|
155
171
|
|
|
156
172
|
|
|
157
173
|
def get_binary_mode(mode: str) -> str:
|
|
158
|
-
|
|
159
|
-
if
|
|
174
|
+
"""Replace mode parameter in open() with corresponding binary mode"""
|
|
175
|
+
if "t" in mode:
|
|
160
176
|
# rt / wt / rt+ => rb / wb / rb+
|
|
161
|
-
mode = mode.replace(
|
|
162
|
-
elif
|
|
177
|
+
mode = mode.replace("t", "b")
|
|
178
|
+
elif "b" not in mode:
|
|
163
179
|
# r / w / r+ => rb / wb / rb+
|
|
164
|
-
mode = mode[:1] +
|
|
180
|
+
mode = mode[:1] + "b" + mode[1:]
|
|
165
181
|
# rb / wb / r+b => rb / wb / rb+
|
|
166
|
-
return "".join(sorted(mode, key=lambda k: {
|
|
182
|
+
return "".join(sorted(mode, key=lambda k: {"b": 1, "+": 2}.get(k, 0)))
|
|
167
183
|
|
|
168
184
|
|
|
169
185
|
def binary_open(open_func):
|
|
170
|
-
|
|
186
|
+
"""
|
|
171
187
|
Decorator:
|
|
172
188
|
Output according to user-setting mode while calling Open
|
|
173
|
-
|
|
189
|
+
"""
|
|
174
190
|
|
|
175
191
|
@wraps(open_func)
|
|
176
192
|
def wrapper(
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
193
|
+
path,
|
|
194
|
+
mode: str = "rb",
|
|
195
|
+
encoding: Optional[str] = None,
|
|
196
|
+
errors: Optional[str] = None,
|
|
197
|
+
**kwargs,
|
|
198
|
+
):
|
|
182
199
|
fileobj = open_func(path, get_binary_mode(mode), **kwargs)
|
|
183
|
-
if
|
|
200
|
+
if "b" not in mode:
|
|
184
201
|
fileobj = TextIOWrapper(fileobj, encoding=encoding, errors=errors)
|
|
185
202
|
fileobj.mode = mode # pyre-ignore[41]
|
|
186
203
|
return fileobj
|
|
@@ -189,20 +206,20 @@ def binary_open(open_func):
|
|
|
189
206
|
|
|
190
207
|
|
|
191
208
|
def get_human_size(size_bytes: float) -> str:
|
|
192
|
-
|
|
209
|
+
"""Get human-readable size, e.g. `100MB`"""
|
|
193
210
|
if size_bytes < 0:
|
|
194
211
|
# TODO: replace AssertionError with ValueError in 4.0.0
|
|
195
|
-
raise AssertionError(
|
|
212
|
+
raise AssertionError("negative size: %r" % size_bytes)
|
|
196
213
|
if size_bytes == 0:
|
|
197
|
-
return
|
|
198
|
-
size_name = (
|
|
214
|
+
return "0 B"
|
|
215
|
+
size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
|
|
199
216
|
index = int(math.floor(math.log(size_bytes, 1024)))
|
|
200
217
|
base = math.pow(1024, index)
|
|
201
218
|
if base == 1:
|
|
202
219
|
size = size_bytes
|
|
203
220
|
else:
|
|
204
221
|
size = round(size_bytes / base, 2)
|
|
205
|
-
return
|
|
222
|
+
return "%s %s" % (size, size_name[index])
|
|
206
223
|
|
|
207
224
|
|
|
208
225
|
def necessary_params(func: Callable, **kwargs):
|
|
@@ -222,7 +239,7 @@ def necessary_params(func: Callable, **kwargs):
|
|
|
222
239
|
return res_kwargs
|
|
223
240
|
|
|
224
241
|
|
|
225
|
-
def generate_cache_path(filename: str, cache_dir: str =
|
|
242
|
+
def generate_cache_path(filename: str, cache_dir: str = "/tmp") -> str:
|
|
226
243
|
suffix = os.path.splitext(filename)[1]
|
|
227
244
|
return os.path.join(cache_dir, str(uuid.uuid4()) + suffix)
|
|
228
245
|
|
|
@@ -240,7 +257,7 @@ def _get_class(cls_or_obj) -> type:
|
|
|
240
257
|
|
|
241
258
|
def calculate_md5(file_object):
|
|
242
259
|
hash_md5 = hashlib.md5() # nosec
|
|
243
|
-
for chunk in iter(lambda: file_object.read(4096), b
|
|
260
|
+
for chunk in iter(lambda: file_object.read(4096), b""):
|
|
244
261
|
hash_md5.update(chunk)
|
|
245
262
|
return hash_md5.hexdigest()
|
|
246
263
|
|
|
@@ -266,7 +283,8 @@ class classproperty(property):
|
|
|
266
283
|
def __set__(self, cls_or_obj, value: object) -> None:
|
|
267
284
|
"""
|
|
268
285
|
This method gets called when a property value should be set.
|
|
269
|
-
@param cls_or_obj: The class or instance of which the property should be
|
|
286
|
+
@param cls_or_obj: The class or instance of which the property should be
|
|
287
|
+
changed.
|
|
270
288
|
@param value: The new value.
|
|
271
289
|
"""
|
|
272
290
|
# call this method only on the class, not the instance
|
|
@@ -275,7 +293,8 @@ class classproperty(property):
|
|
|
275
293
|
def __delete__(self, cls_or_obj) -> None:
|
|
276
294
|
"""
|
|
277
295
|
This method gets called when a property should be deleted.
|
|
278
|
-
@param cls_or_obj: The class or instance of which the property should be
|
|
296
|
+
@param cls_or_obj: The class or instance of which the property should be
|
|
297
|
+
deleted.
|
|
279
298
|
"""
|
|
280
299
|
# call this method only on the class, not the instance
|
|
281
300
|
super(classproperty, self).__delete__(_get_class(cls_or_obj))
|
megfile/utils/mutex.py
CHANGED
|
@@ -5,14 +5,10 @@ from threading import RLock
|
|
|
5
5
|
from threading import local as _ThreadLocal
|
|
6
6
|
from typing import Any, Callable, Iterator
|
|
7
7
|
|
|
8
|
-
__all__ = [
|
|
9
|
-
'ThreadLocal',
|
|
10
|
-
'ProcessLocal',
|
|
11
|
-
]
|
|
8
|
+
__all__ = ["ThreadLocal", "ProcessLocal"]
|
|
12
9
|
|
|
13
10
|
|
|
14
11
|
class ForkAware(ABC):
|
|
15
|
-
|
|
16
12
|
def __init__(self):
|
|
17
13
|
self._process_id = os.getpid()
|
|
18
14
|
self._reset()
|
|
@@ -26,7 +22,6 @@ class ForkAware(ABC):
|
|
|
26
22
|
|
|
27
23
|
|
|
28
24
|
def fork_aware(func):
|
|
29
|
-
|
|
30
25
|
@wraps(func)
|
|
31
26
|
def wrapper(self, *args, **kwargs):
|
|
32
27
|
current_pid = os.getpid()
|
|
@@ -39,7 +34,6 @@ def fork_aware(func):
|
|
|
39
34
|
|
|
40
35
|
|
|
41
36
|
class BaseLocal(ABC): # pragma: no cover
|
|
42
|
-
|
|
43
37
|
@property
|
|
44
38
|
@abstractmethod
|
|
45
39
|
def _data(self) -> dict:
|
|
@@ -68,7 +62,6 @@ class BaseLocal(ABC): # pragma: no cover
|
|
|
68
62
|
|
|
69
63
|
|
|
70
64
|
class ThreadLocal(ForkAware, BaseLocal):
|
|
71
|
-
|
|
72
65
|
def _reset(self):
|
|
73
66
|
self._local = _ThreadLocal()
|
|
74
67
|
|
|
@@ -86,9 +79,12 @@ class ThreadLocal(ForkAware, BaseLocal):
|
|
|
86
79
|
|
|
87
80
|
class ProcessLocal(ForkAware, BaseLocal):
|
|
88
81
|
"""
|
|
89
|
-
Provides a basic per-process mapping container that wipes itself if the current PID
|
|
82
|
+
Provides a basic per-process mapping container that wipes itself if the current PID
|
|
83
|
+
changed since the last get/set.
|
|
84
|
+
|
|
90
85
|
Aka `threading.local()`, but for processes instead of threads.
|
|
91
86
|
"""
|
|
87
|
+
|
|
92
88
|
_lock = None
|
|
93
89
|
|
|
94
90
|
def _reset(self):
|
|
@@ -101,8 +97,9 @@ class ProcessLocal(ForkAware, BaseLocal):
|
|
|
101
97
|
return self._local
|
|
102
98
|
|
|
103
99
|
def __call__(self, key: str, func: Callable, *args, **kwargs) -> Any:
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
100
|
+
data = self._data
|
|
101
|
+
if key not in data:
|
|
102
|
+
with self._lock:
|
|
103
|
+
if key not in data:
|
|
104
|
+
data[key] = func(*args, **kwargs)
|
|
105
|
+
return data[key]
|
megfile/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = "3.1.
|
|
1
|
+
VERSION = "3.1.2"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: megfile
|
|
3
|
-
Version: 3.1.
|
|
3
|
+
Version: 3.1.2
|
|
4
4
|
Summary: Megvii file operation library
|
|
5
5
|
Author-email: megvii <megfile@megvii.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/megvii-research/megfile
|
|
@@ -22,15 +22,15 @@ Description-Content-Type: text/markdown
|
|
|
22
22
|
License-File: LICENSE
|
|
23
23
|
License-File: LICENSE.pyre
|
|
24
24
|
Requires-Dist: boto3
|
|
25
|
-
Requires-Dist: botocore
|
|
25
|
+
Requires-Dist: botocore>=1.13.0
|
|
26
26
|
Requires-Dist: requests
|
|
27
27
|
Requires-Dist: paramiko
|
|
28
28
|
Requires-Dist: tqdm
|
|
29
29
|
Requires-Dist: pyyaml
|
|
30
30
|
Provides-Extra: cli
|
|
31
|
-
Requires-Dist: click
|
|
31
|
+
Requires-Dist: click; extra == "cli"
|
|
32
32
|
Provides-Extra: hdfs
|
|
33
|
-
Requires-Dist: hdfs
|
|
33
|
+
Requires-Dist: hdfs; extra == "hdfs"
|
|
34
34
|
|
|
35
35
|
megfile - Megvii FILE library
|
|
36
36
|
---
|
|
@@ -198,12 +198,9 @@ s3 =
|
|
|
198
198
|
|
|
199
199
|
*You can submit code even if the code doesn't meet conditions. The project members will evaluate and assist you in making code changes*
|
|
200
200
|
|
|
201
|
-
* **Code format**: Your code needs to pass **code format check**. `megfile` uses `
|
|
201
|
+
* **Code format**: Your code needs to pass **code format check**. `megfile` uses `ruff` as lint tool
|
|
202
202
|
* **Static check**: Your code needs complete **type hint**. `megfile` uses `pytype` as static check tool. If `pytype` failed in static check, use `# pytype: disable=XXX` to disable the error and please tell us why you disable it.
|
|
203
203
|
|
|
204
|
-
*Note* : Because `pytype` doesn't support variable type annation, the variable type hint format introduced by py36 cannot be used.
|
|
205
|
-
> i.e. `variable: int` is invalid, replace it with `variable # type: int`
|
|
206
|
-
|
|
207
204
|
* **Test**: Your code needs complete **unit test** coverage. `megfile` uses `pyfakefs` and `moto` as local file system and s3 virtual environment in unit tests. The newly added code should have a complete unit test to ensure the correctness
|
|
208
205
|
|
|
209
206
|
* You can help to improve `megfile` in many ways:
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
docs/conf.py,sha256=sfDSly5jO8W_RmuAptOIp4hd8dNcO-9a5XrHTbxFnNo,2448
|
|
2
|
+
megfile/__init__.py,sha256=i2Lbq_VxIgppaqwkxG0_H35dRfcjJ4mCYWjprOf4hHo,7318
|
|
3
|
+
megfile/cli.py,sha256=Z54c98bIBOmljPtSPKPnXmuzsJJ8eLE0pd2l-ImhL1U,22234
|
|
4
|
+
megfile/config.py,sha256=_SkJRaVWUdfW1Q9uX0vao-6YVQKJtfej22Z8DykuRps,2331
|
|
5
|
+
megfile/errors.py,sha256=h6CgdgFG7A39ZgSvso9o5fz2uBZwMkZtBhRgtCXBvPI,13929
|
|
6
|
+
megfile/fs.py,sha256=dgj5fW-EEzQNdjMF2tkB5DjXu3iHQbtLi5PSIMxR8fc,11966
|
|
7
|
+
megfile/fs_path.py,sha256=Ffvukc176beH5aQMZXXtwH6ApwLYXPViCIUP0pijgT0,41590
|
|
8
|
+
megfile/hdfs.py,sha256=latguOuuzAmg-yWOy3Sm723CJ0ybN_eSHRubVNqhcMU,9202
|
|
9
|
+
megfile/hdfs_path.py,sha256=0XLtABufwqL-y8igOxzOJz6zOGppuBp2f2SwXIMvvYg,27299
|
|
10
|
+
megfile/http.py,sha256=2Z2yqyhU-zcJCJwSNyBsxsZ7f2FT9X6fcednsbHDsFM,2025
|
|
11
|
+
megfile/http_path.py,sha256=BhMNjQVB85IaCGGIKzgEfY73mAVdCzJP08W1RuGeMRA,16119
|
|
12
|
+
megfile/interfaces.py,sha256=HPkYvpIEYzzVrZrUAvmBIO9SmmQXEWD9Ihaq551cnlM,8575
|
|
13
|
+
megfile/pathlike.py,sha256=vKuCMlSAPYNSojp03wEj2i3Cq3E3ROp_-UkkdgBElws,30802
|
|
14
|
+
megfile/s3.py,sha256=7SdfLjAePVh-bpRyuj566VB4Qa7KP86rCJGzYANR7wQ,13008
|
|
15
|
+
megfile/s3_path.py,sha256=fHXDwndXz3X9zicdyxRhCgXzCSnPyEPs56MAxAaN6BY,93440
|
|
16
|
+
megfile/sftp.py,sha256=vyDnYXX3i1j2fhXMC8YCeX-66MDb9wrBQQjQVhZx0uo,13004
|
|
17
|
+
megfile/sftp_path.py,sha256=4tByWvUJK1KBJoa3t5aoWYnZpaRWN9nQIE6ZyiGHrbk,53519
|
|
18
|
+
megfile/smart.py,sha256=Vr4R7HpjXjt587KOc2-1QGbQ5EsZ48YRzCaK0rz3IS0,36108
|
|
19
|
+
megfile/smart_path.py,sha256=RO84tnqXsKtd_T19mz5wjD9LSnsE9_Vv3CuHId1qDiU,6686
|
|
20
|
+
megfile/stdio.py,sha256=UYe-h440Wc4f5COOzOTG1svnp5nFzrfpixehJ0_0_NY,653
|
|
21
|
+
megfile/stdio_path.py,sha256=7jzVdreamO18yBWZM7Pp71cO7GmrYb0M0qyQde2Ypq4,2706
|
|
22
|
+
megfile/version.py,sha256=qUtPF5wMCWTQvs67Rysqijd4tyLvFcKVJ9nmvYVQDQo,19
|
|
23
|
+
megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
+
megfile/lib/base_prefetch_reader.py,sha256=CaYWuiKmlk4Utr0IFDPwPC58wV2jBAhqpxhwhRHc734,13652
|
|
25
|
+
megfile/lib/combine_reader.py,sha256=uSzo3PmhD5ck6_Vv6dFU5vVx4boeA97VS-puPyhF_BE,4657
|
|
26
|
+
megfile/lib/compare.py,sha256=n_dtLxgoskYnsIZMKdKmVhQoVn8qYUrUhkS1JH2_X3o,2170
|
|
27
|
+
megfile/lib/compat.py,sha256=SynEeHluys3tCK-lb_1oV3o_ft83yZvunqM_AjibLgE,207
|
|
28
|
+
megfile/lib/fnmatch.py,sha256=HzXwMCskXGdA0tHCkgTGrIZmyyFvQpOQxoGqjZExXR8,4040
|
|
29
|
+
megfile/lib/glob.py,sha256=iJ0NvFh7b07MDru36YY1j3ZWPCNBLAECzUkoqYfJWgY,10052
|
|
30
|
+
megfile/lib/hdfs_prefetch_reader.py,sha256=UrcUmTM1IZwD95oZMJXuY2dYEpE7uUjs_6dHyTMYDbg,2129
|
|
31
|
+
megfile/lib/hdfs_tools.py,sha256=4K-OdMYFFSLBGmDzjatioHvuZuUbKVy7ACeJl-l0HLQ,435
|
|
32
|
+
megfile/lib/http_prefetch_reader.py,sha256=PibR9Mx4JSdhkB4p8WzD3i2f__Txv55VBtPOmhml3vM,4556
|
|
33
|
+
megfile/lib/joinpath.py,sha256=gaPNtBi8fzd5LZNyZp5zrHzaybcqKJ1xlntGmVNyFEM,929
|
|
34
|
+
megfile/lib/lazy_handler.py,sha256=bE7RGt1x_xYWMgGAvHr7dwEt52qy-D3z90X3oyCvE6g,1875
|
|
35
|
+
megfile/lib/s3_buffered_writer.py,sha256=V2nMdA3sB8XV6q5OqWFBZhPrzkw1ZUMM81k2uDPNn24,7087
|
|
36
|
+
megfile/lib/s3_cached_handler.py,sha256=QrQKck06ye16o7GD71T-fVCseKlOhsxp82LtBTtAKJU,1498
|
|
37
|
+
megfile/lib/s3_limited_seekable_writer.py,sha256=v-e7rfFBfWCSQVtJIaFHM_i0Hb1FkfVLHlhawo5MOIk,6358
|
|
38
|
+
megfile/lib/s3_memory_handler.py,sha256=NGKWbI4LG2cmV06CP7KOVPqS_BNpm3ApqKi5ibgIBvQ,4208
|
|
39
|
+
megfile/lib/s3_pipe_handler.py,sha256=DY1UTNCq8oD3QWXNb4orOiz3EoEAo6dhwmZZdk6h1bU,3694
|
|
40
|
+
megfile/lib/s3_prefetch_reader.py,sha256=YZA6JOQXcioREh_z1E-kZ2WRPTm02v0dCEVqyaOMHns,4287
|
|
41
|
+
megfile/lib/s3_share_cache_reader.py,sha256=jhGL1B6NPv68cQnW1Jf7ey-zTQ8XfiJg5ILDNgRWHy0,3671
|
|
42
|
+
megfile/lib/shadow_handler.py,sha256=UHsbDHISGBPBlzFyu1V_UgUSoYNoVvVDsyhxtt0yEU0,2735
|
|
43
|
+
megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,1987
|
|
44
|
+
megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
|
|
45
|
+
megfile/utils/__init__.py,sha256=NfO5vNxfeceGvMB3dgZNudyPFTmPY096JbC4iYroX6o,9003
|
|
46
|
+
megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
|
|
47
|
+
scripts/convert_results_to_sarif.py,sha256=nDiOfsedb22Ps7ZodmYdlXZlxv54fRxCQgOZsB2OkNk,2833
|
|
48
|
+
scripts/generate_file.py,sha256=-mTcBiqiQ1juvqojVfVZ-uZWgpANHJNdhrF7s68zNfc,10903
|
|
49
|
+
megfile-3.1.2.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
|
50
|
+
megfile-3.1.2.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
|
|
51
|
+
megfile-3.1.2.dist-info/METADATA,sha256=PdRilJUjmvzVJmWUcW_Jx9ieLmcJlAao4NEdWFP8NB8,8797
|
|
52
|
+
megfile-3.1.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
53
|
+
megfile-3.1.2.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
|
|
54
|
+
megfile-3.1.2.dist-info/top_level.txt,sha256=oTnYXo1Z3V61qSWAKtnY9RkDgRSHvfRN38FQae6E0W0,50
|
|
55
|
+
megfile-3.1.2.dist-info/RECORD,,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
-
|
|
3
|
+
|
|
4
4
|
# This source code is licensed under the MIT license found in the
|
|
5
5
|
# LICENSE.pyre file in the root directory of this source tree.
|
|
6
6
|
|
|
@@ -18,44 +18,33 @@ Location = Dict[str, Any]
|
|
|
18
18
|
|
|
19
19
|
def _locations(errors: List[Error]) -> Dict[str, Location]:
|
|
20
20
|
locations = {
|
|
21
|
-
error["path"]: {
|
|
22
|
-
|
|
23
|
-
"index": 0,
|
|
24
|
-
} for error in errors
|
|
21
|
+
error["path"]: {"uri": f"file://{Path.cwd() / error['path']}", "index": 0}
|
|
22
|
+
for error in errors
|
|
25
23
|
}
|
|
26
24
|
for index, location in enumerate(locations.values()):
|
|
27
25
|
location["index"] = index
|
|
28
26
|
return locations
|
|
29
27
|
|
|
30
28
|
|
|
31
|
-
def _to_sarif_result(error: Error, locations: Dict[str,
|
|
32
|
-
Location]) -> Dict[str, Any]:
|
|
29
|
+
def _to_sarif_result(error: Error, locations: Dict[str, Location]) -> Dict[str, Any]:
|
|
33
30
|
LOG.info(f"Transforming:\n{error}")
|
|
34
31
|
|
|
35
32
|
return {
|
|
36
|
-
"ruleId":
|
|
37
|
-
|
|
38
|
-
"
|
|
39
|
-
|
|
40
|
-
"
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
"physicalLocation":
|
|
49
|
-
{
|
|
50
|
-
"artifactLocation": locations[error["path"]],
|
|
51
|
-
"region":
|
|
52
|
-
{
|
|
53
|
-
"startLine": error["line"],
|
|
54
|
-
"startColumn": error["column"] + 1,
|
|
55
|
-
},
|
|
56
|
-
}
|
|
33
|
+
"ruleId": "type-error",
|
|
34
|
+
"ruleIndex": 0,
|
|
35
|
+
"level": "error",
|
|
36
|
+
"message": {"text": error["description"]},
|
|
37
|
+
"locations": [
|
|
38
|
+
{
|
|
39
|
+
"physicalLocation": {
|
|
40
|
+
"artifactLocation": locations[error["path"]],
|
|
41
|
+
"region": {
|
|
42
|
+
"startLine": error["line"],
|
|
43
|
+
"startColumn": error["column"] + 1,
|
|
44
|
+
},
|
|
57
45
|
}
|
|
58
|
-
|
|
46
|
+
}
|
|
47
|
+
],
|
|
59
48
|
}
|
|
60
49
|
|
|
61
50
|
|
|
@@ -63,62 +52,40 @@ def _to_sarif(errors: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
|
63
52
|
LOG.info(f"Transforming:\n{errors}")
|
|
64
53
|
locations = _locations(errors)
|
|
65
54
|
return {
|
|
66
|
-
"version":
|
|
67
|
-
|
|
68
|
-
"
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
"driver":
|
|
76
|
-
{
|
|
77
|
-
"name":
|
|
78
|
-
"Pyre",
|
|
79
|
-
"informationUri":
|
|
80
|
-
"https://www.pyre-check.org",
|
|
81
|
-
"rules":
|
|
82
|
-
[
|
|
83
|
-
{
|
|
84
|
-
"id":
|
|
85
|
-
"type-error",
|
|
86
|
-
"shortDescription":
|
|
87
|
-
{
|
|
88
|
-
"text": "Type Error"
|
|
89
|
-
},
|
|
90
|
-
"helpUri":
|
|
91
|
-
"https://www.pyre-check.org",
|
|
92
|
-
"help":
|
|
93
|
-
{
|
|
94
|
-
"text":
|
|
95
|
-
"Pyre is a type checker for Python"
|
|
96
|
-
},
|
|
97
|
-
},
|
|
98
|
-
],
|
|
99
|
-
}
|
|
100
|
-
},
|
|
101
|
-
"artifacts":
|
|
102
|
-
[
|
|
55
|
+
"version": "2.1.0",
|
|
56
|
+
"$schema": "http://json.schemastore.org/sarif-2.1.0-rtm.4",
|
|
57
|
+
"runs": [
|
|
58
|
+
{
|
|
59
|
+
"tool": {
|
|
60
|
+
"driver": {
|
|
61
|
+
"name": "Pyre",
|
|
62
|
+
"informationUri": "https://www.pyre-check.org",
|
|
63
|
+
"rules": [
|
|
103
64
|
{
|
|
104
|
-
"
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
"results":
|
|
110
|
-
[
|
|
111
|
-
_to_sarif_result(error, locations)
|
|
112
|
-
for error in errors
|
|
65
|
+
"id": "type-error",
|
|
66
|
+
"shortDescription": {"text": "Type Error"},
|
|
67
|
+
"helpUri": "https://www.pyre-check.org",
|
|
68
|
+
"help": {"text": "Pyre is a type checker for Python"},
|
|
69
|
+
}
|
|
113
70
|
],
|
|
114
|
-
|
|
115
|
-
|
|
71
|
+
}
|
|
72
|
+
},
|
|
73
|
+
"artifacts": [
|
|
74
|
+
{"location": location}
|
|
75
|
+
for location in sorted(
|
|
76
|
+
locations.values(), key=lambda location: location["index"]
|
|
77
|
+
)
|
|
78
|
+
],
|
|
79
|
+
"results": [_to_sarif_result(error, locations) for error in errors],
|
|
80
|
+
}
|
|
81
|
+
],
|
|
116
82
|
}
|
|
117
83
|
|
|
118
84
|
|
|
119
85
|
if __name__ == "__main__":
|
|
120
86
|
logging.basicConfig(
|
|
121
|
-
format="%(asctime)s [%(levelname)s] %(message)s", level=logging.DEBUG
|
|
87
|
+
format="%(asctime)s [%(levelname)s] %(message)s", level=logging.DEBUG
|
|
88
|
+
)
|
|
122
89
|
|
|
123
90
|
sarif = _to_sarif(json.load(sys.stdin))
|
|
124
91
|
json.dump(sarif, sys.stdout, indent=4)
|