megfile 3.1.0.post2__py3-none-any.whl → 3.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +2 -4
- megfile/__init__.py +394 -203
- megfile/cli.py +258 -238
- megfile/config.py +25 -21
- megfile/errors.py +124 -114
- megfile/fs.py +174 -140
- megfile/fs_path.py +462 -354
- megfile/hdfs.py +133 -101
- megfile/hdfs_path.py +290 -236
- megfile/http.py +15 -14
- megfile/http_path.py +111 -107
- megfile/interfaces.py +70 -65
- megfile/lib/base_prefetch_reader.py +94 -69
- megfile/lib/combine_reader.py +13 -12
- megfile/lib/compare.py +17 -13
- megfile/lib/compat.py +1 -5
- megfile/lib/fnmatch.py +29 -30
- megfile/lib/glob.py +54 -55
- megfile/lib/hdfs_prefetch_reader.py +40 -25
- megfile/lib/hdfs_tools.py +1 -3
- megfile/lib/http_prefetch_reader.py +69 -46
- megfile/lib/joinpath.py +5 -5
- megfile/lib/lazy_handler.py +7 -3
- megfile/lib/s3_buffered_writer.py +61 -52
- megfile/lib/s3_cached_handler.py +14 -13
- megfile/lib/s3_limited_seekable_writer.py +38 -28
- megfile/lib/s3_memory_handler.py +35 -29
- megfile/lib/s3_pipe_handler.py +25 -24
- megfile/lib/s3_prefetch_reader.py +71 -52
- megfile/lib/s3_share_cache_reader.py +37 -24
- megfile/lib/shadow_handler.py +8 -3
- megfile/lib/stdio_handler.py +9 -8
- megfile/lib/url.py +3 -3
- megfile/pathlike.py +259 -228
- megfile/s3.py +220 -153
- megfile/s3_path.py +977 -802
- megfile/sftp.py +190 -156
- megfile/sftp_path.py +540 -450
- megfile/smart.py +397 -330
- megfile/smart_path.py +100 -105
- megfile/stdio.py +10 -9
- megfile/stdio_path.py +32 -35
- megfile/utils/__init__.py +75 -54
- megfile/utils/mutex.py +11 -14
- megfile/version.py +1 -1
- {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
- megfile-3.1.2.dist-info/RECORD +55 -0
- {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
- scripts/convert_results_to_sarif.py +45 -78
- scripts/generate_file.py +140 -64
- megfile-3.1.0.post2.dist-info/RECORD +0 -55
- {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
- {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
- {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/utils/__init__.py
CHANGED
|
@@ -5,17 +5,26 @@ import os
|
|
|
5
5
|
import uuid
|
|
6
6
|
from copy import copy
|
|
7
7
|
from functools import wraps
|
|
8
|
-
from io import
|
|
8
|
+
from io import (
|
|
9
|
+
BufferedIOBase,
|
|
10
|
+
BufferedRandom,
|
|
11
|
+
BufferedReader,
|
|
12
|
+
BufferedWriter,
|
|
13
|
+
BytesIO,
|
|
14
|
+
StringIO,
|
|
15
|
+
TextIOBase,
|
|
16
|
+
TextIOWrapper,
|
|
17
|
+
)
|
|
9
18
|
from typing import IO, Callable, Optional
|
|
10
19
|
|
|
11
20
|
from megfile.utils.mutex import ProcessLocal, ThreadLocal
|
|
12
21
|
|
|
13
22
|
|
|
14
23
|
def get_content_size(fileobj: IO, *, intrusive: bool = False) -> int:
|
|
15
|
-
|
|
24
|
+
"""Get size of File-Like Object
|
|
16
25
|
|
|
17
26
|
The File-Like Object must be seekable, otherwise raise IOError
|
|
18
|
-
|
|
27
|
+
"""
|
|
19
28
|
if isinstance(fileobj, (BytesIO, StringIO)):
|
|
20
29
|
return len(fileobj.getvalue())
|
|
21
30
|
|
|
@@ -25,8 +34,8 @@ def get_content_size(fileobj: IO, *, intrusive: bool = False) -> int:
|
|
|
25
34
|
file = file.buffer
|
|
26
35
|
if isinstance(file, BufferedIOBase):
|
|
27
36
|
file = file.raw
|
|
28
|
-
if hasattr(file,
|
|
29
|
-
return getattr(file,
|
|
37
|
+
if hasattr(file, "_content_size"):
|
|
38
|
+
return getattr(file, "_content_size") # pyre-ignore[16]
|
|
30
39
|
|
|
31
40
|
offset = fileobj.tell()
|
|
32
41
|
if not is_seekable(fileobj) and is_writable(fileobj):
|
|
@@ -40,41 +49,41 @@ def get_content_size(fileobj: IO, *, intrusive: bool = False) -> int:
|
|
|
40
49
|
|
|
41
50
|
|
|
42
51
|
def is_seekable(fileobj: IO) -> bool:
|
|
43
|
-
|
|
44
|
-
if hasattr(fileobj,
|
|
52
|
+
"""Test if File-Like Object is seekable"""
|
|
53
|
+
if hasattr(fileobj, "seekable"):
|
|
45
54
|
try:
|
|
46
55
|
return fileobj.seekable()
|
|
47
56
|
except Exception:
|
|
48
57
|
return False
|
|
49
|
-
return hasattr(fileobj,
|
|
58
|
+
return hasattr(fileobj, "seek")
|
|
50
59
|
|
|
51
60
|
|
|
52
61
|
def is_readable(fileobj: IO) -> bool:
|
|
53
|
-
|
|
54
|
-
if hasattr(fileobj,
|
|
62
|
+
"""Test if File-Like Object is readable"""
|
|
63
|
+
if hasattr(fileobj, "readable"):
|
|
55
64
|
try:
|
|
56
65
|
return fileobj.readable()
|
|
57
66
|
except Exception:
|
|
58
67
|
return False
|
|
59
|
-
return hasattr(fileobj,
|
|
68
|
+
return hasattr(fileobj, "read")
|
|
60
69
|
|
|
61
70
|
|
|
62
71
|
def is_writable(fileobj: IO) -> bool:
|
|
63
|
-
|
|
64
|
-
if hasattr(fileobj,
|
|
72
|
+
"""Test if File-Like Object is writable"""
|
|
73
|
+
if hasattr(fileobj, "writable"):
|
|
65
74
|
try:
|
|
66
75
|
return fileobj.writable()
|
|
67
76
|
except Exception:
|
|
68
77
|
return False
|
|
69
|
-
return hasattr(fileobj,
|
|
78
|
+
return hasattr(fileobj, "write")
|
|
70
79
|
|
|
71
80
|
|
|
72
81
|
def _is_pickle(fileobj) -> bool:
|
|
73
|
-
|
|
74
|
-
if fileobj.name.endswith(
|
|
82
|
+
"""Test if File Object is pickle"""
|
|
83
|
+
if fileobj.name.endswith(".pkl") or fileobj.name.endswith(".pickle"):
|
|
75
84
|
return True
|
|
76
85
|
|
|
77
|
-
if
|
|
86
|
+
if "r" in fileobj.mode and "b" in fileobj.mode:
|
|
78
87
|
offset = fileobj.tell()
|
|
79
88
|
fileobj.seek(0)
|
|
80
89
|
data = fileobj.read(2)
|
|
@@ -90,29 +99,32 @@ def get_content_offset(start: Optional[int], stop: Optional[int], size: int):
|
|
|
90
99
|
if stop is None or stop < 0 or start < 0:
|
|
91
100
|
start, stop, _ = slice(start, stop).indices(size)
|
|
92
101
|
if stop < start:
|
|
93
|
-
raise ValueError(
|
|
102
|
+
raise ValueError("read length must be positive")
|
|
94
103
|
return start, stop
|
|
95
104
|
|
|
96
105
|
|
|
97
106
|
def get_name(fileobj, default=None):
|
|
98
|
-
return getattr(fileobj,
|
|
107
|
+
return getattr(fileobj, "name", default or repr(fileobj))
|
|
99
108
|
|
|
100
109
|
|
|
101
|
-
def get_mode(fileobj, default=
|
|
110
|
+
def get_mode(fileobj, default="r"):
|
|
102
111
|
if isinstance(fileobj, BytesIO):
|
|
103
|
-
return
|
|
112
|
+
return "rb+"
|
|
104
113
|
elif isinstance(fileobj, StringIO):
|
|
105
|
-
return
|
|
106
|
-
return getattr(fileobj,
|
|
114
|
+
return "r+"
|
|
115
|
+
return getattr(fileobj, "mode", default)
|
|
107
116
|
|
|
108
117
|
|
|
109
118
|
def shadow_copy(fileobj: IO, intrusive: bool = True, buffered: bool = False):
|
|
110
|
-
|
|
119
|
+
"""Create a File-Like Object, maintaining file pointer,
|
|
120
|
+
to avoid misunderstanding the position when read / write / seek.
|
|
111
121
|
|
|
112
|
-
:param intrusive: If is intrusive. If True, move file pointer to the original
|
|
122
|
+
:param intrusive: If is intrusive. If True, move file pointer to the original
|
|
123
|
+
position after every read / write / seek. If False, then not.
|
|
113
124
|
:param verbose: If True, print log when read / write / seek
|
|
114
|
-
|
|
125
|
+
"""
|
|
115
126
|
from megfile.lib.shadow_handler import ShadowHandler
|
|
127
|
+
|
|
116
128
|
result = ShadowHandler(fileobj, intrusive=intrusive)
|
|
117
129
|
mode = get_mode(fileobj)
|
|
118
130
|
if "b" in mode and (buffered or _is_pickle(result)):
|
|
@@ -125,22 +137,25 @@ def shadow_copy(fileobj: IO, intrusive: bool = True, buffered: bool = False):
|
|
|
125
137
|
return result
|
|
126
138
|
|
|
127
139
|
|
|
128
|
-
def lazy_open(
|
|
129
|
-
|
|
130
|
-
''' Create a File-Like Object, maintaining file pointer, to open a file in lazy mode
|
|
140
|
+
def lazy_open(path: str, mode: str, open_func: Optional[Callable] = None, **options):
|
|
141
|
+
"""Create a File-Like Object, maintaining file pointer, to open a file in lazy mode
|
|
131
142
|
|
|
132
|
-
:param intrusive: If is intrusive. If True, move file pointer to the original
|
|
143
|
+
:param intrusive: If is intrusive. If True, move file pointer to the original
|
|
144
|
+
position after every read / write / seek. If False, then not.
|
|
133
145
|
:param verbose: If True, print log when read / write / seek
|
|
134
|
-
|
|
146
|
+
"""
|
|
135
147
|
from megfile.lib.lazy_handler import LazyHandler
|
|
148
|
+
|
|
136
149
|
if open_func is None:
|
|
137
150
|
from megfile.smart import smart_open
|
|
151
|
+
|
|
138
152
|
open_func = smart_open
|
|
139
153
|
return LazyHandler(path, mode, open_func=open_func, **options)
|
|
140
154
|
|
|
141
155
|
|
|
142
156
|
def patch_rlimit():
|
|
143
157
|
import resource
|
|
158
|
+
|
|
144
159
|
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
|
|
145
160
|
resource.setrlimit(resource.RLIMIT_NOFILE, (hard, hard))
|
|
146
161
|
|
|
@@ -151,36 +166,38 @@ process_local = ProcessLocal()
|
|
|
151
166
|
|
|
152
167
|
def combine(file_objects, name):
|
|
153
168
|
from megfile.lib.combine_reader import CombineReader
|
|
169
|
+
|
|
154
170
|
return CombineReader(file_objects, name)
|
|
155
171
|
|
|
156
172
|
|
|
157
173
|
def get_binary_mode(mode: str) -> str:
|
|
158
|
-
|
|
159
|
-
if
|
|
174
|
+
"""Replace mode parameter in open() with corresponding binary mode"""
|
|
175
|
+
if "t" in mode:
|
|
160
176
|
# rt / wt / rt+ => rb / wb / rb+
|
|
161
|
-
mode = mode.replace(
|
|
162
|
-
elif
|
|
177
|
+
mode = mode.replace("t", "b")
|
|
178
|
+
elif "b" not in mode:
|
|
163
179
|
# r / w / r+ => rb / wb / rb+
|
|
164
|
-
mode = mode[:1] +
|
|
180
|
+
mode = mode[:1] + "b" + mode[1:]
|
|
165
181
|
# rb / wb / r+b => rb / wb / rb+
|
|
166
|
-
return "".join(sorted(mode, key=lambda k: {
|
|
182
|
+
return "".join(sorted(mode, key=lambda k: {"b": 1, "+": 2}.get(k, 0)))
|
|
167
183
|
|
|
168
184
|
|
|
169
185
|
def binary_open(open_func):
|
|
170
|
-
|
|
186
|
+
"""
|
|
171
187
|
Decorator:
|
|
172
188
|
Output according to user-setting mode while calling Open
|
|
173
|
-
|
|
189
|
+
"""
|
|
174
190
|
|
|
175
191
|
@wraps(open_func)
|
|
176
192
|
def wrapper(
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
193
|
+
path,
|
|
194
|
+
mode: str = "rb",
|
|
195
|
+
encoding: Optional[str] = None,
|
|
196
|
+
errors: Optional[str] = None,
|
|
197
|
+
**kwargs,
|
|
198
|
+
):
|
|
182
199
|
fileobj = open_func(path, get_binary_mode(mode), **kwargs)
|
|
183
|
-
if
|
|
200
|
+
if "b" not in mode:
|
|
184
201
|
fileobj = TextIOWrapper(fileobj, encoding=encoding, errors=errors)
|
|
185
202
|
fileobj.mode = mode # pyre-ignore[41]
|
|
186
203
|
return fileobj
|
|
@@ -189,18 +206,20 @@ def binary_open(open_func):
|
|
|
189
206
|
|
|
190
207
|
|
|
191
208
|
def get_human_size(size_bytes: float) -> str:
|
|
192
|
-
|
|
193
|
-
|
|
209
|
+
"""Get human-readable size, e.g. `100MB`"""
|
|
210
|
+
if size_bytes < 0:
|
|
211
|
+
# TODO: replace AssertionError with ValueError in 4.0.0
|
|
212
|
+
raise AssertionError("negative size: %r" % size_bytes)
|
|
194
213
|
if size_bytes == 0:
|
|
195
|
-
return
|
|
196
|
-
size_name = (
|
|
214
|
+
return "0 B"
|
|
215
|
+
size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
|
|
197
216
|
index = int(math.floor(math.log(size_bytes, 1024)))
|
|
198
217
|
base = math.pow(1024, index)
|
|
199
218
|
if base == 1:
|
|
200
219
|
size = size_bytes
|
|
201
220
|
else:
|
|
202
221
|
size = round(size_bytes / base, 2)
|
|
203
|
-
return
|
|
222
|
+
return "%s %s" % (size, size_name[index])
|
|
204
223
|
|
|
205
224
|
|
|
206
225
|
def necessary_params(func: Callable, **kwargs):
|
|
@@ -220,7 +239,7 @@ def necessary_params(func: Callable, **kwargs):
|
|
|
220
239
|
return res_kwargs
|
|
221
240
|
|
|
222
241
|
|
|
223
|
-
def generate_cache_path(filename: str, cache_dir: str =
|
|
242
|
+
def generate_cache_path(filename: str, cache_dir: str = "/tmp") -> str:
|
|
224
243
|
suffix = os.path.splitext(filename)[1]
|
|
225
244
|
return os.path.join(cache_dir, str(uuid.uuid4()) + suffix)
|
|
226
245
|
|
|
@@ -238,7 +257,7 @@ def _get_class(cls_or_obj) -> type:
|
|
|
238
257
|
|
|
239
258
|
def calculate_md5(file_object):
|
|
240
259
|
hash_md5 = hashlib.md5() # nosec
|
|
241
|
-
for chunk in iter(lambda: file_object.read(4096), b
|
|
260
|
+
for chunk in iter(lambda: file_object.read(4096), b""):
|
|
242
261
|
hash_md5.update(chunk)
|
|
243
262
|
return hash_md5.hexdigest()
|
|
244
263
|
|
|
@@ -264,7 +283,8 @@ class classproperty(property):
|
|
|
264
283
|
def __set__(self, cls_or_obj, value: object) -> None:
|
|
265
284
|
"""
|
|
266
285
|
This method gets called when a property value should be set.
|
|
267
|
-
@param cls_or_obj: The class or instance of which the property should be
|
|
286
|
+
@param cls_or_obj: The class or instance of which the property should be
|
|
287
|
+
changed.
|
|
268
288
|
@param value: The new value.
|
|
269
289
|
"""
|
|
270
290
|
# call this method only on the class, not the instance
|
|
@@ -273,7 +293,8 @@ class classproperty(property):
|
|
|
273
293
|
def __delete__(self, cls_or_obj) -> None:
|
|
274
294
|
"""
|
|
275
295
|
This method gets called when a property should be deleted.
|
|
276
|
-
@param cls_or_obj: The class or instance of which the property should be
|
|
296
|
+
@param cls_or_obj: The class or instance of which the property should be
|
|
297
|
+
deleted.
|
|
277
298
|
"""
|
|
278
299
|
# call this method only on the class, not the instance
|
|
279
300
|
super(classproperty, self).__delete__(_get_class(cls_or_obj))
|
megfile/utils/mutex.py
CHANGED
|
@@ -5,14 +5,10 @@ from threading import RLock
|
|
|
5
5
|
from threading import local as _ThreadLocal
|
|
6
6
|
from typing import Any, Callable, Iterator
|
|
7
7
|
|
|
8
|
-
__all__ = [
|
|
9
|
-
'ThreadLocal',
|
|
10
|
-
'ProcessLocal',
|
|
11
|
-
]
|
|
8
|
+
__all__ = ["ThreadLocal", "ProcessLocal"]
|
|
12
9
|
|
|
13
10
|
|
|
14
11
|
class ForkAware(ABC):
|
|
15
|
-
|
|
16
12
|
def __init__(self):
|
|
17
13
|
self._process_id = os.getpid()
|
|
18
14
|
self._reset()
|
|
@@ -26,7 +22,6 @@ class ForkAware(ABC):
|
|
|
26
22
|
|
|
27
23
|
|
|
28
24
|
def fork_aware(func):
|
|
29
|
-
|
|
30
25
|
@wraps(func)
|
|
31
26
|
def wrapper(self, *args, **kwargs):
|
|
32
27
|
current_pid = os.getpid()
|
|
@@ -39,7 +34,6 @@ def fork_aware(func):
|
|
|
39
34
|
|
|
40
35
|
|
|
41
36
|
class BaseLocal(ABC): # pragma: no cover
|
|
42
|
-
|
|
43
37
|
@property
|
|
44
38
|
@abstractmethod
|
|
45
39
|
def _data(self) -> dict:
|
|
@@ -68,7 +62,6 @@ class BaseLocal(ABC): # pragma: no cover
|
|
|
68
62
|
|
|
69
63
|
|
|
70
64
|
class ThreadLocal(ForkAware, BaseLocal):
|
|
71
|
-
|
|
72
65
|
def _reset(self):
|
|
73
66
|
self._local = _ThreadLocal()
|
|
74
67
|
|
|
@@ -86,9 +79,12 @@ class ThreadLocal(ForkAware, BaseLocal):
|
|
|
86
79
|
|
|
87
80
|
class ProcessLocal(ForkAware, BaseLocal):
|
|
88
81
|
"""
|
|
89
|
-
Provides a basic per-process mapping container that wipes itself if the current PID
|
|
82
|
+
Provides a basic per-process mapping container that wipes itself if the current PID
|
|
83
|
+
changed since the last get/set.
|
|
84
|
+
|
|
90
85
|
Aka `threading.local()`, but for processes instead of threads.
|
|
91
86
|
"""
|
|
87
|
+
|
|
92
88
|
_lock = None
|
|
93
89
|
|
|
94
90
|
def _reset(self):
|
|
@@ -101,8 +97,9 @@ class ProcessLocal(ForkAware, BaseLocal):
|
|
|
101
97
|
return self._local
|
|
102
98
|
|
|
103
99
|
def __call__(self, key: str, func: Callable, *args, **kwargs) -> Any:
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
100
|
+
data = self._data
|
|
101
|
+
if key not in data:
|
|
102
|
+
with self._lock:
|
|
103
|
+
if key not in data:
|
|
104
|
+
data[key] = func(*args, **kwargs)
|
|
105
|
+
return data[key]
|
megfile/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = "3.1.
|
|
1
|
+
VERSION = "3.1.2"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: megfile
|
|
3
|
-
Version: 3.1.
|
|
3
|
+
Version: 3.1.2
|
|
4
4
|
Summary: Megvii file operation library
|
|
5
5
|
Author-email: megvii <megfile@megvii.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/megvii-research/megfile
|
|
@@ -22,15 +22,15 @@ Description-Content-Type: text/markdown
|
|
|
22
22
|
License-File: LICENSE
|
|
23
23
|
License-File: LICENSE.pyre
|
|
24
24
|
Requires-Dist: boto3
|
|
25
|
-
Requires-Dist: botocore
|
|
25
|
+
Requires-Dist: botocore>=1.13.0
|
|
26
26
|
Requires-Dist: requests
|
|
27
27
|
Requires-Dist: paramiko
|
|
28
28
|
Requires-Dist: tqdm
|
|
29
29
|
Requires-Dist: pyyaml
|
|
30
30
|
Provides-Extra: cli
|
|
31
|
-
Requires-Dist: click
|
|
31
|
+
Requires-Dist: click; extra == "cli"
|
|
32
32
|
Provides-Extra: hdfs
|
|
33
|
-
Requires-Dist: hdfs
|
|
33
|
+
Requires-Dist: hdfs; extra == "hdfs"
|
|
34
34
|
|
|
35
35
|
megfile - Megvii FILE library
|
|
36
36
|
---
|
|
@@ -198,12 +198,9 @@ s3 =
|
|
|
198
198
|
|
|
199
199
|
*You can submit code even if the code doesn't meet conditions. The project members will evaluate and assist you in making code changes*
|
|
200
200
|
|
|
201
|
-
* **Code format**: Your code needs to pass **code format check**. `megfile` uses `
|
|
201
|
+
* **Code format**: Your code needs to pass **code format check**. `megfile` uses `ruff` as lint tool
|
|
202
202
|
* **Static check**: Your code needs complete **type hint**. `megfile` uses `pytype` as static check tool. If `pytype` failed in static check, use `# pytype: disable=XXX` to disable the error and please tell us why you disable it.
|
|
203
203
|
|
|
204
|
-
*Note* : Because `pytype` doesn't support variable type annation, the variable type hint format introduced by py36 cannot be used.
|
|
205
|
-
> i.e. `variable: int` is invalid, replace it with `variable # type: int`
|
|
206
|
-
|
|
207
204
|
* **Test**: Your code needs complete **unit test** coverage. `megfile` uses `pyfakefs` and `moto` as local file system and s3 virtual environment in unit tests. The newly added code should have a complete unit test to ensure the correctness
|
|
208
205
|
|
|
209
206
|
* You can help to improve `megfile` in many ways:
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
docs/conf.py,sha256=sfDSly5jO8W_RmuAptOIp4hd8dNcO-9a5XrHTbxFnNo,2448
|
|
2
|
+
megfile/__init__.py,sha256=i2Lbq_VxIgppaqwkxG0_H35dRfcjJ4mCYWjprOf4hHo,7318
|
|
3
|
+
megfile/cli.py,sha256=Z54c98bIBOmljPtSPKPnXmuzsJJ8eLE0pd2l-ImhL1U,22234
|
|
4
|
+
megfile/config.py,sha256=_SkJRaVWUdfW1Q9uX0vao-6YVQKJtfej22Z8DykuRps,2331
|
|
5
|
+
megfile/errors.py,sha256=h6CgdgFG7A39ZgSvso9o5fz2uBZwMkZtBhRgtCXBvPI,13929
|
|
6
|
+
megfile/fs.py,sha256=dgj5fW-EEzQNdjMF2tkB5DjXu3iHQbtLi5PSIMxR8fc,11966
|
|
7
|
+
megfile/fs_path.py,sha256=Ffvukc176beH5aQMZXXtwH6ApwLYXPViCIUP0pijgT0,41590
|
|
8
|
+
megfile/hdfs.py,sha256=latguOuuzAmg-yWOy3Sm723CJ0ybN_eSHRubVNqhcMU,9202
|
|
9
|
+
megfile/hdfs_path.py,sha256=0XLtABufwqL-y8igOxzOJz6zOGppuBp2f2SwXIMvvYg,27299
|
|
10
|
+
megfile/http.py,sha256=2Z2yqyhU-zcJCJwSNyBsxsZ7f2FT9X6fcednsbHDsFM,2025
|
|
11
|
+
megfile/http_path.py,sha256=BhMNjQVB85IaCGGIKzgEfY73mAVdCzJP08W1RuGeMRA,16119
|
|
12
|
+
megfile/interfaces.py,sha256=HPkYvpIEYzzVrZrUAvmBIO9SmmQXEWD9Ihaq551cnlM,8575
|
|
13
|
+
megfile/pathlike.py,sha256=vKuCMlSAPYNSojp03wEj2i3Cq3E3ROp_-UkkdgBElws,30802
|
|
14
|
+
megfile/s3.py,sha256=7SdfLjAePVh-bpRyuj566VB4Qa7KP86rCJGzYANR7wQ,13008
|
|
15
|
+
megfile/s3_path.py,sha256=fHXDwndXz3X9zicdyxRhCgXzCSnPyEPs56MAxAaN6BY,93440
|
|
16
|
+
megfile/sftp.py,sha256=vyDnYXX3i1j2fhXMC8YCeX-66MDb9wrBQQjQVhZx0uo,13004
|
|
17
|
+
megfile/sftp_path.py,sha256=4tByWvUJK1KBJoa3t5aoWYnZpaRWN9nQIE6ZyiGHrbk,53519
|
|
18
|
+
megfile/smart.py,sha256=Vr4R7HpjXjt587KOc2-1QGbQ5EsZ48YRzCaK0rz3IS0,36108
|
|
19
|
+
megfile/smart_path.py,sha256=RO84tnqXsKtd_T19mz5wjD9LSnsE9_Vv3CuHId1qDiU,6686
|
|
20
|
+
megfile/stdio.py,sha256=UYe-h440Wc4f5COOzOTG1svnp5nFzrfpixehJ0_0_NY,653
|
|
21
|
+
megfile/stdio_path.py,sha256=7jzVdreamO18yBWZM7Pp71cO7GmrYb0M0qyQde2Ypq4,2706
|
|
22
|
+
megfile/version.py,sha256=qUtPF5wMCWTQvs67Rysqijd4tyLvFcKVJ9nmvYVQDQo,19
|
|
23
|
+
megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
+
megfile/lib/base_prefetch_reader.py,sha256=CaYWuiKmlk4Utr0IFDPwPC58wV2jBAhqpxhwhRHc734,13652
|
|
25
|
+
megfile/lib/combine_reader.py,sha256=uSzo3PmhD5ck6_Vv6dFU5vVx4boeA97VS-puPyhF_BE,4657
|
|
26
|
+
megfile/lib/compare.py,sha256=n_dtLxgoskYnsIZMKdKmVhQoVn8qYUrUhkS1JH2_X3o,2170
|
|
27
|
+
megfile/lib/compat.py,sha256=SynEeHluys3tCK-lb_1oV3o_ft83yZvunqM_AjibLgE,207
|
|
28
|
+
megfile/lib/fnmatch.py,sha256=HzXwMCskXGdA0tHCkgTGrIZmyyFvQpOQxoGqjZExXR8,4040
|
|
29
|
+
megfile/lib/glob.py,sha256=iJ0NvFh7b07MDru36YY1j3ZWPCNBLAECzUkoqYfJWgY,10052
|
|
30
|
+
megfile/lib/hdfs_prefetch_reader.py,sha256=UrcUmTM1IZwD95oZMJXuY2dYEpE7uUjs_6dHyTMYDbg,2129
|
|
31
|
+
megfile/lib/hdfs_tools.py,sha256=4K-OdMYFFSLBGmDzjatioHvuZuUbKVy7ACeJl-l0HLQ,435
|
|
32
|
+
megfile/lib/http_prefetch_reader.py,sha256=PibR9Mx4JSdhkB4p8WzD3i2f__Txv55VBtPOmhml3vM,4556
|
|
33
|
+
megfile/lib/joinpath.py,sha256=gaPNtBi8fzd5LZNyZp5zrHzaybcqKJ1xlntGmVNyFEM,929
|
|
34
|
+
megfile/lib/lazy_handler.py,sha256=bE7RGt1x_xYWMgGAvHr7dwEt52qy-D3z90X3oyCvE6g,1875
|
|
35
|
+
megfile/lib/s3_buffered_writer.py,sha256=V2nMdA3sB8XV6q5OqWFBZhPrzkw1ZUMM81k2uDPNn24,7087
|
|
36
|
+
megfile/lib/s3_cached_handler.py,sha256=QrQKck06ye16o7GD71T-fVCseKlOhsxp82LtBTtAKJU,1498
|
|
37
|
+
megfile/lib/s3_limited_seekable_writer.py,sha256=v-e7rfFBfWCSQVtJIaFHM_i0Hb1FkfVLHlhawo5MOIk,6358
|
|
38
|
+
megfile/lib/s3_memory_handler.py,sha256=NGKWbI4LG2cmV06CP7KOVPqS_BNpm3ApqKi5ibgIBvQ,4208
|
|
39
|
+
megfile/lib/s3_pipe_handler.py,sha256=DY1UTNCq8oD3QWXNb4orOiz3EoEAo6dhwmZZdk6h1bU,3694
|
|
40
|
+
megfile/lib/s3_prefetch_reader.py,sha256=YZA6JOQXcioREh_z1E-kZ2WRPTm02v0dCEVqyaOMHns,4287
|
|
41
|
+
megfile/lib/s3_share_cache_reader.py,sha256=jhGL1B6NPv68cQnW1Jf7ey-zTQ8XfiJg5ILDNgRWHy0,3671
|
|
42
|
+
megfile/lib/shadow_handler.py,sha256=UHsbDHISGBPBlzFyu1V_UgUSoYNoVvVDsyhxtt0yEU0,2735
|
|
43
|
+
megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,1987
|
|
44
|
+
megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
|
|
45
|
+
megfile/utils/__init__.py,sha256=NfO5vNxfeceGvMB3dgZNudyPFTmPY096JbC4iYroX6o,9003
|
|
46
|
+
megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
|
|
47
|
+
scripts/convert_results_to_sarif.py,sha256=nDiOfsedb22Ps7ZodmYdlXZlxv54fRxCQgOZsB2OkNk,2833
|
|
48
|
+
scripts/generate_file.py,sha256=-mTcBiqiQ1juvqojVfVZ-uZWgpANHJNdhrF7s68zNfc,10903
|
|
49
|
+
megfile-3.1.2.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
|
50
|
+
megfile-3.1.2.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
|
|
51
|
+
megfile-3.1.2.dist-info/METADATA,sha256=PdRilJUjmvzVJmWUcW_Jx9ieLmcJlAao4NEdWFP8NB8,8797
|
|
52
|
+
megfile-3.1.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
53
|
+
megfile-3.1.2.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
|
|
54
|
+
megfile-3.1.2.dist-info/top_level.txt,sha256=oTnYXo1Z3V61qSWAKtnY9RkDgRSHvfRN38FQae6E0W0,50
|
|
55
|
+
megfile-3.1.2.dist-info/RECORD,,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
-
|
|
3
|
+
|
|
4
4
|
# This source code is licensed under the MIT license found in the
|
|
5
5
|
# LICENSE.pyre file in the root directory of this source tree.
|
|
6
6
|
|
|
@@ -18,44 +18,33 @@ Location = Dict[str, Any]
|
|
|
18
18
|
|
|
19
19
|
def _locations(errors: List[Error]) -> Dict[str, Location]:
|
|
20
20
|
locations = {
|
|
21
|
-
error["path"]: {
|
|
22
|
-
|
|
23
|
-
"index": 0,
|
|
24
|
-
} for error in errors
|
|
21
|
+
error["path"]: {"uri": f"file://{Path.cwd() / error['path']}", "index": 0}
|
|
22
|
+
for error in errors
|
|
25
23
|
}
|
|
26
24
|
for index, location in enumerate(locations.values()):
|
|
27
25
|
location["index"] = index
|
|
28
26
|
return locations
|
|
29
27
|
|
|
30
28
|
|
|
31
|
-
def _to_sarif_result(error: Error, locations: Dict[str,
|
|
32
|
-
Location]) -> Dict[str, Any]:
|
|
29
|
+
def _to_sarif_result(error: Error, locations: Dict[str, Location]) -> Dict[str, Any]:
|
|
33
30
|
LOG.info(f"Transforming:\n{error}")
|
|
34
31
|
|
|
35
32
|
return {
|
|
36
|
-
"ruleId":
|
|
37
|
-
|
|
38
|
-
"
|
|
39
|
-
|
|
40
|
-
"
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
"physicalLocation":
|
|
49
|
-
{
|
|
50
|
-
"artifactLocation": locations[error["path"]],
|
|
51
|
-
"region":
|
|
52
|
-
{
|
|
53
|
-
"startLine": error["line"],
|
|
54
|
-
"startColumn": error["column"] + 1,
|
|
55
|
-
},
|
|
56
|
-
}
|
|
33
|
+
"ruleId": "type-error",
|
|
34
|
+
"ruleIndex": 0,
|
|
35
|
+
"level": "error",
|
|
36
|
+
"message": {"text": error["description"]},
|
|
37
|
+
"locations": [
|
|
38
|
+
{
|
|
39
|
+
"physicalLocation": {
|
|
40
|
+
"artifactLocation": locations[error["path"]],
|
|
41
|
+
"region": {
|
|
42
|
+
"startLine": error["line"],
|
|
43
|
+
"startColumn": error["column"] + 1,
|
|
44
|
+
},
|
|
57
45
|
}
|
|
58
|
-
|
|
46
|
+
}
|
|
47
|
+
],
|
|
59
48
|
}
|
|
60
49
|
|
|
61
50
|
|
|
@@ -63,62 +52,40 @@ def _to_sarif(errors: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
|
63
52
|
LOG.info(f"Transforming:\n{errors}")
|
|
64
53
|
locations = _locations(errors)
|
|
65
54
|
return {
|
|
66
|
-
"version":
|
|
67
|
-
|
|
68
|
-
"
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
"driver":
|
|
76
|
-
{
|
|
77
|
-
"name":
|
|
78
|
-
"Pyre",
|
|
79
|
-
"informationUri":
|
|
80
|
-
"https://www.pyre-check.org",
|
|
81
|
-
"rules":
|
|
82
|
-
[
|
|
83
|
-
{
|
|
84
|
-
"id":
|
|
85
|
-
"type-error",
|
|
86
|
-
"shortDescription":
|
|
87
|
-
{
|
|
88
|
-
"text": "Type Error"
|
|
89
|
-
},
|
|
90
|
-
"helpUri":
|
|
91
|
-
"https://www.pyre-check.org",
|
|
92
|
-
"help":
|
|
93
|
-
{
|
|
94
|
-
"text":
|
|
95
|
-
"Pyre is a type checker for Python"
|
|
96
|
-
},
|
|
97
|
-
},
|
|
98
|
-
],
|
|
99
|
-
}
|
|
100
|
-
},
|
|
101
|
-
"artifacts":
|
|
102
|
-
[
|
|
55
|
+
"version": "2.1.0",
|
|
56
|
+
"$schema": "http://json.schemastore.org/sarif-2.1.0-rtm.4",
|
|
57
|
+
"runs": [
|
|
58
|
+
{
|
|
59
|
+
"tool": {
|
|
60
|
+
"driver": {
|
|
61
|
+
"name": "Pyre",
|
|
62
|
+
"informationUri": "https://www.pyre-check.org",
|
|
63
|
+
"rules": [
|
|
103
64
|
{
|
|
104
|
-
"
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
"results":
|
|
110
|
-
[
|
|
111
|
-
_to_sarif_result(error, locations)
|
|
112
|
-
for error in errors
|
|
65
|
+
"id": "type-error",
|
|
66
|
+
"shortDescription": {"text": "Type Error"},
|
|
67
|
+
"helpUri": "https://www.pyre-check.org",
|
|
68
|
+
"help": {"text": "Pyre is a type checker for Python"},
|
|
69
|
+
}
|
|
113
70
|
],
|
|
114
|
-
|
|
115
|
-
|
|
71
|
+
}
|
|
72
|
+
},
|
|
73
|
+
"artifacts": [
|
|
74
|
+
{"location": location}
|
|
75
|
+
for location in sorted(
|
|
76
|
+
locations.values(), key=lambda location: location["index"]
|
|
77
|
+
)
|
|
78
|
+
],
|
|
79
|
+
"results": [_to_sarif_result(error, locations) for error in errors],
|
|
80
|
+
}
|
|
81
|
+
],
|
|
116
82
|
}
|
|
117
83
|
|
|
118
84
|
|
|
119
85
|
if __name__ == "__main__":
|
|
120
86
|
logging.basicConfig(
|
|
121
|
-
format="%(asctime)s [%(levelname)s] %(message)s", level=logging.DEBUG
|
|
87
|
+
format="%(asctime)s [%(levelname)s] %(message)s", level=logging.DEBUG
|
|
88
|
+
)
|
|
122
89
|
|
|
123
90
|
sarif = _to_sarif(json.load(sys.stdin))
|
|
124
91
|
json.dump(sarif, sys.stdout, indent=4)
|