macrodata-refiner 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {macrodata_refiner-0.2.0/src/macrodata_refiner.egg-info → macrodata_refiner-0.2.2}/PKG-INFO +21 -4
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/README.md +2 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/pyproject.toml +27 -5
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2/src/macrodata_refiner.egg-info}/PKG-INFO +21 -4
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/macrodata_refiner.egg-info/SOURCES.txt +14 -7
- macrodata_refiner-0.2.2/src/macrodata_refiner.egg-info/requires.txt +35 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/__init__.py +16 -20
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/cli/auth.py +2 -2
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/cli/ui.py +9 -0
- macrodata_refiner-0.2.2/src/refiner/execution/asyncio/__init__.py +1 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/execution/operators/row.py +10 -9
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/execution/operators/vectorized.py +5 -10
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/io/datafolder.py +55 -1
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/io/fileset.py +50 -35
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/launchers/base.py +33 -3
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/launchers/cloud.py +74 -3
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/launchers/local.py +15 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/__init__.py +0 -6
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/data/tabular.py +10 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/expressions.py +86 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/pipeline.py +21 -5
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/planning.py +31 -6
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sinks/__init__.py +0 -2
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sinks/lerobot.py +5 -4
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sources/readers/base.py +28 -2
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sources/readers/csv.py +14 -5
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sources/readers/jsonl.py +11 -2
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sources/readers/lerobot.py +5 -0
- macrodata_refiner-0.2.2/src/refiner/pipeline/sources/readers/parquet.py +450 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/utils/cache/decoder_cache.py +4 -2
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/platform/client/api.py +5 -13
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/platform/client/http.py +28 -4
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/platform/client/models.py +6 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/platform/manifest.py +36 -35
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/robotics/lerobot_format/__init__.py +0 -2
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/robotics/lerobot_format/row.py +3 -12
- macrodata_refiner-0.2.2/src/refiner/text/__init__.py +11 -0
- macrodata_refiner-0.2.2/src/refiner/text/commoncrawl.py +654 -0
- macrodata_refiner-0.2.2/src/refiner/utils/__init__.py +3 -0
- macrodata_refiner-0.2.2/src/refiner/utils/imports.py +75 -0
- macrodata_refiner-0.2.2/src/refiner/video/__init__.py +41 -0
- {macrodata_refiner-0.2.0/src/refiner/media → macrodata_refiner-0.2.2/src/refiner}/video/remux.py +6 -4
- {macrodata_refiner-0.2.0/src/refiner/media → macrodata_refiner-0.2.2/src/refiner}/video/transcode.py +6 -4
- {macrodata_refiner-0.2.0/src/refiner/media → macrodata_refiner-0.2.2/src/refiner}/video/writer.py +12 -14
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/entrypoint.py +12 -0
- macrodata_refiner-0.2.2/src/refiner/worker/resources/gpu.py +81 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/runner.py +29 -18
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/tests/test_cache.py +1 -1
- macrodata_refiner-0.2.2/tests/test_commoncrawl_text.py +1194 -0
- macrodata_refiner-0.2.2/tests/test_optional_dependencies.py +19 -0
- macrodata_refiner-0.2.0/src/macrodata_refiner.egg-info/requires.txt +0 -14
- macrodata_refiner-0.2.0/src/refiner/media/__init__.py +0 -3
- macrodata_refiner-0.2.0/src/refiner/media/video/__init__.py +0 -3
- macrodata_refiner-0.2.0/src/refiner/pipeline/sources/readers/parquet.py +0 -252
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/LICENSE +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/setup.cfg +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/macrodata_refiner.egg-info/dependency_links.txt +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/macrodata_refiner.egg-info/entry_points.txt +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/macrodata_refiner.egg-info/top_level.txt +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/cli/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/cli/main.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/execution/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/execution/asyncio/runtime.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/execution/asyncio/window.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/execution/buffer.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/execution/engine.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/execution/operators/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/execution/tracking/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/execution/tracking/shards.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/io/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/io/datafile.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/launchers/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/data/block.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/data/row.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/data/shard.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sinks/base.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sinks/jsonl.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sinks/lerobot_reducer.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sinks/parquet.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sources/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sources/base.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sources/items.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sources/readers/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sources/readers/utils.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/sources/task.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/steps.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/utils/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/utils/cache/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/utils/cache/file_cache.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/pipeline/utils/cache/lease_cache.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/platform/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/platform/auth.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/platform/client/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/platform/client/serialize.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/py.typed +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/robotics/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/robotics/lerobot_format/metadata/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/robotics/lerobot_format/metadata/info.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/robotics/lerobot_format/metadata/metadata.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/robotics/lerobot_format/metadata/stats.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/robotics/lerobot_format/metadata/tasks.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/robotics/lerobot_format/tabular.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/robotics/motion.py +0 -0
- {macrodata_refiner-0.2.0/src/refiner/media → macrodata_refiner-0.2.2/src/refiner}/video/types.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/context.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/lifecycle/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/lifecycle/base.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/lifecycle/local/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/lifecycle/local/claim.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/lifecycle/local/files.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/lifecycle/local/lifecycle.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/lifecycle/platform.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/metrics/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/metrics/api.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/metrics/context.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/metrics/otel.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/resources/__init__.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/resources/cpu.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/resources/memory.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/resources/network.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/worker/workdir.py +0 -0
- {macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/tests/test_expressions.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrodata-refiner
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
5
|
Author: Macrodata Labs
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,12 +12,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
|
-
Requires-Dist: av
|
|
16
15
|
Requires-Dist: cloudpickle==3.1.2
|
|
17
16
|
Requires-Dist: fsspec
|
|
18
17
|
Requires-Dist: httpx
|
|
19
18
|
Requires-Dist: loguru
|
|
20
|
-
Requires-Dist: huggingface-hub>=1.4.1
|
|
21
19
|
Requires-Dist: opentelemetry-exporter-otlp-proto-http
|
|
22
20
|
Requires-Dist: opentelemetry-sdk
|
|
23
21
|
Requires-Dist: numpy
|
|
@@ -25,7 +23,24 @@ Requires-Dist: psutil
|
|
|
25
23
|
Requires-Dist: orjson
|
|
26
24
|
Requires-Dist: pyarrow
|
|
27
25
|
Requires-Dist: msgspec>=0.20.0
|
|
28
|
-
|
|
26
|
+
Provides-Extra: video
|
|
27
|
+
Requires-Dist: av; extra == "video"
|
|
28
|
+
Provides-Extra: robotics
|
|
29
|
+
Requires-Dist: macrodata-refiner[video]; extra == "robotics"
|
|
30
|
+
Requires-Dist: huggingface-hub>=1.4.1; extra == "robotics"
|
|
31
|
+
Requires-Dist: hf>=1.7.1; extra == "robotics"
|
|
32
|
+
Provides-Extra: text
|
|
33
|
+
Requires-Dist: warcio; extra == "text"
|
|
34
|
+
Provides-Extra: s3
|
|
35
|
+
Requires-Dist: s3fs; extra == "s3"
|
|
36
|
+
Provides-Extra: testing
|
|
37
|
+
Requires-Dist: macrodata-refiner[robotics]; extra == "testing"
|
|
38
|
+
Requires-Dist: macrodata-refiner[text]; extra == "testing"
|
|
39
|
+
Requires-Dist: macrodata-refiner[s3]; extra == "testing"
|
|
40
|
+
Requires-Dist: pytest>=8.0.0; extra == "testing"
|
|
41
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
|
|
42
|
+
Provides-Extra: all
|
|
43
|
+
Requires-Dist: macrodata-refiner[testing]; extra == "all"
|
|
29
44
|
Dynamic: license-file
|
|
30
45
|
|
|
31
46
|
<p align="center">
|
|
@@ -83,6 +98,8 @@ import refiner as mdr
|
|
|
83
98
|
)
|
|
84
99
|
```
|
|
85
100
|
|
|
101
|
+
Need cloud GPUs? See [Launchers](docs/launchers.md) for the GPU-specific cloud options.
|
|
102
|
+
|
|
86
103
|
### Local example
|
|
87
104
|
|
|
88
105
|
Launch a local pipeline:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "macrodata-refiner"
|
|
3
|
-
version = "0.2.0"
|
|
3
|
+
version = "0.2.2"
|
|
4
4
|
description = "Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -16,12 +16,10 @@ authors = [
|
|
|
16
16
|
]
|
|
17
17
|
requires-python = ">=3.10"
|
|
18
18
|
dependencies = [
|
|
19
|
-
"av",
|
|
20
19
|
"cloudpickle==3.1.2",
|
|
21
20
|
"fsspec",
|
|
22
21
|
"httpx",
|
|
23
22
|
"loguru",
|
|
24
|
-
"huggingface-hub>=1.4.1",
|
|
25
23
|
"opentelemetry-exporter-otlp-proto-http",
|
|
26
24
|
"opentelemetry-sdk",
|
|
27
25
|
"numpy",
|
|
@@ -29,8 +27,33 @@ dependencies = [
|
|
|
29
27
|
"orjson",
|
|
30
28
|
"pyarrow",
|
|
31
29
|
"msgspec>=0.20.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
video = [
|
|
34
|
+
"av",
|
|
35
|
+
]
|
|
36
|
+
robotics = [
|
|
37
|
+
"macrodata-refiner[video]",
|
|
38
|
+
"huggingface-hub>=1.4.1",
|
|
32
39
|
"hf>=1.7.1",
|
|
33
40
|
]
|
|
41
|
+
text = [
|
|
42
|
+
"warcio",
|
|
43
|
+
]
|
|
44
|
+
s3 = [
|
|
45
|
+
"s3fs",
|
|
46
|
+
]
|
|
47
|
+
testing = [
|
|
48
|
+
"macrodata-refiner[robotics]",
|
|
49
|
+
"macrodata-refiner[text]",
|
|
50
|
+
"macrodata-refiner[s3]",
|
|
51
|
+
"pytest>=8.0.0",
|
|
52
|
+
"pytest-cov>=5.0.0",
|
|
53
|
+
]
|
|
54
|
+
all = [
|
|
55
|
+
"macrodata-refiner[testing]",
|
|
56
|
+
]
|
|
34
57
|
|
|
35
58
|
[project.scripts]
|
|
36
59
|
macrodata = "refiner.cli.main:main"
|
|
@@ -47,9 +70,8 @@ refiner = ["py.typed"]
|
|
|
47
70
|
|
|
48
71
|
[dependency-groups]
|
|
49
72
|
dev = [
|
|
73
|
+
"macrodata-refiner[all]",
|
|
50
74
|
"pre-commit>=4.0.0",
|
|
51
|
-
"pytest>=8.0.0",
|
|
52
|
-
"pytest-cov>=5.0.0",
|
|
53
75
|
"ruff>=0.14.10",
|
|
54
76
|
"ty>=0.0.7",
|
|
55
77
|
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrodata-refiner
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
5
|
Author: Macrodata Labs
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,12 +12,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
|
-
Requires-Dist: av
|
|
16
15
|
Requires-Dist: cloudpickle==3.1.2
|
|
17
16
|
Requires-Dist: fsspec
|
|
18
17
|
Requires-Dist: httpx
|
|
19
18
|
Requires-Dist: loguru
|
|
20
|
-
Requires-Dist: huggingface-hub>=1.4.1
|
|
21
19
|
Requires-Dist: opentelemetry-exporter-otlp-proto-http
|
|
22
20
|
Requires-Dist: opentelemetry-sdk
|
|
23
21
|
Requires-Dist: numpy
|
|
@@ -25,7 +23,24 @@ Requires-Dist: psutil
|
|
|
25
23
|
Requires-Dist: orjson
|
|
26
24
|
Requires-Dist: pyarrow
|
|
27
25
|
Requires-Dist: msgspec>=0.20.0
|
|
28
|
-
|
|
26
|
+
Provides-Extra: video
|
|
27
|
+
Requires-Dist: av; extra == "video"
|
|
28
|
+
Provides-Extra: robotics
|
|
29
|
+
Requires-Dist: macrodata-refiner[video]; extra == "robotics"
|
|
30
|
+
Requires-Dist: huggingface-hub>=1.4.1; extra == "robotics"
|
|
31
|
+
Requires-Dist: hf>=1.7.1; extra == "robotics"
|
|
32
|
+
Provides-Extra: text
|
|
33
|
+
Requires-Dist: warcio; extra == "text"
|
|
34
|
+
Provides-Extra: s3
|
|
35
|
+
Requires-Dist: s3fs; extra == "s3"
|
|
36
|
+
Provides-Extra: testing
|
|
37
|
+
Requires-Dist: macrodata-refiner[robotics]; extra == "testing"
|
|
38
|
+
Requires-Dist: macrodata-refiner[text]; extra == "testing"
|
|
39
|
+
Requires-Dist: macrodata-refiner[s3]; extra == "testing"
|
|
40
|
+
Requires-Dist: pytest>=8.0.0; extra == "testing"
|
|
41
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
|
|
42
|
+
Provides-Extra: all
|
|
43
|
+
Requires-Dist: macrodata-refiner[testing]; extra == "all"
|
|
29
44
|
Dynamic: license-file
|
|
30
45
|
|
|
31
46
|
<p align="center">
|
|
@@ -83,6 +98,8 @@ import refiner as mdr
|
|
|
83
98
|
)
|
|
84
99
|
```
|
|
85
100
|
|
|
101
|
+
Need cloud GPUs? See [Launchers](docs/launchers.md) for the GPU-specific cloud options.
|
|
102
|
+
|
|
86
103
|
### Local example
|
|
87
104
|
|
|
88
105
|
Launch a local pipeline:
|
{macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/macrodata_refiner.egg-info/SOURCES.txt
RENAMED
|
@@ -16,6 +16,7 @@ src/refiner/cli/ui.py
|
|
|
16
16
|
src/refiner/execution/__init__.py
|
|
17
17
|
src/refiner/execution/buffer.py
|
|
18
18
|
src/refiner/execution/engine.py
|
|
19
|
+
src/refiner/execution/asyncio/__init__.py
|
|
19
20
|
src/refiner/execution/asyncio/runtime.py
|
|
20
21
|
src/refiner/execution/asyncio/window.py
|
|
21
22
|
src/refiner/execution/operators/__init__.py
|
|
@@ -31,12 +32,6 @@ src/refiner/launchers/__init__.py
|
|
|
31
32
|
src/refiner/launchers/base.py
|
|
32
33
|
src/refiner/launchers/cloud.py
|
|
33
34
|
src/refiner/launchers/local.py
|
|
34
|
-
src/refiner/media/__init__.py
|
|
35
|
-
src/refiner/media/video/__init__.py
|
|
36
|
-
src/refiner/media/video/remux.py
|
|
37
|
-
src/refiner/media/video/transcode.py
|
|
38
|
-
src/refiner/media/video/types.py
|
|
39
|
-
src/refiner/media/video/writer.py
|
|
40
35
|
src/refiner/pipeline/__init__.py
|
|
41
36
|
src/refiner/pipeline/expressions.py
|
|
42
37
|
src/refiner/pipeline/pipeline.py
|
|
@@ -86,6 +81,15 @@ src/refiner/robotics/lerobot_format/metadata/info.py
|
|
|
86
81
|
src/refiner/robotics/lerobot_format/metadata/metadata.py
|
|
87
82
|
src/refiner/robotics/lerobot_format/metadata/stats.py
|
|
88
83
|
src/refiner/robotics/lerobot_format/metadata/tasks.py
|
|
84
|
+
src/refiner/text/__init__.py
|
|
85
|
+
src/refiner/text/commoncrawl.py
|
|
86
|
+
src/refiner/utils/__init__.py
|
|
87
|
+
src/refiner/utils/imports.py
|
|
88
|
+
src/refiner/video/__init__.py
|
|
89
|
+
src/refiner/video/remux.py
|
|
90
|
+
src/refiner/video/transcode.py
|
|
91
|
+
src/refiner/video/types.py
|
|
92
|
+
src/refiner/video/writer.py
|
|
89
93
|
src/refiner/worker/__init__.py
|
|
90
94
|
src/refiner/worker/context.py
|
|
91
95
|
src/refiner/worker/entrypoint.py
|
|
@@ -104,7 +108,10 @@ src/refiner/worker/metrics/context.py
|
|
|
104
108
|
src/refiner/worker/metrics/otel.py
|
|
105
109
|
src/refiner/worker/resources/__init__.py
|
|
106
110
|
src/refiner/worker/resources/cpu.py
|
|
111
|
+
src/refiner/worker/resources/gpu.py
|
|
107
112
|
src/refiner/worker/resources/memory.py
|
|
108
113
|
src/refiner/worker/resources/network.py
|
|
109
114
|
tests/test_cache.py
|
|
110
|
-
tests/test_expressions.py
|
|
115
|
+
tests/test_commoncrawl_text.py
|
|
116
|
+
tests/test_expressions.py
|
|
117
|
+
tests/test_optional_dependencies.py
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
cloudpickle==3.1.2
|
|
2
|
+
fsspec
|
|
3
|
+
httpx
|
|
4
|
+
loguru
|
|
5
|
+
opentelemetry-exporter-otlp-proto-http
|
|
6
|
+
opentelemetry-sdk
|
|
7
|
+
numpy
|
|
8
|
+
psutil
|
|
9
|
+
orjson
|
|
10
|
+
pyarrow
|
|
11
|
+
msgspec>=0.20.0
|
|
12
|
+
|
|
13
|
+
[all]
|
|
14
|
+
macrodata-refiner[testing]
|
|
15
|
+
|
|
16
|
+
[robotics]
|
|
17
|
+
macrodata-refiner[video]
|
|
18
|
+
huggingface-hub>=1.4.1
|
|
19
|
+
hf>=1.7.1
|
|
20
|
+
|
|
21
|
+
[s3]
|
|
22
|
+
s3fs
|
|
23
|
+
|
|
24
|
+
[testing]
|
|
25
|
+
macrodata-refiner[robotics]
|
|
26
|
+
macrodata-refiner[text]
|
|
27
|
+
macrodata-refiner[s3]
|
|
28
|
+
pytest>=8.0.0
|
|
29
|
+
pytest-cov>=5.0.0
|
|
30
|
+
|
|
31
|
+
[text]
|
|
32
|
+
warcio
|
|
33
|
+
|
|
34
|
+
[video]
|
|
35
|
+
av
|
|
@@ -1,11 +1,9 @@
|
|
|
1
|
+
import refiner.io as io
|
|
2
|
+
import refiner.pipeline as pipeline
|
|
1
3
|
import refiner.robotics as robotics
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
from refiner.media import VideoFile
|
|
4
|
+
import refiner.text as text
|
|
5
|
+
import refiner.video as video
|
|
5
6
|
from refiner.pipeline import (
|
|
6
|
-
RefinerPipeline,
|
|
7
|
-
Row,
|
|
8
|
-
Shard,
|
|
9
7
|
from_items,
|
|
10
8
|
from_source,
|
|
11
9
|
read_csv,
|
|
@@ -22,19 +20,11 @@ from refiner.worker.metrics.api import (
|
|
|
22
20
|
log_throughput,
|
|
23
21
|
register_gauge,
|
|
24
22
|
)
|
|
25
|
-
|
|
23
|
+
|
|
24
|
+
robot = robotics
|
|
26
25
|
|
|
27
26
|
__all__ = [
|
|
28
|
-
|
|
29
|
-
"LocalLauncher",
|
|
30
|
-
"LaunchStats",
|
|
31
|
-
"DataFile",
|
|
32
|
-
"DataFolder",
|
|
33
|
-
"DataFileSet",
|
|
34
|
-
"Shard",
|
|
35
|
-
"Row",
|
|
36
|
-
"Worker",
|
|
37
|
-
"WorkerRunStats",
|
|
27
|
+
# sources
|
|
38
28
|
"read_csv",
|
|
39
29
|
"read_jsonl",
|
|
40
30
|
"read_lerobot",
|
|
@@ -42,16 +32,22 @@ __all__ = [
|
|
|
42
32
|
"from_items",
|
|
43
33
|
"from_source",
|
|
44
34
|
"task",
|
|
35
|
+
# metrics
|
|
45
36
|
"log_throughput",
|
|
46
37
|
"log_gauge",
|
|
47
38
|
"log_gauges",
|
|
48
|
-
"register_gauge",
|
|
49
39
|
"log_histogram",
|
|
40
|
+
"register_gauge",
|
|
41
|
+
# expressions
|
|
50
42
|
"col",
|
|
51
43
|
"lit",
|
|
52
44
|
"coalesce",
|
|
53
45
|
"if_else",
|
|
54
|
-
|
|
55
|
-
"
|
|
46
|
+
# submodules
|
|
47
|
+
"io",
|
|
48
|
+
"pipeline",
|
|
49
|
+
"video",
|
|
50
|
+
"robot",
|
|
56
51
|
"robotics",
|
|
52
|
+
"text",
|
|
57
53
|
]
|
|
@@ -18,7 +18,7 @@ from refiner.platform.client import (
|
|
|
18
18
|
sanitize_terminal_text,
|
|
19
19
|
verify_api_key,
|
|
20
20
|
)
|
|
21
|
-
from refiner.cli.ui import display_identity, print_banner
|
|
21
|
+
from refiner.cli.ui import display_identity, print_banner, stdin_is_interactive
|
|
22
22
|
|
|
23
23
|
_TOKEN_SETTINGS_SUFFIX = "/settings/api-keys"
|
|
24
24
|
|
|
@@ -31,7 +31,7 @@ def _read_token(args: argparse.Namespace) -> str:
|
|
|
31
31
|
if args.token and args.token.strip():
|
|
32
32
|
return args.token.strip()
|
|
33
33
|
|
|
34
|
-
read_from_stdin = args.token_stdin or not
|
|
34
|
+
read_from_stdin = args.token_stdin or not stdin_is_interactive()
|
|
35
35
|
if read_from_stdin:
|
|
36
36
|
token = sys.stdin.read().strip()
|
|
37
37
|
if token:
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import sys
|
|
4
|
+
|
|
3
5
|
from refiner.platform.client import UserIdentity
|
|
4
6
|
|
|
5
7
|
ASCII_BANNER = r"""
|
|
@@ -26,3 +28,10 @@ def display_identity(user: UserIdentity) -> str:
|
|
|
26
28
|
if email:
|
|
27
29
|
return f"{label} ({email})"
|
|
28
30
|
return label
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def stdin_is_interactive() -> bool:
|
|
34
|
+
try:
|
|
35
|
+
return sys.stdin.isatty()
|
|
36
|
+
except Exception: # pragma: no cover
|
|
37
|
+
return False
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -58,15 +58,16 @@ def execute_row_steps(
|
|
|
58
58
|
)
|
|
59
59
|
|
|
60
60
|
async def _run_async_step(*, step: AsyncRowStep, row: Row) -> Row:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
61
|
+
with set_active_step_index(step.index):
|
|
62
|
+
result = step.apply_row_async(row)
|
|
63
|
+
if inspect.isawaitable(result):
|
|
64
|
+
result = await result
|
|
65
|
+
result = cast(MapResult, result)
|
|
66
|
+
if isinstance(result, Row):
|
|
67
|
+
return result
|
|
68
|
+
if isinstance(result, dict):
|
|
69
|
+
return row.update(result)
|
|
70
|
+
raise TypeError(f"Unsupported map_async() result type: {type(result)!r}")
|
|
70
71
|
|
|
71
72
|
def _run_step(i: int, *, flush_all: bool) -> None:
|
|
72
73
|
step = ordered[i]
|
{macrodata_refiner-0.2.0 → macrodata_refiner-0.2.2}/src/refiner/execution/operators/vectorized.py
RENAMED
|
@@ -10,7 +10,7 @@ from refiner.execution.tracking.shards import (
|
|
|
10
10
|
count_table_by_shard,
|
|
11
11
|
counts_delta,
|
|
12
12
|
)
|
|
13
|
-
from refiner.pipeline.data.tabular import repeat_scalar
|
|
13
|
+
from refiner.pipeline.data.tabular import filter_table, repeat_scalar
|
|
14
14
|
from refiner.pipeline.expressions import eval_expr_arrow
|
|
15
15
|
from refiner.pipeline.steps import (
|
|
16
16
|
CastStep,
|
|
@@ -22,6 +22,7 @@ from refiner.pipeline.steps import (
|
|
|
22
22
|
VectorizedOp,
|
|
23
23
|
WithColumnsStep,
|
|
24
24
|
)
|
|
25
|
+
from refiner.worker.context import set_active_step_index
|
|
25
26
|
from refiner.worker.metrics.api import log_throughput
|
|
26
27
|
|
|
27
28
|
|
|
@@ -68,14 +69,7 @@ def apply_vectorized_op(
|
|
|
68
69
|
return out, None
|
|
69
70
|
|
|
70
71
|
if isinstance(op, FilterExprStep):
|
|
71
|
-
|
|
72
|
-
next_table = (
|
|
73
|
-
table
|
|
74
|
-
if isinstance(mask, pa.Scalar) and bool(mask.as_py())
|
|
75
|
-
else (
|
|
76
|
-
table.slice(0, 0) if isinstance(mask, pa.Scalar) else table.filter(mask)
|
|
77
|
-
)
|
|
78
|
-
)
|
|
72
|
+
next_table = filter_table(table, op.predicate)
|
|
79
73
|
next_shard_counts = count_table_by_shard(next_table)
|
|
80
74
|
for shard_id in set(shard_counts) | set(next_shard_counts):
|
|
81
75
|
previous = int(shard_counts.get(shard_id, 0))
|
|
@@ -100,7 +94,8 @@ def apply_vectorized_op(
|
|
|
100
94
|
return next_table, next_shard_counts
|
|
101
95
|
|
|
102
96
|
if isinstance(op, FnTableStep):
|
|
103
|
-
|
|
97
|
+
with set_active_step_index(op.index):
|
|
98
|
+
next_table = op.fn(table)
|
|
104
99
|
if not isinstance(next_table, pa.Table):
|
|
105
100
|
raise TypeError(
|
|
106
101
|
f"map_table() must return pa.Table, got {type(next_table)!r}"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from collections.abc import Iterable, Mapping
|
|
1
|
+
from collections.abc import Iterable, Iterator, Mapping
|
|
2
2
|
from os import PathLike
|
|
3
3
|
from typing import IO, Any, TypeAlias, Union, cast
|
|
4
4
|
|
|
@@ -113,6 +113,34 @@ class DataFolder(DirFileSystem):
|
|
|
113
113
|
return self.abs_path(paths)
|
|
114
114
|
return [self.abs_path(p) for p in paths]
|
|
115
115
|
|
|
116
|
+
def find(self, path: str, *args, **kwargs):
|
|
117
|
+
# Avoid DirFileSystem.find(): some backends (notably HF buckets) can leak
|
|
118
|
+
# sibling prefix matches like `root-2/...` or return the bare root entry,
|
|
119
|
+
# and DirFileSystem._relpath() asserts before we can filter them out.
|
|
120
|
+
"""List paths under this folder, skipping backend results outside the base path."""
|
|
121
|
+
detail = kwargs.get("detail", False)
|
|
122
|
+
target = self._join(path.rstrip("/"))
|
|
123
|
+
ret = self.fs.find(target, *args, **kwargs)
|
|
124
|
+
target = target.rstrip("/")
|
|
125
|
+
target_prefix = target + self.fs.sep
|
|
126
|
+
alt_target = target[1:] if target.startswith(self.fs.sep) else None
|
|
127
|
+
alt_prefix = alt_target + self.fs.sep if alt_target is not None else None
|
|
128
|
+
|
|
129
|
+
def rel(p: str) -> str | None:
|
|
130
|
+
if p == target or (alt_target is not None and p == alt_target):
|
|
131
|
+
return path.rstrip("/")
|
|
132
|
+
if p.startswith(target_prefix):
|
|
133
|
+
suffix = p[len(target_prefix) :]
|
|
134
|
+
elif alt_prefix is not None and p.startswith(alt_prefix):
|
|
135
|
+
suffix = p[len(alt_prefix) :]
|
|
136
|
+
else:
|
|
137
|
+
return None
|
|
138
|
+
return suffix if path in {"", "/"} else f"{path.rstrip('/')}/{suffix}"
|
|
139
|
+
|
|
140
|
+
if detail:
|
|
141
|
+
return {r: info for p, info in ret.items() if (r := rel(p)) is not None}
|
|
142
|
+
return [r for p in ret if (r := rel(p)) is not None]
|
|
143
|
+
|
|
116
144
|
def open_files(
|
|
117
145
|
self, paths: Iterable[str], mode: str = "rb", **kwargs
|
|
118
146
|
) -> list[IO[Any]]:
|
|
@@ -159,3 +187,29 @@ class DataFolder(DirFileSystem):
|
|
|
159
187
|
|
|
160
188
|
def files(self, relpaths: Iterable[str]) -> list[DataFile]:
|
|
161
189
|
return [self.file(p) for p in relpaths]
|
|
190
|
+
|
|
191
|
+
def iter_files_with_sizes(
|
|
192
|
+
self, *, recursive: bool = False, **kwargs: Any
|
|
193
|
+
) -> Iterator[tuple[DataFile, int | None]]:
|
|
194
|
+
if recursive:
|
|
195
|
+
found = self.find("", detail=True, **kwargs)
|
|
196
|
+
items: Iterable[tuple[str, Mapping[str, Any]]] = found.items()
|
|
197
|
+
else:
|
|
198
|
+
items = (
|
|
199
|
+
(str(info["name"]), info)
|
|
200
|
+
for info in self.ls("", detail=True, **kwargs)
|
|
201
|
+
if isinstance(info, Mapping)
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
for relpath, info in sorted(items, key=lambda item: item[0]):
|
|
205
|
+
info_dict = dict(info)
|
|
206
|
+
if info_dict.get("type") != "file":
|
|
207
|
+
continue
|
|
208
|
+
size = info_dict.get("size")
|
|
209
|
+
yield self.file(relpath), size if isinstance(size, int) else None
|
|
210
|
+
|
|
211
|
+
def iter_files(
|
|
212
|
+
self, *, recursive: bool = False, **kwargs: Any
|
|
213
|
+
) -> Iterator[DataFile]:
|
|
214
|
+
for file, _ in self.iter_files_with_sizes(recursive=recursive, **kwargs):
|
|
215
|
+
yield file
|
|
@@ -170,62 +170,77 @@ class DataFileSet:
|
|
|
170
170
|
exts = tuple(e.lower() for e in self.extensions)
|
|
171
171
|
seen: set[tuple[int, str]] = set()
|
|
172
172
|
expanded: list[tuple[DataFile, ...]] = []
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
173
|
+
sizes = dict(self._sizes)
|
|
174
|
+
|
|
175
|
+
def _append_file(
|
|
176
|
+
out: list[DataFile],
|
|
177
|
+
file: DataFile,
|
|
178
|
+
*,
|
|
179
|
+
size: int | None = None,
|
|
180
|
+
apply_extensions: bool = True,
|
|
181
|
+
) -> None:
|
|
182
|
+
if apply_extensions and exts and not file.path.lower().endswith(exts):
|
|
176
183
|
return
|
|
177
184
|
key = (id(file.fs), file.path)
|
|
178
185
|
if key in seen:
|
|
179
186
|
return
|
|
180
187
|
seen.add(key)
|
|
181
188
|
out.append(file)
|
|
189
|
+
if size is not None:
|
|
190
|
+
sizes[(len(expanded), file.abs_path())] = int(size)
|
|
182
191
|
|
|
183
192
|
for entry in self.entries:
|
|
184
193
|
files: list[DataFile] = []
|
|
194
|
+
if isinstance(entry, _PathSource) and not glob.has_magic(entry.path):
|
|
195
|
+
try:
|
|
196
|
+
info = entry.fs.info(entry.path)
|
|
197
|
+
except FileNotFoundError:
|
|
198
|
+
raise FileNotFoundError(
|
|
199
|
+
f"Could not resolve input: {entry.fs.unstrip_protocol(entry.path)!r}"
|
|
200
|
+
)
|
|
201
|
+
item_type = info.get("type")
|
|
202
|
+
if item_type == "directory":
|
|
203
|
+
entry = DataFolder(path=entry.path, fs=entry.fs)
|
|
204
|
+
elif item_type == "file":
|
|
205
|
+
entry = DataFile(fs=entry.fs, path=entry.path)
|
|
206
|
+
else:
|
|
207
|
+
raise TypeError(
|
|
208
|
+
f"Unsupported file type {item_type!r} for input: "
|
|
209
|
+
f"{entry.fs.unstrip_protocol(entry.path)!r}"
|
|
210
|
+
)
|
|
211
|
+
|
|
185
212
|
if isinstance(entry, DataFile):
|
|
186
|
-
_append_file(files, entry)
|
|
213
|
+
_append_file(files, entry, apply_extensions=False)
|
|
187
214
|
elif isinstance(entry, DataFolder):
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
if self.recursive
|
|
191
|
-
else sorted(
|
|
192
|
-
e["name"] if isinstance(e, dict) else e
|
|
193
|
-
for e in entry.ls("", detail=True)
|
|
194
|
-
if not isinstance(e, dict) or e.get("type") == "file"
|
|
195
|
-
)
|
|
196
|
-
)
|
|
197
|
-
for path in paths:
|
|
198
|
-
_append_file(files, entry.file(path))
|
|
215
|
+
for file, size in entry.iter_files_with_sizes(recursive=self.recursive):
|
|
216
|
+
_append_file(files, file, size=size)
|
|
199
217
|
else:
|
|
200
218
|
next_fs, path = entry.fs, entry.path
|
|
201
219
|
if glob.has_magic(path):
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
220
|
+
matched = next_fs.glob(path, detail=True)
|
|
221
|
+
items = matched.items()
|
|
222
|
+
for expanded_path, info in sorted(items):
|
|
223
|
+
if not isinstance(expanded_path, str) or not isinstance(
|
|
224
|
+
info, Mapping
|
|
225
|
+
):
|
|
226
|
+
continue
|
|
227
|
+
if info.get("type") != "file":
|
|
228
|
+
continue
|
|
229
|
+
size = info.get("size")
|
|
230
|
+
_append_file(
|
|
231
|
+
files,
|
|
232
|
+
DataFile(fs=next_fs, path=expanded_path),
|
|
233
|
+
size=size if isinstance(size, int) else None,
|
|
214
234
|
)
|
|
215
|
-
for expanded_path in paths:
|
|
216
|
-
_append_file(
|
|
217
|
-
files, DataFile(fs=next_fs, path=expanded_path)
|
|
218
|
-
)
|
|
219
|
-
else:
|
|
220
|
-
_append_file(files, DataFile(fs=next_fs, path=path))
|
|
221
235
|
else:
|
|
222
|
-
raise
|
|
223
|
-
|
|
236
|
+
raise AssertionError(
|
|
237
|
+
"non-glob _PathSource should have been resolved"
|
|
224
238
|
)
|
|
225
239
|
expanded.append(tuple(files))
|
|
226
240
|
|
|
227
241
|
out = tuple(expanded)
|
|
228
242
|
object.__setattr__(self, "_expanded_sources", out)
|
|
243
|
+
object.__setattr__(self, "_sizes", sizes)
|
|
229
244
|
return out
|
|
230
245
|
|
|
231
246
|
def resolve_file(self, source_index: int, path: str) -> DataFile:
|