macrodata-refiner 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- macrodata_refiner-0.2.0/PKG-INFO +151 -0
- macrodata_refiner-0.2.0/README.md +121 -0
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/pyproject.toml +5 -1
- macrodata_refiner-0.2.0/src/macrodata_refiner.egg-info/PKG-INFO +151 -0
- macrodata_refiner-0.2.0/src/macrodata_refiner.egg-info/SOURCES.txt +110 -0
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/src/macrodata_refiner.egg-info/requires.txt +4 -0
- macrodata_refiner-0.2.0/src/refiner/__init__.py +57 -0
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/src/refiner/cli/auth.py +18 -23
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/src/refiner/cli/main.py +1 -1
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/src/refiner/cli/ui.py +6 -4
- macrodata_refiner-0.2.0/src/refiner/execution/__init__.py +1 -0
- macrodata_refiner-0.2.0/src/refiner/execution/asyncio/runtime.py +112 -0
- macrodata_refiner-0.2.0/src/refiner/execution/asyncio/window.py +91 -0
- macrodata_refiner-0.1.0/src/refiner/runtime/execution/row_queue.py → macrodata_refiner-0.2.0/src/refiner/execution/buffer.py +3 -3
- {macrodata_refiner-0.1.0/src/refiner/runtime → macrodata_refiner-0.2.0/src/refiner}/execution/engine.py +60 -55
- macrodata_refiner-0.2.0/src/refiner/execution/operators/__init__.py +1 -0
- macrodata_refiner-0.2.0/src/refiner/execution/operators/row.py +186 -0
- macrodata_refiner-0.2.0/src/refiner/execution/operators/vectorized.py +148 -0
- macrodata_refiner-0.2.0/src/refiner/execution/tracking/__init__.py +1 -0
- macrodata_refiner-0.2.0/src/refiner/execution/tracking/shards.py +106 -0
- macrodata_refiner-0.2.0/src/refiner/io/__init__.py +9 -0
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/src/refiner/io/datafile.py +25 -6
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/src/refiner/io/datafolder.py +25 -9
- macrodata_refiner-0.2.0/src/refiner/io/fileset.py +254 -0
- macrodata_refiner-0.2.0/src/refiner/launchers/__init__.py +11 -0
- macrodata_refiner-0.2.0/src/refiner/launchers/base.py +185 -0
- macrodata_refiner-0.2.0/src/refiner/launchers/cloud.py +139 -0
- macrodata_refiner-0.2.0/src/refiner/launchers/local.py +321 -0
- macrodata_refiner-0.2.0/src/refiner/media/__init__.py +3 -0
- macrodata_refiner-0.2.0/src/refiner/media/video/__init__.py +3 -0
- macrodata_refiner-0.2.0/src/refiner/media/video/remux.py +239 -0
- macrodata_refiner-0.2.0/src/refiner/media/video/transcode.py +243 -0
- macrodata_refiner-0.2.0/src/refiner/media/video/types.py +23 -0
- macrodata_refiner-0.2.0/src/refiner/media/video/writer.py +250 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/__init__.py +31 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/data/block.py +129 -0
- {macrodata_refiner-0.1.0/src/refiner/sources → macrodata_refiner-0.2.0/src/refiner/pipeline/data}/row.py +125 -17
- macrodata_refiner-0.2.0/src/refiner/pipeline/data/shard.py +264 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/data/tabular.py +252 -0
- {macrodata_refiner-0.1.0/src/refiner → macrodata_refiner-0.2.0/src/refiner/pipeline}/expressions.py +33 -2
- macrodata_refiner-0.2.0/src/refiner/pipeline/pipeline.py +587 -0
- {macrodata_refiner-0.1.0/src/refiner/runtime → macrodata_refiner-0.2.0/src/refiner/pipeline}/planning.py +219 -17
- macrodata_refiner-0.2.0/src/refiner/pipeline/sinks/__init__.py +14 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/sinks/base.py +76 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/sinks/jsonl.py +81 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/sinks/lerobot.py +555 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/sinks/lerobot_reducer.py +276 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/sinks/parquet.py +78 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/sources/__init__.py +17 -0
- {macrodata_refiner-0.1.0/src/refiner → macrodata_refiner-0.2.0/src/refiner/pipeline}/sources/base.py +14 -16
- macrodata_refiner-0.2.0/src/refiner/pipeline/sources/items.py +70 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/sources/readers/__init__.py +15 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/sources/readers/base.py +264 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/sources/readers/csv.py +204 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/sources/readers/jsonl.py +88 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/sources/readers/lerobot.py +283 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/sources/readers/parquet.py +252 -0
- {macrodata_refiner-0.1.0/src/refiner → macrodata_refiner-0.2.0/src/refiner/pipeline}/sources/readers/utils.py +1 -11
- {macrodata_refiner-0.1.0/src/refiner → macrodata_refiner-0.2.0/src/refiner/pipeline}/sources/task.py +8 -9
- macrodata_refiner-0.1.0/src/refiner/processors/step.py → macrodata_refiner-0.2.0/src/refiner/pipeline/steps.py +67 -41
- macrodata_refiner-0.2.0/src/refiner/pipeline/utils/cache/decoder_cache.py +192 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/utils/cache/file_cache.py +233 -0
- macrodata_refiner-0.2.0/src/refiner/pipeline/utils/cache/lease_cache.py +276 -0
- macrodata_refiner-0.2.0/src/refiner/platform/__init__.py +1 -0
- macrodata_refiner-0.2.0/src/refiner/platform/client/__init__.py +56 -0
- macrodata_refiner-0.2.0/src/refiner/platform/client/api.py +271 -0
- {macrodata_refiner-0.1.0/src/refiner/platform → macrodata_refiner-0.2.0/src/refiner/platform/client}/http.py +9 -12
- macrodata_refiner-0.2.0/src/refiner/platform/client/models.py +191 -0
- {macrodata_refiner-0.1.0/src/refiner/platform/cloud → macrodata_refiner-0.2.0/src/refiner/platform/client}/serialize.py +1 -1
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/src/refiner/platform/manifest.py +25 -5
- macrodata_refiner-0.2.0/src/refiner/py.typed +0 -0
- macrodata_refiner-0.2.0/src/refiner/robotics/__init__.py +25 -0
- macrodata_refiner-0.2.0/src/refiner/robotics/lerobot_format/__init__.py +49 -0
- macrodata_refiner-0.2.0/src/refiner/robotics/lerobot_format/metadata/__init__.py +41 -0
- macrodata_refiner-0.2.0/src/refiner/robotics/lerobot_format/metadata/info.py +250 -0
- macrodata_refiner-0.2.0/src/refiner/robotics/lerobot_format/metadata/metadata.py +32 -0
- macrodata_refiner-0.2.0/src/refiner/robotics/lerobot_format/metadata/stats.py +686 -0
- macrodata_refiner-0.2.0/src/refiner/robotics/lerobot_format/metadata/tasks.py +151 -0
- macrodata_refiner-0.2.0/src/refiner/robotics/lerobot_format/row.py +297 -0
- macrodata_refiner-0.2.0/src/refiner/robotics/lerobot_format/tabular.py +134 -0
- macrodata_refiner-0.2.0/src/refiner/robotics/motion.py +165 -0
- macrodata_refiner-0.2.0/src/refiner/worker/__init__.py +1 -0
- macrodata_refiner-0.2.0/src/refiner/worker/context.py +121 -0
- macrodata_refiner-0.2.0/src/refiner/worker/entrypoint.py +101 -0
- macrodata_refiner-0.2.0/src/refiner/worker/lifecycle/__init__.py +5 -0
- macrodata_refiner-0.2.0/src/refiner/worker/lifecycle/base.py +25 -0
- macrodata_refiner-0.2.0/src/refiner/worker/lifecycle/local/__init__.py +3 -0
- macrodata_refiner-0.2.0/src/refiner/worker/lifecycle/local/claim.py +147 -0
- macrodata_refiner-0.2.0/src/refiner/worker/lifecycle/local/files.py +41 -0
- macrodata_refiner-0.2.0/src/refiner/worker/lifecycle/local/lifecycle.py +308 -0
- macrodata_refiner-0.2.0/src/refiner/worker/lifecycle/platform.py +99 -0
- macrodata_refiner-0.2.0/src/refiner/worker/metrics/__init__.py +1 -0
- macrodata_refiner-0.1.0/src/refiner/metrics.py → macrodata_refiner-0.2.0/src/refiner/worker/metrics/api.py +38 -13
- macrodata_refiner-0.1.0/src/refiner/runtime/metrics_context.py → macrodata_refiner-0.2.0/src/refiner/worker/metrics/context.py +23 -19
- macrodata_refiner-0.1.0/src/refiner/platform/telemetry/emitter.py → macrodata_refiner-0.2.0/src/refiner/worker/metrics/otel.py +147 -50
- macrodata_refiner-0.2.0/src/refiner/worker/resources/__init__.py +1 -0
- macrodata_refiner-0.2.0/src/refiner/worker/resources/cpu.py +123 -0
- macrodata_refiner-0.2.0/src/refiner/worker/resources/memory.py +63 -0
- macrodata_refiner-0.2.0/src/refiner/worker/resources/network.py +27 -0
- macrodata_refiner-0.2.0/src/refiner/worker/runner.py +391 -0
- macrodata_refiner-0.2.0/src/refiner/worker/workdir.py +22 -0
- macrodata_refiner-0.2.0/tests/test_cache.py +175 -0
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/tests/test_expressions.py +6 -0
- macrodata_refiner-0.1.0/PKG-INFO +0 -25
- macrodata_refiner-0.1.0/src/macrodata_refiner.egg-info/PKG-INFO +0 -25
- macrodata_refiner-0.1.0/src/macrodata_refiner.egg-info/SOURCES.txt +0 -76
- macrodata_refiner-0.1.0/src/refiner/__init__.py +0 -65
- macrodata_refiner-0.1.0/src/refiner/io/__init__.py +0 -9
- macrodata_refiner-0.1.0/src/refiner/io/fileset.py +0 -172
- macrodata_refiner-0.1.0/src/refiner/ledger/__init__.py +0 -10
- macrodata_refiner-0.1.0/src/refiner/ledger/backend/__init__.py +0 -10
- macrodata_refiner-0.1.0/src/refiner/ledger/backend/base.py +0 -60
- macrodata_refiner-0.1.0/src/refiner/ledger/backend/cloud.py +0 -85
- macrodata_refiner-0.1.0/src/refiner/ledger/backend/fs.py +0 -267
- macrodata_refiner-0.1.0/src/refiner/ledger/config.py +0 -49
- macrodata_refiner-0.1.0/src/refiner/ledger/policy.py +0 -144
- macrodata_refiner-0.1.0/src/refiner/ledger/shard.py +0 -126
- macrodata_refiner-0.1.0/src/refiner/pipeline.py +0 -381
- macrodata_refiner-0.1.0/src/refiner/platform/__init__.py +0 -28
- macrodata_refiner-0.1.0/src/refiner/platform/client.py +0 -303
- macrodata_refiner-0.1.0/src/refiner/platform/cloud/__init__.py +0 -1
- macrodata_refiner-0.1.0/src/refiner/platform/cloud/models.py +0 -73
- macrodata_refiner-0.1.0/src/refiner/platform/config.py +0 -14
- macrodata_refiner-0.1.0/src/refiner/platform/telemetry/__init__.py +0 -5
- macrodata_refiner-0.1.0/src/refiner/platform/telemetry/metric_helpers.py +0 -117
- macrodata_refiner-0.1.0/src/refiner/processors/__init__.py +0 -21
- macrodata_refiner-0.1.0/src/refiner/runtime/__init__.py +0 -1
- macrodata_refiner-0.1.0/src/refiner/runtime/errors.py +0 -8
- macrodata_refiner-0.1.0/src/refiner/runtime/execution/__init__.py +0 -19
- macrodata_refiner-0.1.0/src/refiner/runtime/execution/row_steps.py +0 -104
- macrodata_refiner-0.1.0/src/refiner/runtime/execution/vectorized.py +0 -114
- macrodata_refiner-0.1.0/src/refiner/runtime/launchers/__init__.py +0 -11
- macrodata_refiner-0.1.0/src/refiner/runtime/launchers/base.py +0 -152
- macrodata_refiner-0.1.0/src/refiner/runtime/launchers/cloud.py +0 -92
- macrodata_refiner-0.1.0/src/refiner/runtime/launchers/local.py +0 -294
- macrodata_refiner-0.1.0/src/refiner/runtime/resources/__init__.py +0 -10
- macrodata_refiner-0.1.0/src/refiner/runtime/resources/cpu.py +0 -52
- macrodata_refiner-0.1.0/src/refiner/runtime/resources/memory.py +0 -39
- macrodata_refiner-0.1.0/src/refiner/runtime/types.py +0 -16
- macrodata_refiner-0.1.0/src/refiner/runtime/worker/__init__.py +0 -3
- macrodata_refiner-0.1.0/src/refiner/runtime/worker/entrypoint.py +0 -142
- macrodata_refiner-0.1.0/src/refiner/runtime/worker/runner.py +0 -242
- macrodata_refiner-0.1.0/src/refiner/sources/__init__.py +0 -23
- macrodata_refiner-0.1.0/src/refiner/sources/items.py +0 -69
- macrodata_refiner-0.1.0/src/refiner/sources/readers/__init__.py +0 -17
- macrodata_refiner-0.1.0/src/refiner/sources/readers/base.py +0 -146
- macrodata_refiner-0.1.0/src/refiner/sources/readers/csv.py +0 -307
- macrodata_refiner-0.1.0/src/refiner/sources/readers/jsonl.py +0 -111
- macrodata_refiner-0.1.0/src/refiner/sources/readers/parquet.py +0 -228
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/LICENSE +0 -0
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/setup.cfg +0 -0
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/src/macrodata_refiner.egg-info/dependency_links.txt +0 -0
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/src/macrodata_refiner.egg-info/entry_points.txt +0 -0
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/src/macrodata_refiner.egg-info/top_level.txt +0 -0
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/src/refiner/cli/__init__.py +0 -0
- /macrodata_refiner-0.1.0/README.md → /macrodata_refiner-0.2.0/src/refiner/pipeline/utils/__init__.py +0 -0
- /macrodata_refiner-0.1.0/src/refiner/py.typed → /macrodata_refiner-0.2.0/src/refiner/pipeline/utils/cache/__init__.py +0 -0
- {macrodata_refiner-0.1.0 → macrodata_refiner-0.2.0}/src/refiner/platform/auth.py +0 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: macrodata-refiner
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
|
+
Author: Macrodata Labs
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Classifier: Operating System :: OS Independent
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Requires-Python: >=3.10
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: av
|
|
16
|
+
Requires-Dist: cloudpickle==3.1.2
|
|
17
|
+
Requires-Dist: fsspec
|
|
18
|
+
Requires-Dist: httpx
|
|
19
|
+
Requires-Dist: loguru
|
|
20
|
+
Requires-Dist: huggingface-hub>=1.4.1
|
|
21
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-http
|
|
22
|
+
Requires-Dist: opentelemetry-sdk
|
|
23
|
+
Requires-Dist: numpy
|
|
24
|
+
Requires-Dist: psutil
|
|
25
|
+
Requires-Dist: orjson
|
|
26
|
+
Requires-Dist: pyarrow
|
|
27
|
+
Requires-Dist: msgspec>=0.20.0
|
|
28
|
+
Requires-Dist: hf>=1.7.1
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
<p align="center">
|
|
32
|
+
<img src="https://macrodata.co/logo.svg" alt="Macrodata" width="180">
|
|
33
|
+
</p>
|
|
34
|
+
|
|
35
|
+
<h1 align="center">Macrodata Refiner</h1>
|
|
36
|
+
|
|
37
|
+
Refiner is an open-source engine for turning raw, unstructured, and multimodal data into **high-quality datasets** for large model training.
|
|
38
|
+
|
|
39
|
+
It replaces the brittle scripts and stitched-together data tooling that teams still use for training data work, while offering much better support for multimodal data, robotics workflows, and model-based processing.
|
|
40
|
+
|
|
41
|
+
It also plugs into the Macrodata platform, which gives you visibility into what is happening to your data while pipelines run: job and shard lifecycle, logs, metrics, manifests, and pipeline behavior. The same code can run locally for development and then scale out through Macrodata's elastic serverless cloud.
|
|
42
|
+
|
|
43
|
+
## Quickstart
|
|
44
|
+
|
|
45
|
+
Install:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install macrodata-refiner
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Create a Macrodata API key:
|
|
52
|
+
|
|
53
|
+
- https://macrodata.co/settings/api-keys
|
|
54
|
+
|
|
55
|
+
Log in:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
macrodata login
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Cloud example
|
|
62
|
+
|
|
63
|
+
Launch a robotics pipeline on Macrodata Cloud.
|
|
64
|
+
|
|
65
|
+
This requires a valid API key.
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
import refiner as mdr
|
|
69
|
+
|
|
70
|
+
(
|
|
71
|
+
mdr.read_lerobot("hf://datasets/macrodata/aloha_static_battery_ep005_009")
|
|
72
|
+
.map(
|
|
73
|
+
mdr.robotics.motion_trim(
|
|
74
|
+
threshold=0.001,
|
|
75
|
+
pad_frames=5,
|
|
76
|
+
)
|
|
77
|
+
)
|
|
78
|
+
.write_lerobot("hf://buckets/macrodata/test_bucket/aloha_motion")
|
|
79
|
+
.launch_cloud(
|
|
80
|
+
name="motion_trim",
|
|
81
|
+
num_workers=4,
|
|
82
|
+
)
|
|
83
|
+
)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Local example
|
|
87
|
+
|
|
88
|
+
Launch a local pipeline:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
import refiner as mdr
|
|
92
|
+
|
|
93
|
+
def add_preview(row):
|
|
94
|
+
return row.update(
|
|
95
|
+
preview=" ".join(row["text"].split()[:20]),
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
(
|
|
99
|
+
mdr.read_jsonl("input/*.jsonl")
|
|
100
|
+
.filter(mdr.col("lang") == "en")
|
|
101
|
+
.with_columns(
|
|
102
|
+
text=mdr.col("text").str.strip(),
|
|
103
|
+
text_len=mdr.col("text").str.len(),
|
|
104
|
+
)
|
|
105
|
+
.map(add_preview)
|
|
106
|
+
.write_parquet("s3://my-bucket/english-cleanup/")
|
|
107
|
+
.launch_local(
|
|
108
|
+
name="english-cleanup",
|
|
109
|
+
num_workers=2,
|
|
110
|
+
)
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
`pip install` gives you:
|
|
115
|
+
|
|
116
|
+
- the Python package as `refiner`
|
|
117
|
+
- the CLI as `macrodata`
|
|
118
|
+
|
|
119
|
+
## Batteries included
|
|
120
|
+
|
|
121
|
+
- training-data-first pipeline primitives instead of generic ETL abstractions
|
|
122
|
+
- multimodal processing, with robotics support today
|
|
123
|
+
- a lot of built-in readers, transforms, sinks, and lifecycle/runtime machinery so you do not have to rebuild the same scaffolding in scripts
|
|
124
|
+
- access to any storage backend supported by `fsspec` (S3, GCP, Hugging Face, etc.)
|
|
125
|
+
- local execution for development and elastic cloud execution for large runs
|
|
126
|
+
- built-in observability through the Macrodata platform, so you can inspect how your data is changing instead of debugging blindly after the fact
|
|
127
|
+
|
|
128
|
+
## Docs
|
|
129
|
+
|
|
130
|
+
Getting started:
|
|
131
|
+
|
|
132
|
+
- [Pipeline basics](docs/pipeline-basics.md)
|
|
133
|
+
- [Launchers](docs/launchers.md)
|
|
134
|
+
- [CLI](docs/cli.md)
|
|
135
|
+
|
|
136
|
+
Core concepts:
|
|
137
|
+
|
|
138
|
+
- [Reading and writing data](docs/reading-and-writing.md)
|
|
139
|
+
- [Transforms](docs/transforms.md)
|
|
140
|
+
- [Expressions](docs/expressions.md)
|
|
141
|
+
- [In-process debugging](docs/in-process-debugging.md)
|
|
142
|
+
- [Task pipelines](docs/task-pipelines.md)
|
|
143
|
+
|
|
144
|
+
Modalities and platform:
|
|
145
|
+
|
|
146
|
+
- [Robotics](docs/robotics.md)
|
|
147
|
+
- [Observability](docs/observability.md)
|
|
148
|
+
|
|
149
|
+
## Community
|
|
150
|
+
|
|
151
|
+
- join the Macrodata Discord: https://discord.gg/S8kZtmBR2x
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="https://macrodata.co/logo.svg" alt="Macrodata" width="180">
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<h1 align="center">Macrodata Refiner</h1>
|
|
6
|
+
|
|
7
|
+
Refiner is an open-source engine for turning raw, unstructured, and multimodal data into **high-quality datasets** for large model training.
|
|
8
|
+
|
|
9
|
+
It replaces the brittle scripts and stitched-together data tooling that teams still use for training data work, while offering much better support for multimodal data, robotics workflows, and model-based processing.
|
|
10
|
+
|
|
11
|
+
It also plugs into the Macrodata platform, which gives you visibility into what is happening to your data while pipelines run: job and shard lifecycle, logs, metrics, manifests, and pipeline behavior. The same code can run locally for development and then scale out through Macrodata's elastic serverless cloud.
|
|
12
|
+
|
|
13
|
+
## Quickstart
|
|
14
|
+
|
|
15
|
+
Install:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install macrodata-refiner
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Create a Macrodata API key:
|
|
22
|
+
|
|
23
|
+
- https://macrodata.co/settings/api-keys
|
|
24
|
+
|
|
25
|
+
Log in:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
macrodata login
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### Cloud example
|
|
32
|
+
|
|
33
|
+
Launch a robotics pipeline on Macrodata Cloud.
|
|
34
|
+
|
|
35
|
+
This requires a valid API key.
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import refiner as mdr
|
|
39
|
+
|
|
40
|
+
(
|
|
41
|
+
mdr.read_lerobot("hf://datasets/macrodata/aloha_static_battery_ep005_009")
|
|
42
|
+
.map(
|
|
43
|
+
mdr.robotics.motion_trim(
|
|
44
|
+
threshold=0.001,
|
|
45
|
+
pad_frames=5,
|
|
46
|
+
)
|
|
47
|
+
)
|
|
48
|
+
.write_lerobot("hf://buckets/macrodata/test_bucket/aloha_motion")
|
|
49
|
+
.launch_cloud(
|
|
50
|
+
name="motion_trim",
|
|
51
|
+
num_workers=4,
|
|
52
|
+
)
|
|
53
|
+
)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Local example
|
|
57
|
+
|
|
58
|
+
Launch a local pipeline:
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
import refiner as mdr
|
|
62
|
+
|
|
63
|
+
def add_preview(row):
|
|
64
|
+
return row.update(
|
|
65
|
+
preview=" ".join(row["text"].split()[:20]),
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
(
|
|
69
|
+
mdr.read_jsonl("input/*.jsonl")
|
|
70
|
+
.filter(mdr.col("lang") == "en")
|
|
71
|
+
.with_columns(
|
|
72
|
+
text=mdr.col("text").str.strip(),
|
|
73
|
+
text_len=mdr.col("text").str.len(),
|
|
74
|
+
)
|
|
75
|
+
.map(add_preview)
|
|
76
|
+
.write_parquet("s3://my-bucket/english-cleanup/")
|
|
77
|
+
.launch_local(
|
|
78
|
+
name="english-cleanup",
|
|
79
|
+
num_workers=2,
|
|
80
|
+
)
|
|
81
|
+
)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
`pip install` gives you:
|
|
85
|
+
|
|
86
|
+
- the Python package as `refiner`
|
|
87
|
+
- the CLI as `macrodata`
|
|
88
|
+
|
|
89
|
+
## Batteries included
|
|
90
|
+
|
|
91
|
+
- training-data-first pipeline primitives instead of generic ETL abstractions
|
|
92
|
+
- multimodal processing, with robotics support today
|
|
93
|
+
- a lot of built-in readers, transforms, sinks, and lifecycle/runtime machinery so you do not have to rebuild the same scaffolding in scripts
|
|
94
|
+
- access to any storage backend supported by `fsspec` (S3, GCP, Hugging Face, etc.)
|
|
95
|
+
- local execution for development and elastic cloud execution for large runs
|
|
96
|
+
- built-in observability through the Macrodata platform, so you can inspect how your data is changing instead of debugging blindly after the fact
|
|
97
|
+
|
|
98
|
+
## Docs
|
|
99
|
+
|
|
100
|
+
Getting started:
|
|
101
|
+
|
|
102
|
+
- [Pipeline basics](docs/pipeline-basics.md)
|
|
103
|
+
- [Launchers](docs/launchers.md)
|
|
104
|
+
- [CLI](docs/cli.md)
|
|
105
|
+
|
|
106
|
+
Core concepts:
|
|
107
|
+
|
|
108
|
+
- [Reading and writing data](docs/reading-and-writing.md)
|
|
109
|
+
- [Transforms](docs/transforms.md)
|
|
110
|
+
- [Expressions](docs/expressions.md)
|
|
111
|
+
- [In-process debugging](docs/in-process-debugging.md)
|
|
112
|
+
- [Task pipelines](docs/task-pipelines.md)
|
|
113
|
+
|
|
114
|
+
Modalities and platform:
|
|
115
|
+
|
|
116
|
+
- [Robotics](docs/robotics.md)
|
|
117
|
+
- [Observability](docs/observability.md)
|
|
118
|
+
|
|
119
|
+
## Community
|
|
120
|
+
|
|
121
|
+
- join the Macrodata Discord: https://discord.gg/S8kZtmBR2x
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "macrodata-refiner"
|
|
3
|
-
version = "0.1.0"
|
|
3
|
+
version = "0.2.0"
|
|
4
4
|
description = "Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -16,6 +16,7 @@ authors = [
|
|
|
16
16
|
]
|
|
17
17
|
requires-python = ">=3.10"
|
|
18
18
|
dependencies = [
|
|
19
|
+
"av",
|
|
19
20
|
"cloudpickle==3.1.2",
|
|
20
21
|
"fsspec",
|
|
21
22
|
"httpx",
|
|
@@ -23,9 +24,12 @@ dependencies = [
|
|
|
23
24
|
"huggingface-hub>=1.4.1",
|
|
24
25
|
"opentelemetry-exporter-otlp-proto-http",
|
|
25
26
|
"opentelemetry-sdk",
|
|
27
|
+
"numpy",
|
|
26
28
|
"psutil",
|
|
27
29
|
"orjson",
|
|
28
30
|
"pyarrow",
|
|
31
|
+
"msgspec>=0.20.0",
|
|
32
|
+
"hf>=1.7.1",
|
|
29
33
|
]
|
|
30
34
|
|
|
31
35
|
[project.scripts]
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: macrodata-refiner
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
|
+
Author: Macrodata Labs
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Classifier: Operating System :: OS Independent
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Requires-Python: >=3.10
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: av
|
|
16
|
+
Requires-Dist: cloudpickle==3.1.2
|
|
17
|
+
Requires-Dist: fsspec
|
|
18
|
+
Requires-Dist: httpx
|
|
19
|
+
Requires-Dist: loguru
|
|
20
|
+
Requires-Dist: huggingface-hub>=1.4.1
|
|
21
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-http
|
|
22
|
+
Requires-Dist: opentelemetry-sdk
|
|
23
|
+
Requires-Dist: numpy
|
|
24
|
+
Requires-Dist: psutil
|
|
25
|
+
Requires-Dist: orjson
|
|
26
|
+
Requires-Dist: pyarrow
|
|
27
|
+
Requires-Dist: msgspec>=0.20.0
|
|
28
|
+
Requires-Dist: hf>=1.7.1
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
<p align="center">
|
|
32
|
+
<img src="https://macrodata.co/logo.svg" alt="Macrodata" width="180">
|
|
33
|
+
</p>
|
|
34
|
+
|
|
35
|
+
<h1 align="center">Macrodata Refiner</h1>
|
|
36
|
+
|
|
37
|
+
Refiner is an open-source engine for turning raw, unstructured, and multimodal data into **high-quality datasets** for large model training.
|
|
38
|
+
|
|
39
|
+
It replaces the brittle scripts and stitched-together data tooling that teams still use for training data work, while offering much better support for multimodal data, robotics workflows, and model-based processing.
|
|
40
|
+
|
|
41
|
+
It also plugs into the Macrodata platform, which gives you visibility into what is happening to your data while pipelines run: job and shard lifecycle, logs, metrics, manifests, and pipeline behavior. The same code can run locally for development and then scale out through Macrodata's elastic serverless cloud.
|
|
42
|
+
|
|
43
|
+
## Quickstart
|
|
44
|
+
|
|
45
|
+
Install:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install macrodata-refiner
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Create a Macrodata API key:
|
|
52
|
+
|
|
53
|
+
- https://macrodata.co/settings/api-keys
|
|
54
|
+
|
|
55
|
+
Log in:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
macrodata login
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Cloud example
|
|
62
|
+
|
|
63
|
+
Launch a robotics pipeline on Macrodata Cloud.
|
|
64
|
+
|
|
65
|
+
This requires a valid API key.
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
import refiner as mdr
|
|
69
|
+
|
|
70
|
+
(
|
|
71
|
+
mdr.read_lerobot("hf://datasets/macrodata/aloha_static_battery_ep005_009")
|
|
72
|
+
.map(
|
|
73
|
+
mdr.robotics.motion_trim(
|
|
74
|
+
threshold=0.001,
|
|
75
|
+
pad_frames=5,
|
|
76
|
+
)
|
|
77
|
+
)
|
|
78
|
+
.write_lerobot("hf://buckets/macrodata/test_bucket/aloha_motion")
|
|
79
|
+
.launch_cloud(
|
|
80
|
+
name="motion_trim",
|
|
81
|
+
num_workers=4,
|
|
82
|
+
)
|
|
83
|
+
)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Local example
|
|
87
|
+
|
|
88
|
+
Launch a local pipeline:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
import refiner as mdr
|
|
92
|
+
|
|
93
|
+
def add_preview(row):
|
|
94
|
+
return row.update(
|
|
95
|
+
preview=" ".join(row["text"].split()[:20]),
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
(
|
|
99
|
+
mdr.read_jsonl("input/*.jsonl")
|
|
100
|
+
.filter(mdr.col("lang") == "en")
|
|
101
|
+
.with_columns(
|
|
102
|
+
text=mdr.col("text").str.strip(),
|
|
103
|
+
text_len=mdr.col("text").str.len(),
|
|
104
|
+
)
|
|
105
|
+
.map(add_preview)
|
|
106
|
+
.write_parquet("s3://my-bucket/english-cleanup/")
|
|
107
|
+
.launch_local(
|
|
108
|
+
name="english-cleanup",
|
|
109
|
+
num_workers=2,
|
|
110
|
+
)
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
`pip install` gives you:
|
|
115
|
+
|
|
116
|
+
- the Python package as `refiner`
|
|
117
|
+
- the CLI as `macrodata`
|
|
118
|
+
|
|
119
|
+
## Batteries included
|
|
120
|
+
|
|
121
|
+
- training-data-first pipeline primitives instead of generic ETL abstractions
|
|
122
|
+
- multimodal processing, with robotics support today
|
|
123
|
+
- a lot of built-in readers, transforms, sinks, and lifecycle/runtime machinery so you do not have to rebuild the same scaffolding in scripts
|
|
124
|
+
- access to any storage backend supported by `fsspec` (S3, GCP, Hugging Face, etc.)
|
|
125
|
+
- local execution for development and elastic cloud execution for large runs
|
|
126
|
+
- built-in observability through the Macrodata platform, so you can inspect how your data is changing instead of debugging blindly after the fact
|
|
127
|
+
|
|
128
|
+
## Docs
|
|
129
|
+
|
|
130
|
+
Getting started:
|
|
131
|
+
|
|
132
|
+
- [Pipeline basics](docs/pipeline-basics.md)
|
|
133
|
+
- [Launchers](docs/launchers.md)
|
|
134
|
+
- [CLI](docs/cli.md)
|
|
135
|
+
|
|
136
|
+
Core concepts:
|
|
137
|
+
|
|
138
|
+
- [Reading and writing data](docs/reading-and-writing.md)
|
|
139
|
+
- [Transforms](docs/transforms.md)
|
|
140
|
+
- [Expressions](docs/expressions.md)
|
|
141
|
+
- [In-process debugging](docs/in-process-debugging.md)
|
|
142
|
+
- [Task pipelines](docs/task-pipelines.md)
|
|
143
|
+
|
|
144
|
+
Modalities and platform:
|
|
145
|
+
|
|
146
|
+
- [Robotics](docs/robotics.md)
|
|
147
|
+
- [Observability](docs/observability.md)
|
|
148
|
+
|
|
149
|
+
## Community
|
|
150
|
+
|
|
151
|
+
- join the Macrodata Discord: https://discord.gg/S8kZtmBR2x
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/macrodata_refiner.egg-info/PKG-INFO
|
|
5
|
+
src/macrodata_refiner.egg-info/SOURCES.txt
|
|
6
|
+
src/macrodata_refiner.egg-info/dependency_links.txt
|
|
7
|
+
src/macrodata_refiner.egg-info/entry_points.txt
|
|
8
|
+
src/macrodata_refiner.egg-info/requires.txt
|
|
9
|
+
src/macrodata_refiner.egg-info/top_level.txt
|
|
10
|
+
src/refiner/__init__.py
|
|
11
|
+
src/refiner/py.typed
|
|
12
|
+
src/refiner/cli/__init__.py
|
|
13
|
+
src/refiner/cli/auth.py
|
|
14
|
+
src/refiner/cli/main.py
|
|
15
|
+
src/refiner/cli/ui.py
|
|
16
|
+
src/refiner/execution/__init__.py
|
|
17
|
+
src/refiner/execution/buffer.py
|
|
18
|
+
src/refiner/execution/engine.py
|
|
19
|
+
src/refiner/execution/asyncio/runtime.py
|
|
20
|
+
src/refiner/execution/asyncio/window.py
|
|
21
|
+
src/refiner/execution/operators/__init__.py
|
|
22
|
+
src/refiner/execution/operators/row.py
|
|
23
|
+
src/refiner/execution/operators/vectorized.py
|
|
24
|
+
src/refiner/execution/tracking/__init__.py
|
|
25
|
+
src/refiner/execution/tracking/shards.py
|
|
26
|
+
src/refiner/io/__init__.py
|
|
27
|
+
src/refiner/io/datafile.py
|
|
28
|
+
src/refiner/io/datafolder.py
|
|
29
|
+
src/refiner/io/fileset.py
|
|
30
|
+
src/refiner/launchers/__init__.py
|
|
31
|
+
src/refiner/launchers/base.py
|
|
32
|
+
src/refiner/launchers/cloud.py
|
|
33
|
+
src/refiner/launchers/local.py
|
|
34
|
+
src/refiner/media/__init__.py
|
|
35
|
+
src/refiner/media/video/__init__.py
|
|
36
|
+
src/refiner/media/video/remux.py
|
|
37
|
+
src/refiner/media/video/transcode.py
|
|
38
|
+
src/refiner/media/video/types.py
|
|
39
|
+
src/refiner/media/video/writer.py
|
|
40
|
+
src/refiner/pipeline/__init__.py
|
|
41
|
+
src/refiner/pipeline/expressions.py
|
|
42
|
+
src/refiner/pipeline/pipeline.py
|
|
43
|
+
src/refiner/pipeline/planning.py
|
|
44
|
+
src/refiner/pipeline/steps.py
|
|
45
|
+
src/refiner/pipeline/data/block.py
|
|
46
|
+
src/refiner/pipeline/data/row.py
|
|
47
|
+
src/refiner/pipeline/data/shard.py
|
|
48
|
+
src/refiner/pipeline/data/tabular.py
|
|
49
|
+
src/refiner/pipeline/sinks/__init__.py
|
|
50
|
+
src/refiner/pipeline/sinks/base.py
|
|
51
|
+
src/refiner/pipeline/sinks/jsonl.py
|
|
52
|
+
src/refiner/pipeline/sinks/lerobot.py
|
|
53
|
+
src/refiner/pipeline/sinks/lerobot_reducer.py
|
|
54
|
+
src/refiner/pipeline/sinks/parquet.py
|
|
55
|
+
src/refiner/pipeline/sources/__init__.py
|
|
56
|
+
src/refiner/pipeline/sources/base.py
|
|
57
|
+
src/refiner/pipeline/sources/items.py
|
|
58
|
+
src/refiner/pipeline/sources/task.py
|
|
59
|
+
src/refiner/pipeline/sources/readers/__init__.py
|
|
60
|
+
src/refiner/pipeline/sources/readers/base.py
|
|
61
|
+
src/refiner/pipeline/sources/readers/csv.py
|
|
62
|
+
src/refiner/pipeline/sources/readers/jsonl.py
|
|
63
|
+
src/refiner/pipeline/sources/readers/lerobot.py
|
|
64
|
+
src/refiner/pipeline/sources/readers/parquet.py
|
|
65
|
+
src/refiner/pipeline/sources/readers/utils.py
|
|
66
|
+
src/refiner/pipeline/utils/__init__.py
|
|
67
|
+
src/refiner/pipeline/utils/cache/__init__.py
|
|
68
|
+
src/refiner/pipeline/utils/cache/decoder_cache.py
|
|
69
|
+
src/refiner/pipeline/utils/cache/file_cache.py
|
|
70
|
+
src/refiner/pipeline/utils/cache/lease_cache.py
|
|
71
|
+
src/refiner/platform/__init__.py
|
|
72
|
+
src/refiner/platform/auth.py
|
|
73
|
+
src/refiner/platform/manifest.py
|
|
74
|
+
src/refiner/platform/client/__init__.py
|
|
75
|
+
src/refiner/platform/client/api.py
|
|
76
|
+
src/refiner/platform/client/http.py
|
|
77
|
+
src/refiner/platform/client/models.py
|
|
78
|
+
src/refiner/platform/client/serialize.py
|
|
79
|
+
src/refiner/robotics/__init__.py
|
|
80
|
+
src/refiner/robotics/motion.py
|
|
81
|
+
src/refiner/robotics/lerobot_format/__init__.py
|
|
82
|
+
src/refiner/robotics/lerobot_format/row.py
|
|
83
|
+
src/refiner/robotics/lerobot_format/tabular.py
|
|
84
|
+
src/refiner/robotics/lerobot_format/metadata/__init__.py
|
|
85
|
+
src/refiner/robotics/lerobot_format/metadata/info.py
|
|
86
|
+
src/refiner/robotics/lerobot_format/metadata/metadata.py
|
|
87
|
+
src/refiner/robotics/lerobot_format/metadata/stats.py
|
|
88
|
+
src/refiner/robotics/lerobot_format/metadata/tasks.py
|
|
89
|
+
src/refiner/worker/__init__.py
|
|
90
|
+
src/refiner/worker/context.py
|
|
91
|
+
src/refiner/worker/entrypoint.py
|
|
92
|
+
src/refiner/worker/runner.py
|
|
93
|
+
src/refiner/worker/workdir.py
|
|
94
|
+
src/refiner/worker/lifecycle/__init__.py
|
|
95
|
+
src/refiner/worker/lifecycle/base.py
|
|
96
|
+
src/refiner/worker/lifecycle/platform.py
|
|
97
|
+
src/refiner/worker/lifecycle/local/__init__.py
|
|
98
|
+
src/refiner/worker/lifecycle/local/claim.py
|
|
99
|
+
src/refiner/worker/lifecycle/local/files.py
|
|
100
|
+
src/refiner/worker/lifecycle/local/lifecycle.py
|
|
101
|
+
src/refiner/worker/metrics/__init__.py
|
|
102
|
+
src/refiner/worker/metrics/api.py
|
|
103
|
+
src/refiner/worker/metrics/context.py
|
|
104
|
+
src/refiner/worker/metrics/otel.py
|
|
105
|
+
src/refiner/worker/resources/__init__.py
|
|
106
|
+
src/refiner/worker/resources/cpu.py
|
|
107
|
+
src/refiner/worker/resources/memory.py
|
|
108
|
+
src/refiner/worker/resources/network.py
|
|
109
|
+
tests/test_cache.py
|
|
110
|
+
tests/test_expressions.py
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import refiner.robotics as robotics
|
|
2
|
+
from refiner.io import DataFile, DataFileSet, DataFolder
|
|
3
|
+
from refiner.launchers import LaunchStats, LocalLauncher
|
|
4
|
+
from refiner.media import VideoFile
|
|
5
|
+
from refiner.pipeline import (
|
|
6
|
+
RefinerPipeline,
|
|
7
|
+
Row,
|
|
8
|
+
Shard,
|
|
9
|
+
from_items,
|
|
10
|
+
from_source,
|
|
11
|
+
read_csv,
|
|
12
|
+
read_jsonl,
|
|
13
|
+
read_lerobot,
|
|
14
|
+
read_parquet,
|
|
15
|
+
task,
|
|
16
|
+
)
|
|
17
|
+
from refiner.pipeline.expressions import coalesce, col, if_else, lit
|
|
18
|
+
from refiner.worker.metrics.api import (
|
|
19
|
+
log_gauge,
|
|
20
|
+
log_gauges,
|
|
21
|
+
log_histogram,
|
|
22
|
+
log_throughput,
|
|
23
|
+
register_gauge,
|
|
24
|
+
)
|
|
25
|
+
from refiner.worker.runner import Worker, WorkerRunStats
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"RefinerPipeline",
|
|
29
|
+
"LocalLauncher",
|
|
30
|
+
"LaunchStats",
|
|
31
|
+
"DataFile",
|
|
32
|
+
"DataFolder",
|
|
33
|
+
"DataFileSet",
|
|
34
|
+
"Shard",
|
|
35
|
+
"Row",
|
|
36
|
+
"Worker",
|
|
37
|
+
"WorkerRunStats",
|
|
38
|
+
"read_csv",
|
|
39
|
+
"read_jsonl",
|
|
40
|
+
"read_lerobot",
|
|
41
|
+
"read_parquet",
|
|
42
|
+
"from_items",
|
|
43
|
+
"from_source",
|
|
44
|
+
"task",
|
|
45
|
+
"log_throughput",
|
|
46
|
+
"log_gauge",
|
|
47
|
+
"log_gauges",
|
|
48
|
+
"register_gauge",
|
|
49
|
+
"log_histogram",
|
|
50
|
+
"col",
|
|
51
|
+
"lit",
|
|
52
|
+
"coalesce",
|
|
53
|
+
"if_else",
|
|
54
|
+
"VideoFile",
|
|
55
|
+
"Video",
|
|
56
|
+
"robotics",
|
|
57
|
+
]
|