macrodata-refiner 0.2.2__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/PKG-INFO +61 -32
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/README.md +22 -19
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/pyproject.toml +45 -12
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/PKG-INFO +61 -32
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/SOURCES.txt +79 -16
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/entry_points.txt +1 -0
- macrodata_refiner-0.3.1/src/macrodata_refiner.egg-info/requires.txt +67 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/__init__.py +36 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/cli/auth.py +13 -13
- macrodata_refiner-0.3.1/src/refiner/cli/commands/__init__.py +1 -0
- macrodata_refiner-0.2.2/src/refiner/cli/main.py → macrodata_refiner-0.3.1/src/refiner/cli/commands/auth.py +3 -21
- macrodata_refiner-0.3.1/src/refiner/cli/commands/jobs.py +194 -0
- macrodata_refiner-0.3.1/src/refiner/cli/commands/run.py +42 -0
- macrodata_refiner-0.3.1/src/refiner/cli/commands/secrets.py +53 -0
- macrodata_refiner-0.3.1/src/refiner/cli/common.py +66 -0
- macrodata_refiner-0.3.1/src/refiner/cli/jobs/__init__.py +1 -0
- macrodata_refiner-0.3.1/src/refiner/cli/jobs/attach.py +49 -0
- macrodata_refiner-0.3.1/src/refiner/cli/jobs/common.py +161 -0
- macrodata_refiner-0.3.1/src/refiner/cli/jobs/control.py +30 -0
- macrodata_refiner-0.3.1/src/refiner/cli/jobs/follow.py +299 -0
- macrodata_refiner-0.3.1/src/refiner/cli/jobs/get.py +237 -0
- macrodata_refiner-0.3.1/src/refiner/cli/jobs/list.py +86 -0
- macrodata_refiner-0.3.1/src/refiner/cli/jobs/logs.py +574 -0
- macrodata_refiner-0.3.1/src/refiner/cli/jobs/manifest.py +158 -0
- macrodata_refiner-0.3.1/src/refiner/cli/jobs/metrics.py +346 -0
- macrodata_refiner-0.3.1/src/refiner/cli/jobs/workers.py +87 -0
- macrodata_refiner-0.3.1/src/refiner/cli/main.py +34 -0
- macrodata_refiner-0.3.1/src/refiner/cli/run/__init__.py +1 -0
- macrodata_refiner-0.3.1/src/refiner/cli/run/cloud.py +575 -0
- macrodata_refiner-0.3.1/src/refiner/cli/run/command.py +92 -0
- macrodata_refiner-0.3.1/src/refiner/cli/run/local.py +343 -0
- macrodata_refiner-0.3.1/src/refiner/cli/run/modes.py +69 -0
- macrodata_refiner-0.3.1/src/refiner/cli/secrets.py +105 -0
- macrodata_refiner-0.3.1/src/refiner/cli/ui/__init__.py +15 -0
- macrodata_refiner-0.3.1/src/refiner/cli/ui/console.py +943 -0
- macrodata_refiner-0.2.2/src/refiner/cli/ui.py → macrodata_refiner-0.3.1/src/refiner/cli/ui/terminal.py +7 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/execution/asyncio/runtime.py +1 -3
- macrodata_refiner-0.3.1/src/refiner/execution/asyncio/window.py +130 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/execution/engine.py +158 -14
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/execution/operators/row.py +36 -10
- macrodata_refiner-0.3.1/src/refiner/execution/operators/vectorized.py +234 -0
- macrodata_refiner-0.3.1/src/refiner/inference/__init__.py +84 -0
- macrodata_refiner-0.3.1/src/refiner/inference/capabilities.py +230 -0
- macrodata_refiner-0.3.1/src/refiner/inference/generate_pooling.py +49 -0
- macrodata_refiner-0.3.1/src/refiner/inference/generate_text.py +258 -0
- macrodata_refiner-0.3.1/src/refiner/inference/internal/__init__.py +1 -0
- macrodata_refiner-0.3.1/src/refiner/inference/internal/media.py +133 -0
- macrodata_refiner-0.3.1/src/refiner/inference/internal/message_conversion.py +45 -0
- macrodata_refiner-0.3.1/src/refiner/inference/internal/response.py +70 -0
- macrodata_refiner-0.3.1/src/refiner/inference/internal/runtime.py +194 -0
- macrodata_refiner-0.3.1/src/refiner/inference/internal/schema.py +71 -0
- macrodata_refiner-0.3.1/src/refiner/inference/internal/transport.py +415 -0
- macrodata_refiner-0.3.1/src/refiner/inference/internal/usage.py +31 -0
- macrodata_refiner-0.3.1/src/refiner/inference/providers/__init__.py +15 -0
- macrodata_refiner-0.3.1/src/refiner/inference/providers/anthropic.py +701 -0
- macrodata_refiner-0.3.1/src/refiner/inference/providers/base.py +138 -0
- macrodata_refiner-0.3.1/src/refiner/inference/providers/google.py +794 -0
- macrodata_refiner-0.3.1/src/refiner/inference/providers/openai.py +1257 -0
- macrodata_refiner-0.3.1/src/refiner/inference/providers/warnings.py +55 -0
- macrodata_refiner-0.3.1/src/refiner/inference/types.py +357 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/io/datafile.py +67 -1
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/io/datafolder.py +10 -6
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/io/fileset.py +86 -17
- macrodata_refiner-0.3.1/src/refiner/job_urls.py +16 -0
- macrodata_refiner-0.3.1/src/refiner/launchers/base.py +104 -0
- macrodata_refiner-0.3.1/src/refiner/launchers/cloud.py +372 -0
- macrodata_refiner-0.3.1/src/refiner/launchers/local.py +516 -0
- macrodata_refiner-0.3.1/src/refiner/launchers/secrets.py +153 -0
- macrodata_refiner-0.3.1/src/refiner/pipeline/__init__.py +55 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/data/block.py +9 -3
- macrodata_refiner-0.3.1/src/refiner/pipeline/data/datatype.py +409 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/data/shard.py +10 -2
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/data/tabular.py +209 -43
- macrodata_refiner-0.3.1/src/refiner/pipeline/pipeline.py +1488 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/planning.py +77 -45
- macrodata_refiner-0.3.1/src/refiner/pipeline/resources.py +48 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/__init__.py +4 -1
- macrodata_refiner-0.3.1/src/refiner/pipeline/sinks/assets.py +430 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/base.py +37 -7
- macrodata_refiner-0.3.1/src/refiner/pipeline/sinks/jsonl.py +147 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/lerobot.py +228 -49
- macrodata_refiner-0.3.1/src/refiner/pipeline/sinks/parquet.py +146 -0
- macrodata_refiner-0.3.1/src/refiner/pipeline/sinks/reducer/__init__.py +9 -0
- macrodata_refiner-0.3.1/src/refiner/pipeline/sinks/reducer/file.py +180 -0
- macrodata_refiner-0.2.2/src/refiner/pipeline/sinks/lerobot_reducer.py → macrodata_refiner-0.3.1/src/refiner/pipeline/sinks/reducer/lerobot.py +29 -19
- macrodata_refiner-0.3.1/src/refiner/pipeline/sinks/reducer/zarr.py +281 -0
- macrodata_refiner-0.3.1/src/refiner/pipeline/sinks/zarr.py +602 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/__init__.py +16 -2
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/base.py +4 -0
- macrodata_refiner-0.3.1/src/refiner/pipeline/sources/readers/__init__.py +29 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/base.py +29 -1
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/csv.py +23 -5
- macrodata_refiner-0.3.1/src/refiner/pipeline/sources/readers/files.py +166 -0
- macrodata_refiner-0.3.1/src/refiner/pipeline/sources/readers/hdf5.py +280 -0
- macrodata_refiner-0.3.1/src/refiner/pipeline/sources/readers/hf_dataset.py +416 -0
- macrodata_refiner-0.3.1/src/refiner/pipeline/sources/readers/json.py +167 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/lerobot.py +70 -20
- macrodata_refiner-0.3.1/src/refiner/pipeline/sources/readers/mcap.py +967 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/parquet.py +55 -11
- macrodata_refiner-0.3.1/src/refiner/pipeline/sources/readers/tfds.py +392 -0
- macrodata_refiner-0.3.1/src/refiner/pipeline/sources/readers/tfrecord.py +205 -0
- macrodata_refiner-0.3.1/src/refiner/pipeline/sources/readers/utils.py +237 -0
- macrodata_refiner-0.3.1/src/refiner/pipeline/sources/readers/zarr.py +577 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/steps.py +6 -1
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/utils/cache/decoder_cache.py +15 -11
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/platform/auth.py +14 -4
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/platform/client/__init__.py +20 -13
- macrodata_refiner-0.3.1/src/refiner/platform/client/api.py +577 -0
- macrodata_refiner-0.3.1/src/refiner/platform/client/models.py +319 -0
- macrodata_refiner-0.3.1/src/refiner/platform/client/serialize.py +39 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/platform/manifest.py +59 -2
- macrodata_refiner-0.3.1/src/refiner/robotics/__init__.py +45 -0
- macrodata_refiner-0.3.1/src/refiner/robotics/hand_tracking.py +151 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/__init__.py +0 -2
- macrodata_refiner-0.3.1/src/refiner/robotics/lerobot_format/row.py +446 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/tabular.py +37 -8
- macrodata_refiner-0.3.1/src/refiner/robotics/motion.py +181 -0
- macrodata_refiner-0.3.1/src/refiner/robotics/reward.py +288 -0
- macrodata_refiner-0.3.1/src/refiner/robotics/row.py +1042 -0
- macrodata_refiner-0.3.1/src/refiner/robotics/subtask_annotation.py +540 -0
- macrodata_refiner-0.3.1/src/refiner/robotics/synchronization.py +244 -0
- macrodata_refiner-0.3.1/src/refiner/robotics/tabular.py +172 -0
- macrodata_refiner-0.3.1/src/refiner/services/__init__.py +14 -0
- macrodata_refiner-0.3.1/src/refiner/services/base.py +44 -0
- macrodata_refiner-0.3.1/src/refiner/services/discovery.py +102 -0
- macrodata_refiner-0.3.1/src/refiner/services/manager.py +251 -0
- macrodata_refiner-0.3.1/src/refiner/services/vllm.py +78 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/text/commoncrawl.py +9 -2
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/video/__init__.py +27 -1
- macrodata_refiner-0.3.1/src/refiner/video/decode.py +279 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/video/remux.py +68 -15
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/video/transcode.py +57 -44
- macrodata_refiner-0.3.1/src/refiner/video/types.py +520 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/video/writer.py +80 -31
- macrodata_refiner-0.3.1/src/refiner/worker/context.py +177 -0
- macrodata_refiner-0.3.1/src/refiner/worker/entrypoint.py +99 -0
- macrodata_refiner-0.3.1/src/refiner/worker/lifecycle.py +142 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/worker/metrics/api.py +4 -2
- macrodata_refiner-0.3.1/src/refiner/worker/metrics/emitter.py +112 -0
- macrodata_refiner-0.3.1/src/refiner/worker/resources/cpu.py +24 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/worker/resources/gpu.py +7 -7
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/worker/runner.py +115 -163
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/worker/workdir.py +2 -2
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/tests/test_cache.py +2 -3
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/tests/test_commoncrawl_text.py +56 -5
- macrodata_refiner-0.3.1/tests/test_video_decode.py +255 -0
- macrodata_refiner-0.2.2/src/macrodata_refiner.egg-info/requires.txt +0 -35
- macrodata_refiner-0.2.2/src/refiner/execution/asyncio/window.py +0 -91
- macrodata_refiner-0.2.2/src/refiner/execution/operators/vectorized.py +0 -143
- macrodata_refiner-0.2.2/src/refiner/launchers/base.py +0 -215
- macrodata_refiner-0.2.2/src/refiner/launchers/cloud.py +0 -210
- macrodata_refiner-0.2.2/src/refiner/launchers/local.py +0 -336
- macrodata_refiner-0.2.2/src/refiner/pipeline/__init__.py +0 -25
- macrodata_refiner-0.2.2/src/refiner/pipeline/pipeline.py +0 -603
- macrodata_refiner-0.2.2/src/refiner/pipeline/sinks/jsonl.py +0 -81
- macrodata_refiner-0.2.2/src/refiner/pipeline/sinks/parquet.py +0 -78
- macrodata_refiner-0.2.2/src/refiner/pipeline/sources/readers/__init__.py +0 -15
- macrodata_refiner-0.2.2/src/refiner/pipeline/sources/readers/jsonl.py +0 -97
- macrodata_refiner-0.2.2/src/refiner/pipeline/sources/readers/utils.py +0 -104
- macrodata_refiner-0.2.2/src/refiner/platform/client/api.py +0 -263
- macrodata_refiner-0.2.2/src/refiner/platform/client/http.py +0 -118
- macrodata_refiner-0.2.2/src/refiner/platform/client/models.py +0 -197
- macrodata_refiner-0.2.2/src/refiner/platform/client/serialize.py +0 -34
- macrodata_refiner-0.2.2/src/refiner/robotics/__init__.py +0 -25
- macrodata_refiner-0.2.2/src/refiner/robotics/lerobot_format/row.py +0 -288
- macrodata_refiner-0.2.2/src/refiner/robotics/motion.py +0 -165
- macrodata_refiner-0.2.2/src/refiner/video/types.py +0 -23
- macrodata_refiner-0.2.2/src/refiner/worker/context.py +0 -121
- macrodata_refiner-0.2.2/src/refiner/worker/entrypoint.py +0 -113
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/__init__.py +0 -5
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/base.py +0 -25
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/local/__init__.py +0 -3
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/local/claim.py +0 -147
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/local/files.py +0 -41
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/local/lifecycle.py +0 -308
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/platform.py +0 -99
- macrodata_refiner-0.2.2/src/refiner/worker/metrics/context.py +0 -147
- macrodata_refiner-0.2.2/src/refiner/worker/metrics/otel.py +0 -364
- macrodata_refiner-0.2.2/src/refiner/worker/resources/cpu.py +0 -123
- macrodata_refiner-0.2.2/src/refiner/worker/resources/memory.py +0 -63
- macrodata_refiner-0.2.2/src/refiner/worker/resources/network.py +0 -27
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/LICENSE +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/setup.cfg +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/dependency_links.txt +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/top_level.txt +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/cli/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/execution/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/execution/asyncio/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/execution/buffer.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/execution/operators/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/execution/tracking/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/execution/tracking/shards.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/io/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/launchers/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/data/row.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/expressions.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/items.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/task.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/utils/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/utils/cache/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/utils/cache/file_cache.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/pipeline/utils/cache/lease_cache.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/platform/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/py.typed +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/metadata/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/metadata/info.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/metadata/metadata.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/metadata/stats.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/metadata/tasks.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/text/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/utils/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/utils/imports.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/worker/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/worker/metrics/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/src/refiner/worker/resources/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/tests/test_expressions.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.1}/tests/test_optional_dependencies.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrodata-refiner
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
5
|
Author: Macrodata Labs
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,35 +12,61 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
|
+
Requires-Dist: aiohttp
|
|
15
16
|
Requires-Dist: cloudpickle==3.1.2
|
|
16
|
-
Requires-Dist: fsspec
|
|
17
|
+
Requires-Dist: fsspec[http]
|
|
17
18
|
Requires-Dist: httpx
|
|
18
19
|
Requires-Dist: loguru
|
|
19
|
-
Requires-Dist: opentelemetry-exporter-otlp-proto-http
|
|
20
|
-
Requires-Dist: opentelemetry-sdk
|
|
21
20
|
Requires-Dist: numpy
|
|
22
|
-
Requires-Dist: psutil
|
|
23
21
|
Requires-Dist: orjson
|
|
22
|
+
Requires-Dist: packaging
|
|
24
23
|
Requires-Dist: pyarrow
|
|
25
24
|
Requires-Dist: msgspec>=0.20.0
|
|
25
|
+
Requires-Dist: pydantic>=2.0.0
|
|
26
26
|
Provides-Extra: video
|
|
27
27
|
Requires-Dist: av; extra == "video"
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
Requires-Dist:
|
|
31
|
-
Requires-Dist:
|
|
28
|
+
Requires-Dist: pillow; extra == "video"
|
|
29
|
+
Provides-Extra: hf
|
|
30
|
+
Requires-Dist: datasets>=3.0.0; extra == "hf"
|
|
31
|
+
Requires-Dist: huggingface-hub>=1.4.1; extra == "hf"
|
|
32
|
+
Requires-Dist: hf>=1.7.1; extra == "hf"
|
|
33
|
+
Provides-Extra: hand-tracking
|
|
34
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "hand-tracking"
|
|
35
|
+
Requires-Dist: macrodata-refiner[video]; extra == "hand-tracking"
|
|
36
|
+
Requires-Dist: ego-vision[models]>=0.1.25; extra == "hand-tracking"
|
|
32
37
|
Provides-Extra: text
|
|
33
38
|
Requires-Dist: warcio; extra == "text"
|
|
39
|
+
Provides-Extra: hdf5
|
|
40
|
+
Requires-Dist: h5py; extra == "hdf5"
|
|
41
|
+
Provides-Extra: zarr
|
|
42
|
+
Requires-Dist: zarr<3,>=2.18; extra == "zarr"
|
|
43
|
+
Requires-Dist: numcodecs<0.16; extra == "zarr"
|
|
44
|
+
Provides-Extra: mcap
|
|
45
|
+
Requires-Dist: av; extra == "mcap"
|
|
46
|
+
Requires-Dist: mcap; extra == "mcap"
|
|
47
|
+
Requires-Dist: mcap-protobuf-support; extra == "mcap"
|
|
48
|
+
Requires-Dist: mcap-ros2-support; extra == "mcap"
|
|
49
|
+
Requires-Dist: pillow; extra == "mcap"
|
|
34
50
|
Provides-Extra: s3
|
|
35
51
|
Requires-Dist: s3fs; extra == "s3"
|
|
52
|
+
Provides-Extra: tensorflow
|
|
53
|
+
Requires-Dist: tensorflow; extra == "tensorflow"
|
|
54
|
+
Provides-Extra: tfds
|
|
55
|
+
Requires-Dist: macrodata-refiner[tensorflow]; extra == "tfds"
|
|
56
|
+
Requires-Dist: tensorflow-datasets; extra == "tfds"
|
|
36
57
|
Provides-Extra: testing
|
|
37
|
-
Requires-Dist: macrodata-refiner[
|
|
38
|
-
Requires-Dist: macrodata-refiner[text]; extra == "testing"
|
|
39
|
-
Requires-Dist: macrodata-refiner[s3]; extra == "testing"
|
|
58
|
+
Requires-Dist: macrodata-refiner[all]; extra == "testing"
|
|
40
59
|
Requires-Dist: pytest>=8.0.0; extra == "testing"
|
|
41
60
|
Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
|
|
42
61
|
Provides-Extra: all
|
|
43
|
-
Requires-Dist: macrodata-refiner[
|
|
62
|
+
Requires-Dist: macrodata-refiner[hdf5]; extra == "all"
|
|
63
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "all"
|
|
64
|
+
Requires-Dist: macrodata-refiner[mcap]; extra == "all"
|
|
65
|
+
Requires-Dist: macrodata-refiner[video]; extra == "all"
|
|
66
|
+
Requires-Dist: macrodata-refiner[zarr]; extra == "all"
|
|
67
|
+
Requires-Dist: macrodata-refiner[text]; extra == "all"
|
|
68
|
+
Requires-Dist: macrodata-refiner[s3]; extra == "all"
|
|
69
|
+
Requires-Dist: macrodata-refiner[tfds]; extra == "all"
|
|
44
70
|
Dynamic: license-file
|
|
45
71
|
|
|
46
72
|
<p align="center">
|
|
@@ -49,9 +75,10 @@ Dynamic: license-file
|
|
|
49
75
|
|
|
50
76
|
<h1 align="center">Macrodata Refiner</h1>
|
|
51
77
|
|
|
52
|
-
Refiner is an open-source engine for turning raw
|
|
78
|
+
Refiner is an open-source engine for turning raw robotics and multimodal data into **high-quality datasets** for model training.
|
|
53
79
|
|
|
54
|
-
It
|
|
80
|
+
It gives training-data teams one pipeline model for multimodal data, robotics
|
|
81
|
+
workflows, and model-based processing.
|
|
55
82
|
|
|
56
83
|
It also plugs into the Macrodata platform, which gives you visibility into what is happening to your data while pipelines run: job and shard lifecycle, logs, metrics, manifests, and pipeline behavior. The same code can run locally for development and then scale out through Macrodata's elastic serverless cloud.
|
|
57
84
|
|
|
@@ -90,7 +117,7 @@ import refiner as mdr
|
|
|
90
117
|
pad_frames=5,
|
|
91
118
|
)
|
|
92
119
|
)
|
|
93
|
-
.write_lerobot("hf://buckets/
|
|
120
|
+
.write_lerobot("hf://buckets/acme-robotics/aloha_motion")
|
|
94
121
|
.launch_cloud(
|
|
95
122
|
name="motion_trim",
|
|
96
123
|
num_workers=4,
|
|
@@ -98,7 +125,7 @@ import refiner as mdr
|
|
|
98
125
|
)
|
|
99
126
|
```
|
|
100
127
|
|
|
101
|
-
Need cloud GPUs? See [
|
|
128
|
+
Need cloud GPUs? See [Resources, GPUs, and Services](docs/running-pipelines/resources-gpus-and-services.md).
|
|
102
129
|
|
|
103
130
|
### Local example
|
|
104
131
|
|
|
@@ -137,31 +164,33 @@ def add_preview(row):
|
|
|
137
164
|
|
|
138
165
|
- training-data-first pipeline primitives instead of generic ETL abstractions
|
|
139
166
|
- multimodal processing, with robotics support today
|
|
140
|
-
-
|
|
167
|
+
- built-in readers, transforms, sinks, and runtime machinery for common dataset work
|
|
141
168
|
- access to any storage backend supported by `fsspec` (S3, GCP, Hugging Face, etc.)
|
|
142
169
|
- local execution for development and elastic cloud execution for large runs
|
|
143
|
-
- built-in observability through the Macrodata platform
|
|
170
|
+
- built-in observability through the Macrodata platform for job state, logs, metrics, and manifests
|
|
144
171
|
|
|
145
172
|
## Docs
|
|
146
173
|
|
|
147
|
-
|
|
174
|
+
Start here:
|
|
148
175
|
|
|
149
|
-
- [
|
|
150
|
-
- [
|
|
151
|
-
- [
|
|
176
|
+
- [Docs index](docs/index.md)
|
|
177
|
+
- [Quickstart](docs/quickstart.md)
|
|
178
|
+
- [Running pipelines](docs/running-pipelines/index.md)
|
|
152
179
|
|
|
153
|
-
|
|
180
|
+
Build a dataset:
|
|
154
181
|
|
|
155
|
-
- [Reading
|
|
156
|
-
- [
|
|
157
|
-
- [
|
|
158
|
-
- [
|
|
159
|
-
- [
|
|
182
|
+
- [Reading data](docs/reading-data/index.md)
|
|
183
|
+
- [Episode data](docs/episode-data/index.md)
|
|
184
|
+
- [Transforms](docs/transforms/index.md)
|
|
185
|
+
- [Episode operations](docs/episode-operations/index.md)
|
|
186
|
+
- [Writing data](docs/writing-data/index.md)
|
|
187
|
+
- [Examples](docs/examples/index.md)
|
|
160
188
|
|
|
161
|
-
|
|
189
|
+
Operate jobs:
|
|
162
190
|
|
|
163
|
-
- [
|
|
164
|
-
- [
|
|
191
|
+
- [Platform](docs/platform/index.md)
|
|
192
|
+
- [CLI](docs/cli/index.md)
|
|
193
|
+
- [Reference](docs/reference/index.md)
|
|
165
194
|
|
|
166
195
|
## Community
|
|
167
196
|
|
|
@@ -4,9 +4,10 @@
|
|
|
4
4
|
|
|
5
5
|
<h1 align="center">Macrodata Refiner</h1>
|
|
6
6
|
|
|
7
|
-
Refiner is an open-source engine for turning raw
|
|
7
|
+
Refiner is an open-source engine for turning raw robotics and multimodal data into **high-quality datasets** for model training.
|
|
8
8
|
|
|
9
|
-
It
|
|
9
|
+
It gives training-data teams one pipeline model for multimodal data, robotics
|
|
10
|
+
workflows, and model-based processing.
|
|
10
11
|
|
|
11
12
|
It also plugs into the Macrodata platform, which gives you visibility into what is happening to your data while pipelines run: job and shard lifecycle, logs, metrics, manifests, and pipeline behavior. The same code can run locally for development and then scale out through Macrodata's elastic serverless cloud.
|
|
12
13
|
|
|
@@ -45,7 +46,7 @@ import refiner as mdr
|
|
|
45
46
|
pad_frames=5,
|
|
46
47
|
)
|
|
47
48
|
)
|
|
48
|
-
.write_lerobot("hf://buckets/
|
|
49
|
+
.write_lerobot("hf://buckets/acme-robotics/aloha_motion")
|
|
49
50
|
.launch_cloud(
|
|
50
51
|
name="motion_trim",
|
|
51
52
|
num_workers=4,
|
|
@@ -53,7 +54,7 @@ import refiner as mdr
|
|
|
53
54
|
)
|
|
54
55
|
```
|
|
55
56
|
|
|
56
|
-
Need cloud GPUs? See [
|
|
57
|
+
Need cloud GPUs? See [Resources, GPUs, and Services](docs/running-pipelines/resources-gpus-and-services.md).
|
|
57
58
|
|
|
58
59
|
### Local example
|
|
59
60
|
|
|
@@ -92,31 +93,33 @@ def add_preview(row):
|
|
|
92
93
|
|
|
93
94
|
- training-data-first pipeline primitives instead of generic ETL abstractions
|
|
94
95
|
- multimodal processing, with robotics support today
|
|
95
|
-
-
|
|
96
|
+
- built-in readers, transforms, sinks, and runtime machinery for common dataset work
|
|
96
97
|
- access to any storage backend supported by `fsspec` (S3, GCP, Hugging Face, etc.)
|
|
97
98
|
- local execution for development and elastic cloud execution for large runs
|
|
98
|
-
- built-in observability through the Macrodata platform
|
|
99
|
+
- built-in observability through the Macrodata platform for job state, logs, metrics, and manifests
|
|
99
100
|
|
|
100
101
|
## Docs
|
|
101
102
|
|
|
102
|
-
|
|
103
|
+
Start here:
|
|
103
104
|
|
|
104
|
-
- [
|
|
105
|
-
- [
|
|
106
|
-
- [
|
|
105
|
+
- [Docs index](docs/index.md)
|
|
106
|
+
- [Quickstart](docs/quickstart.md)
|
|
107
|
+
- [Running pipelines](docs/running-pipelines/index.md)
|
|
107
108
|
|
|
108
|
-
|
|
109
|
+
Build a dataset:
|
|
109
110
|
|
|
110
|
-
- [Reading
|
|
111
|
-
- [
|
|
112
|
-
- [
|
|
113
|
-
- [
|
|
114
|
-
- [
|
|
111
|
+
- [Reading data](docs/reading-data/index.md)
|
|
112
|
+
- [Episode data](docs/episode-data/index.md)
|
|
113
|
+
- [Transforms](docs/transforms/index.md)
|
|
114
|
+
- [Episode operations](docs/episode-operations/index.md)
|
|
115
|
+
- [Writing data](docs/writing-data/index.md)
|
|
116
|
+
- [Examples](docs/examples/index.md)
|
|
115
117
|
|
|
116
|
-
|
|
118
|
+
Operate jobs:
|
|
117
119
|
|
|
118
|
-
- [
|
|
119
|
-
- [
|
|
120
|
+
- [Platform](docs/platform/index.md)
|
|
121
|
+
- [CLI](docs/cli/index.md)
|
|
122
|
+
- [Reference](docs/reference/index.md)
|
|
120
123
|
|
|
121
124
|
## Community
|
|
122
125
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "macrodata-refiner"
|
|
3
|
-
version = "0.2.2"
|
|
3
|
+
version = "0.3.1"
|
|
4
4
|
description = "Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -16,47 +16,80 @@ authors = [
|
|
|
16
16
|
]
|
|
17
17
|
requires-python = ">=3.10"
|
|
18
18
|
dependencies = [
|
|
19
|
+
"aiohttp",
|
|
19
20
|
"cloudpickle==3.1.2",
|
|
20
|
-
"fsspec",
|
|
21
|
+
"fsspec[http]",
|
|
21
22
|
"httpx",
|
|
22
23
|
"loguru",
|
|
23
|
-
"opentelemetry-exporter-otlp-proto-http",
|
|
24
|
-
"opentelemetry-sdk",
|
|
25
24
|
"numpy",
|
|
26
|
-
"psutil",
|
|
27
25
|
"orjson",
|
|
26
|
+
"packaging",
|
|
28
27
|
"pyarrow",
|
|
29
28
|
"msgspec>=0.20.0",
|
|
29
|
+
"pydantic>=2.0.0",
|
|
30
30
|
]
|
|
31
31
|
|
|
32
32
|
[project.optional-dependencies]
|
|
33
33
|
video = [
|
|
34
34
|
"av",
|
|
35
|
+
"pillow",
|
|
35
36
|
]
|
|
36
|
-
|
|
37
|
-
"
|
|
37
|
+
hf = [
|
|
38
|
+
"datasets>=3.0.0",
|
|
38
39
|
"huggingface-hub>=1.4.1",
|
|
39
40
|
"hf>=1.7.1",
|
|
40
41
|
]
|
|
42
|
+
hand_tracking = [
|
|
43
|
+
"macrodata-refiner[hf]",
|
|
44
|
+
"macrodata-refiner[video]",
|
|
45
|
+
"ego-vision[models]>=0.1.25",
|
|
46
|
+
]
|
|
41
47
|
text = [
|
|
42
48
|
"warcio",
|
|
43
49
|
]
|
|
50
|
+
hdf5 = [
|
|
51
|
+
"h5py",
|
|
52
|
+
]
|
|
53
|
+
zarr = [
|
|
54
|
+
"zarr>=2.18,<3",
|
|
55
|
+
"numcodecs<0.16",
|
|
56
|
+
]
|
|
57
|
+
mcap = [
|
|
58
|
+
"av",
|
|
59
|
+
"mcap",
|
|
60
|
+
"mcap-protobuf-support",
|
|
61
|
+
"mcap-ros2-support",
|
|
62
|
+
"pillow",
|
|
63
|
+
]
|
|
44
64
|
s3 = [
|
|
45
65
|
"s3fs",
|
|
46
66
|
]
|
|
67
|
+
tensorflow = [
|
|
68
|
+
"tensorflow",
|
|
69
|
+
]
|
|
70
|
+
tfds = [
|
|
71
|
+
"macrodata-refiner[tensorflow]",
|
|
72
|
+
"tensorflow-datasets",
|
|
73
|
+
]
|
|
47
74
|
testing = [
|
|
48
|
-
"macrodata-refiner[
|
|
49
|
-
"macrodata-refiner[text]",
|
|
50
|
-
"macrodata-refiner[s3]",
|
|
75
|
+
"macrodata-refiner[all]",
|
|
51
76
|
"pytest>=8.0.0",
|
|
52
77
|
"pytest-cov>=5.0.0",
|
|
53
78
|
]
|
|
54
79
|
all = [
|
|
55
|
-
"macrodata-refiner[
|
|
80
|
+
"macrodata-refiner[hdf5]",
|
|
81
|
+
"macrodata-refiner[hf]",
|
|
82
|
+
"macrodata-refiner[mcap]",
|
|
83
|
+
"macrodata-refiner[video]",
|
|
84
|
+
"macrodata-refiner[zarr]",
|
|
85
|
+
"macrodata-refiner[text]",
|
|
86
|
+
"macrodata-refiner[s3]",
|
|
87
|
+
"macrodata-refiner[tfds]",
|
|
56
88
|
]
|
|
57
89
|
|
|
58
90
|
[project.scripts]
|
|
59
91
|
macrodata = "refiner.cli.main:main"
|
|
92
|
+
mdr = "refiner.cli.main:main"
|
|
60
93
|
|
|
61
94
|
[build-system]
|
|
62
95
|
requires = ["setuptools>=77", "wheel"]
|
|
@@ -70,7 +103,7 @@ refiner = ["py.typed"]
|
|
|
70
103
|
|
|
71
104
|
[dependency-groups]
|
|
72
105
|
dev = [
|
|
73
|
-
"macrodata-refiner[
|
|
106
|
+
"macrodata-refiner[testing]",
|
|
74
107
|
"pre-commit>=4.0.0",
|
|
75
108
|
"ruff>=0.14.10",
|
|
76
109
|
"ty>=0.0.7",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrodata-refiner
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
5
|
Author: Macrodata Labs
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,35 +12,61 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
|
+
Requires-Dist: aiohttp
|
|
15
16
|
Requires-Dist: cloudpickle==3.1.2
|
|
16
|
-
Requires-Dist: fsspec
|
|
17
|
+
Requires-Dist: fsspec[http]
|
|
17
18
|
Requires-Dist: httpx
|
|
18
19
|
Requires-Dist: loguru
|
|
19
|
-
Requires-Dist: opentelemetry-exporter-otlp-proto-http
|
|
20
|
-
Requires-Dist: opentelemetry-sdk
|
|
21
20
|
Requires-Dist: numpy
|
|
22
|
-
Requires-Dist: psutil
|
|
23
21
|
Requires-Dist: orjson
|
|
22
|
+
Requires-Dist: packaging
|
|
24
23
|
Requires-Dist: pyarrow
|
|
25
24
|
Requires-Dist: msgspec>=0.20.0
|
|
25
|
+
Requires-Dist: pydantic>=2.0.0
|
|
26
26
|
Provides-Extra: video
|
|
27
27
|
Requires-Dist: av; extra == "video"
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
Requires-Dist:
|
|
31
|
-
Requires-Dist:
|
|
28
|
+
Requires-Dist: pillow; extra == "video"
|
|
29
|
+
Provides-Extra: hf
|
|
30
|
+
Requires-Dist: datasets>=3.0.0; extra == "hf"
|
|
31
|
+
Requires-Dist: huggingface-hub>=1.4.1; extra == "hf"
|
|
32
|
+
Requires-Dist: hf>=1.7.1; extra == "hf"
|
|
33
|
+
Provides-Extra: hand-tracking
|
|
34
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "hand-tracking"
|
|
35
|
+
Requires-Dist: macrodata-refiner[video]; extra == "hand-tracking"
|
|
36
|
+
Requires-Dist: ego-vision[models]>=0.1.25; extra == "hand-tracking"
|
|
32
37
|
Provides-Extra: text
|
|
33
38
|
Requires-Dist: warcio; extra == "text"
|
|
39
|
+
Provides-Extra: hdf5
|
|
40
|
+
Requires-Dist: h5py; extra == "hdf5"
|
|
41
|
+
Provides-Extra: zarr
|
|
42
|
+
Requires-Dist: zarr<3,>=2.18; extra == "zarr"
|
|
43
|
+
Requires-Dist: numcodecs<0.16; extra == "zarr"
|
|
44
|
+
Provides-Extra: mcap
|
|
45
|
+
Requires-Dist: av; extra == "mcap"
|
|
46
|
+
Requires-Dist: mcap; extra == "mcap"
|
|
47
|
+
Requires-Dist: mcap-protobuf-support; extra == "mcap"
|
|
48
|
+
Requires-Dist: mcap-ros2-support; extra == "mcap"
|
|
49
|
+
Requires-Dist: pillow; extra == "mcap"
|
|
34
50
|
Provides-Extra: s3
|
|
35
51
|
Requires-Dist: s3fs; extra == "s3"
|
|
52
|
+
Provides-Extra: tensorflow
|
|
53
|
+
Requires-Dist: tensorflow; extra == "tensorflow"
|
|
54
|
+
Provides-Extra: tfds
|
|
55
|
+
Requires-Dist: macrodata-refiner[tensorflow]; extra == "tfds"
|
|
56
|
+
Requires-Dist: tensorflow-datasets; extra == "tfds"
|
|
36
57
|
Provides-Extra: testing
|
|
37
|
-
Requires-Dist: macrodata-refiner[
|
|
38
|
-
Requires-Dist: macrodata-refiner[text]; extra == "testing"
|
|
39
|
-
Requires-Dist: macrodata-refiner[s3]; extra == "testing"
|
|
58
|
+
Requires-Dist: macrodata-refiner[all]; extra == "testing"
|
|
40
59
|
Requires-Dist: pytest>=8.0.0; extra == "testing"
|
|
41
60
|
Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
|
|
42
61
|
Provides-Extra: all
|
|
43
|
-
Requires-Dist: macrodata-refiner[
|
|
62
|
+
Requires-Dist: macrodata-refiner[hdf5]; extra == "all"
|
|
63
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "all"
|
|
64
|
+
Requires-Dist: macrodata-refiner[mcap]; extra == "all"
|
|
65
|
+
Requires-Dist: macrodata-refiner[video]; extra == "all"
|
|
66
|
+
Requires-Dist: macrodata-refiner[zarr]; extra == "all"
|
|
67
|
+
Requires-Dist: macrodata-refiner[text]; extra == "all"
|
|
68
|
+
Requires-Dist: macrodata-refiner[s3]; extra == "all"
|
|
69
|
+
Requires-Dist: macrodata-refiner[tfds]; extra == "all"
|
|
44
70
|
Dynamic: license-file
|
|
45
71
|
|
|
46
72
|
<p align="center">
|
|
@@ -49,9 +75,10 @@ Dynamic: license-file
|
|
|
49
75
|
|
|
50
76
|
<h1 align="center">Macrodata Refiner</h1>
|
|
51
77
|
|
|
52
|
-
Refiner is an open-source engine for turning raw
|
|
78
|
+
Refiner is an open-source engine for turning raw robotics and multimodal data into **high-quality datasets** for model training.
|
|
53
79
|
|
|
54
|
-
It
|
|
80
|
+
It gives training-data teams one pipeline model for multimodal data, robotics
|
|
81
|
+
workflows, and model-based processing.
|
|
55
82
|
|
|
56
83
|
It also plugs into the Macrodata platform, which gives you visibility into what is happening to your data while pipelines run: job and shard lifecycle, logs, metrics, manifests, and pipeline behavior. The same code can run locally for development and then scale out through Macrodata's elastic serverless cloud.
|
|
57
84
|
|
|
@@ -90,7 +117,7 @@ import refiner as mdr
|
|
|
90
117
|
pad_frames=5,
|
|
91
118
|
)
|
|
92
119
|
)
|
|
93
|
-
.write_lerobot("hf://buckets/
|
|
120
|
+
.write_lerobot("hf://buckets/acme-robotics/aloha_motion")
|
|
94
121
|
.launch_cloud(
|
|
95
122
|
name="motion_trim",
|
|
96
123
|
num_workers=4,
|
|
@@ -98,7 +125,7 @@ import refiner as mdr
|
|
|
98
125
|
)
|
|
99
126
|
```
|
|
100
127
|
|
|
101
|
-
Need cloud GPUs? See [
|
|
128
|
+
Need cloud GPUs? See [Resources, GPUs, and Services](docs/running-pipelines/resources-gpus-and-services.md).
|
|
102
129
|
|
|
103
130
|
### Local example
|
|
104
131
|
|
|
@@ -137,31 +164,33 @@ def add_preview(row):
|
|
|
137
164
|
|
|
138
165
|
- training-data-first pipeline primitives instead of generic ETL abstractions
|
|
139
166
|
- multimodal processing, with robotics support today
|
|
140
|
-
-
|
|
167
|
+
- built-in readers, transforms, sinks, and runtime machinery for common dataset work
|
|
141
168
|
- access to any storage backend supported by `fsspec` (S3, GCP, Hugging Face, etc.)
|
|
142
169
|
- local execution for development and elastic cloud execution for large runs
|
|
143
|
-
- built-in observability through the Macrodata platform
|
|
170
|
+
- built-in observability through the Macrodata platform for job state, logs, metrics, and manifests
|
|
144
171
|
|
|
145
172
|
## Docs
|
|
146
173
|
|
|
147
|
-
|
|
174
|
+
Start here:
|
|
148
175
|
|
|
149
|
-
- [
|
|
150
|
-
- [
|
|
151
|
-
- [
|
|
176
|
+
- [Docs index](docs/index.md)
|
|
177
|
+
- [Quickstart](docs/quickstart.md)
|
|
178
|
+
- [Running pipelines](docs/running-pipelines/index.md)
|
|
152
179
|
|
|
153
|
-
|
|
180
|
+
Build a dataset:
|
|
154
181
|
|
|
155
|
-
- [Reading
|
|
156
|
-
- [
|
|
157
|
-
- [
|
|
158
|
-
- [
|
|
159
|
-
- [
|
|
182
|
+
- [Reading data](docs/reading-data/index.md)
|
|
183
|
+
- [Episode data](docs/episode-data/index.md)
|
|
184
|
+
- [Transforms](docs/transforms/index.md)
|
|
185
|
+
- [Episode operations](docs/episode-operations/index.md)
|
|
186
|
+
- [Writing data](docs/writing-data/index.md)
|
|
187
|
+
- [Examples](docs/examples/index.md)
|
|
160
188
|
|
|
161
|
-
|
|
189
|
+
Operate jobs:
|
|
162
190
|
|
|
163
|
-
- [
|
|
164
|
-
- [
|
|
191
|
+
- [Platform](docs/platform/index.md)
|
|
192
|
+
- [CLI](docs/cli/index.md)
|
|
193
|
+
- [Reference](docs/reference/index.md)
|
|
165
194
|
|
|
166
195
|
## Community
|
|
167
196
|
|