macrodata-refiner 0.2.2__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/PKG-INFO +57 -29
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/README.md +22 -19
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/pyproject.toml +40 -8
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/macrodata_refiner.egg-info/PKG-INFO +57 -29
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/macrodata_refiner.egg-info/SOURCES.txt +79 -16
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/macrodata_refiner.egg-info/entry_points.txt +1 -0
- macrodata_refiner-0.3.0/src/macrodata_refiner.egg-info/requires.txt +66 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/__init__.py +36 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/cli/auth.py +13 -13
- macrodata_refiner-0.3.0/src/refiner/cli/commands/__init__.py +1 -0
- macrodata_refiner-0.2.2/src/refiner/cli/main.py → macrodata_refiner-0.3.0/src/refiner/cli/commands/auth.py +3 -21
- macrodata_refiner-0.3.0/src/refiner/cli/commands/jobs.py +194 -0
- macrodata_refiner-0.3.0/src/refiner/cli/commands/run.py +42 -0
- macrodata_refiner-0.3.0/src/refiner/cli/commands/secrets.py +53 -0
- macrodata_refiner-0.3.0/src/refiner/cli/common.py +66 -0
- macrodata_refiner-0.3.0/src/refiner/cli/jobs/__init__.py +1 -0
- macrodata_refiner-0.3.0/src/refiner/cli/jobs/attach.py +49 -0
- macrodata_refiner-0.3.0/src/refiner/cli/jobs/common.py +161 -0
- macrodata_refiner-0.3.0/src/refiner/cli/jobs/control.py +30 -0
- macrodata_refiner-0.3.0/src/refiner/cli/jobs/follow.py +299 -0
- macrodata_refiner-0.3.0/src/refiner/cli/jobs/get.py +237 -0
- macrodata_refiner-0.3.0/src/refiner/cli/jobs/list.py +86 -0
- macrodata_refiner-0.3.0/src/refiner/cli/jobs/logs.py +574 -0
- macrodata_refiner-0.3.0/src/refiner/cli/jobs/manifest.py +158 -0
- macrodata_refiner-0.3.0/src/refiner/cli/jobs/metrics.py +346 -0
- macrodata_refiner-0.3.0/src/refiner/cli/jobs/workers.py +87 -0
- macrodata_refiner-0.3.0/src/refiner/cli/main.py +34 -0
- macrodata_refiner-0.3.0/src/refiner/cli/run/__init__.py +1 -0
- macrodata_refiner-0.3.0/src/refiner/cli/run/cloud.py +575 -0
- macrodata_refiner-0.3.0/src/refiner/cli/run/command.py +92 -0
- macrodata_refiner-0.3.0/src/refiner/cli/run/local.py +343 -0
- macrodata_refiner-0.3.0/src/refiner/cli/run/modes.py +69 -0
- macrodata_refiner-0.3.0/src/refiner/cli/secrets.py +105 -0
- macrodata_refiner-0.3.0/src/refiner/cli/ui/__init__.py +15 -0
- macrodata_refiner-0.3.0/src/refiner/cli/ui/console.py +943 -0
- macrodata_refiner-0.2.2/src/refiner/cli/ui.py → macrodata_refiner-0.3.0/src/refiner/cli/ui/terminal.py +7 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/asyncio/runtime.py +1 -3
- macrodata_refiner-0.3.0/src/refiner/execution/asyncio/window.py +119 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/engine.py +158 -14
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/operators/row.py +2 -2
- macrodata_refiner-0.3.0/src/refiner/execution/operators/vectorized.py +234 -0
- macrodata_refiner-0.3.0/src/refiner/inference/__init__.py +82 -0
- macrodata_refiner-0.3.0/src/refiner/inference/capabilities.py +246 -0
- macrodata_refiner-0.3.0/src/refiner/inference/generate_pooling.py +49 -0
- macrodata_refiner-0.3.0/src/refiner/inference/generate_text.py +241 -0
- macrodata_refiner-0.3.0/src/refiner/inference/internal/__init__.py +1 -0
- macrodata_refiner-0.3.0/src/refiner/inference/internal/media.py +133 -0
- macrodata_refiner-0.3.0/src/refiner/inference/internal/message_conversion.py +45 -0
- macrodata_refiner-0.3.0/src/refiner/inference/internal/response.py +70 -0
- macrodata_refiner-0.3.0/src/refiner/inference/internal/runtime.py +177 -0
- macrodata_refiner-0.3.0/src/refiner/inference/internal/schema.py +71 -0
- macrodata_refiner-0.3.0/src/refiner/inference/internal/transport.py +380 -0
- macrodata_refiner-0.3.0/src/refiner/inference/internal/usage.py +31 -0
- macrodata_refiner-0.3.0/src/refiner/inference/providers/__init__.py +15 -0
- macrodata_refiner-0.3.0/src/refiner/inference/providers/anthropic.py +694 -0
- macrodata_refiner-0.3.0/src/refiner/inference/providers/base.py +138 -0
- macrodata_refiner-0.3.0/src/refiner/inference/providers/google.py +787 -0
- macrodata_refiner-0.3.0/src/refiner/inference/providers/openai.py +1242 -0
- macrodata_refiner-0.3.0/src/refiner/inference/providers/warnings.py +55 -0
- macrodata_refiner-0.3.0/src/refiner/inference/types.py +342 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/io/datafile.py +67 -1
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/io/datafolder.py +10 -6
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/io/fileset.py +86 -17
- macrodata_refiner-0.3.0/src/refiner/job_urls.py +16 -0
- macrodata_refiner-0.3.0/src/refiner/launchers/base.py +104 -0
- macrodata_refiner-0.3.0/src/refiner/launchers/cloud.py +372 -0
- macrodata_refiner-0.3.0/src/refiner/launchers/local.py +516 -0
- macrodata_refiner-0.3.0/src/refiner/launchers/secrets.py +153 -0
- macrodata_refiner-0.3.0/src/refiner/pipeline/__init__.py +55 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/data/block.py +9 -3
- macrodata_refiner-0.3.0/src/refiner/pipeline/data/datatype.py +409 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/data/shard.py +10 -2
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/data/tabular.py +209 -43
- macrodata_refiner-0.3.0/src/refiner/pipeline/pipeline.py +1274 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/planning.py +77 -45
- macrodata_refiner-0.3.0/src/refiner/pipeline/resources.py +48 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sinks/__init__.py +4 -1
- macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/assets.py +430 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sinks/base.py +37 -7
- macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/jsonl.py +147 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sinks/lerobot.py +165 -45
- macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/parquet.py +146 -0
- macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/reducer/__init__.py +9 -0
- macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/reducer/file.py +180 -0
- macrodata_refiner-0.2.2/src/refiner/pipeline/sinks/lerobot_reducer.py → macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/reducer/lerobot.py +29 -19
- macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/reducer/zarr.py +281 -0
- macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/zarr.py +602 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/__init__.py +16 -2
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/base.py +4 -0
- macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/__init__.py +29 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/readers/base.py +29 -1
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/readers/csv.py +23 -5
- macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/files.py +166 -0
- macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/hdf5.py +280 -0
- macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/hf_dataset.py +416 -0
- macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/json.py +167 -0
- macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/mcap.py +967 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/readers/parquet.py +55 -11
- macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/tfds.py +392 -0
- macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/tfrecord.py +205 -0
- macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/utils.py +237 -0
- macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/zarr.py +577 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/steps.py +6 -1
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/utils/cache/decoder_cache.py +15 -11
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/platform/auth.py +14 -4
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/platform/client/__init__.py +20 -13
- macrodata_refiner-0.3.0/src/refiner/platform/client/api.py +577 -0
- macrodata_refiner-0.3.0/src/refiner/platform/client/models.py +319 -0
- macrodata_refiner-0.3.0/src/refiner/platform/client/serialize.py +39 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/platform/manifest.py +59 -2
- macrodata_refiner-0.3.0/src/refiner/robotics/__init__.py +47 -0
- macrodata_refiner-0.3.0/src/refiner/robotics/egocentric.py +99 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/__init__.py +0 -2
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/row.py +135 -72
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/tabular.py +37 -8
- macrodata_refiner-0.3.0/src/refiner/robotics/motion.py +181 -0
- macrodata_refiner-0.3.0/src/refiner/robotics/reward.py +268 -0
- macrodata_refiner-0.3.0/src/refiner/robotics/row.py +867 -0
- macrodata_refiner-0.3.0/src/refiner/robotics/subtask_annotation.py +466 -0
- macrodata_refiner-0.3.0/src/refiner/robotics/synchronization.py +244 -0
- macrodata_refiner-0.3.0/src/refiner/robotics/tabular.py +172 -0
- macrodata_refiner-0.3.0/src/refiner/services/__init__.py +14 -0
- macrodata_refiner-0.3.0/src/refiner/services/base.py +44 -0
- macrodata_refiner-0.3.0/src/refiner/services/discovery.py +102 -0
- macrodata_refiner-0.3.0/src/refiner/services/manager.py +251 -0
- macrodata_refiner-0.3.0/src/refiner/services/vllm.py +78 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/text/commoncrawl.py +9 -2
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/video/__init__.py +27 -1
- macrodata_refiner-0.3.0/src/refiner/video/decode.py +279 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/video/remux.py +68 -15
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/video/transcode.py +57 -44
- macrodata_refiner-0.3.0/src/refiner/video/types.py +520 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/video/writer.py +80 -31
- macrodata_refiner-0.3.0/src/refiner/worker/context.py +177 -0
- macrodata_refiner-0.3.0/src/refiner/worker/entrypoint.py +99 -0
- macrodata_refiner-0.3.0/src/refiner/worker/lifecycle.py +142 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/metrics/api.py +4 -2
- macrodata_refiner-0.3.0/src/refiner/worker/metrics/emitter.py +112 -0
- macrodata_refiner-0.3.0/src/refiner/worker/resources/cpu.py +24 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/resources/gpu.py +7 -7
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/runner.py +115 -163
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/workdir.py +2 -2
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/tests/test_cache.py +2 -3
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/tests/test_commoncrawl_text.py +56 -5
- macrodata_refiner-0.3.0/tests/test_video_decode.py +255 -0
- macrodata_refiner-0.2.2/src/macrodata_refiner.egg-info/requires.txt +0 -35
- macrodata_refiner-0.2.2/src/refiner/execution/asyncio/window.py +0 -91
- macrodata_refiner-0.2.2/src/refiner/execution/operators/vectorized.py +0 -143
- macrodata_refiner-0.2.2/src/refiner/launchers/base.py +0 -215
- macrodata_refiner-0.2.2/src/refiner/launchers/cloud.py +0 -210
- macrodata_refiner-0.2.2/src/refiner/launchers/local.py +0 -336
- macrodata_refiner-0.2.2/src/refiner/pipeline/__init__.py +0 -25
- macrodata_refiner-0.2.2/src/refiner/pipeline/pipeline.py +0 -603
- macrodata_refiner-0.2.2/src/refiner/pipeline/sinks/jsonl.py +0 -81
- macrodata_refiner-0.2.2/src/refiner/pipeline/sinks/parquet.py +0 -78
- macrodata_refiner-0.2.2/src/refiner/pipeline/sources/readers/__init__.py +0 -15
- macrodata_refiner-0.2.2/src/refiner/pipeline/sources/readers/jsonl.py +0 -97
- macrodata_refiner-0.2.2/src/refiner/pipeline/sources/readers/utils.py +0 -104
- macrodata_refiner-0.2.2/src/refiner/platform/client/api.py +0 -263
- macrodata_refiner-0.2.2/src/refiner/platform/client/http.py +0 -118
- macrodata_refiner-0.2.2/src/refiner/platform/client/models.py +0 -197
- macrodata_refiner-0.2.2/src/refiner/platform/client/serialize.py +0 -34
- macrodata_refiner-0.2.2/src/refiner/robotics/__init__.py +0 -25
- macrodata_refiner-0.2.2/src/refiner/robotics/motion.py +0 -165
- macrodata_refiner-0.2.2/src/refiner/video/types.py +0 -23
- macrodata_refiner-0.2.2/src/refiner/worker/context.py +0 -121
- macrodata_refiner-0.2.2/src/refiner/worker/entrypoint.py +0 -113
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/__init__.py +0 -5
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/base.py +0 -25
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/local/__init__.py +0 -3
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/local/claim.py +0 -147
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/local/files.py +0 -41
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/local/lifecycle.py +0 -308
- macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/platform.py +0 -99
- macrodata_refiner-0.2.2/src/refiner/worker/metrics/context.py +0 -147
- macrodata_refiner-0.2.2/src/refiner/worker/metrics/otel.py +0 -364
- macrodata_refiner-0.2.2/src/refiner/worker/resources/cpu.py +0 -123
- macrodata_refiner-0.2.2/src/refiner/worker/resources/memory.py +0 -63
- macrodata_refiner-0.2.2/src/refiner/worker/resources/network.py +0 -27
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/LICENSE +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/setup.cfg +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/macrodata_refiner.egg-info/dependency_links.txt +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/macrodata_refiner.egg-info/top_level.txt +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/cli/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/asyncio/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/buffer.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/operators/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/tracking/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/tracking/shards.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/io/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/launchers/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/data/row.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/expressions.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/items.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/readers/lerobot.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/task.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/utils/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/utils/cache/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/utils/cache/file_cache.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/utils/cache/lease_cache.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/platform/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/py.typed +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/metadata/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/metadata/info.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/metadata/metadata.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/metadata/stats.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/metadata/tasks.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/text/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/utils/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/utils/imports.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/metrics/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/resources/__init__.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/tests/test_expressions.py +0 -0
- {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/tests/test_optional_dependencies.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrodata-refiner
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
5
|
Author: Macrodata Labs
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -13,30 +13,55 @@ Requires-Python: >=3.10
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
15
|
Requires-Dist: cloudpickle==3.1.2
|
|
16
|
-
Requires-Dist: fsspec
|
|
16
|
+
Requires-Dist: fsspec[http]
|
|
17
17
|
Requires-Dist: httpx
|
|
18
18
|
Requires-Dist: loguru
|
|
19
|
-
Requires-Dist: opentelemetry-exporter-otlp-proto-http
|
|
20
|
-
Requires-Dist: opentelemetry-sdk
|
|
21
19
|
Requires-Dist: numpy
|
|
22
|
-
Requires-Dist: psutil
|
|
23
20
|
Requires-Dist: orjson
|
|
21
|
+
Requires-Dist: packaging
|
|
24
22
|
Requires-Dist: pyarrow
|
|
25
23
|
Requires-Dist: msgspec>=0.20.0
|
|
24
|
+
Requires-Dist: pydantic>=2.0.0
|
|
26
25
|
Provides-Extra: video
|
|
27
26
|
Requires-Dist: av; extra == "video"
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
Requires-Dist:
|
|
31
|
-
Requires-Dist:
|
|
27
|
+
Requires-Dist: pillow; extra == "video"
|
|
28
|
+
Provides-Extra: hf
|
|
29
|
+
Requires-Dist: datasets>=3.0.0; extra == "hf"
|
|
30
|
+
Requires-Dist: huggingface-hub>=1.4.1; extra == "hf"
|
|
31
|
+
Requires-Dist: hf>=1.7.1; extra == "hf"
|
|
32
|
+
Provides-Extra: egocentric
|
|
33
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "egocentric"
|
|
34
|
+
Requires-Dist: macrodata-refiner[video]; extra == "egocentric"
|
|
35
|
+
Requires-Dist: ego-vision[models]>=0.1.8; extra == "egocentric"
|
|
32
36
|
Provides-Extra: text
|
|
33
37
|
Requires-Dist: warcio; extra == "text"
|
|
38
|
+
Provides-Extra: hdf5
|
|
39
|
+
Requires-Dist: h5py; extra == "hdf5"
|
|
40
|
+
Provides-Extra: zarr
|
|
41
|
+
Requires-Dist: zarr<3,>=2.18; extra == "zarr"
|
|
42
|
+
Requires-Dist: numcodecs<0.16; extra == "zarr"
|
|
43
|
+
Provides-Extra: mcap
|
|
44
|
+
Requires-Dist: av; extra == "mcap"
|
|
45
|
+
Requires-Dist: mcap; extra == "mcap"
|
|
46
|
+
Requires-Dist: mcap-protobuf-support; extra == "mcap"
|
|
47
|
+
Requires-Dist: mcap-ros2-support; extra == "mcap"
|
|
48
|
+
Requires-Dist: pillow; extra == "mcap"
|
|
34
49
|
Provides-Extra: s3
|
|
35
50
|
Requires-Dist: s3fs; extra == "s3"
|
|
51
|
+
Provides-Extra: tensorflow
|
|
52
|
+
Requires-Dist: tensorflow; extra == "tensorflow"
|
|
53
|
+
Provides-Extra: tfds
|
|
54
|
+
Requires-Dist: macrodata-refiner[tensorflow]; extra == "tfds"
|
|
55
|
+
Requires-Dist: tensorflow-datasets; extra == "tfds"
|
|
36
56
|
Provides-Extra: testing
|
|
37
|
-
Requires-Dist: macrodata-refiner[
|
|
57
|
+
Requires-Dist: macrodata-refiner[hdf5]; extra == "testing"
|
|
58
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "testing"
|
|
59
|
+
Requires-Dist: macrodata-refiner[mcap]; extra == "testing"
|
|
60
|
+
Requires-Dist: macrodata-refiner[video]; extra == "testing"
|
|
61
|
+
Requires-Dist: macrodata-refiner[zarr]; extra == "testing"
|
|
38
62
|
Requires-Dist: macrodata-refiner[text]; extra == "testing"
|
|
39
63
|
Requires-Dist: macrodata-refiner[s3]; extra == "testing"
|
|
64
|
+
Requires-Dist: macrodata-refiner[tfds]; extra == "testing"
|
|
40
65
|
Requires-Dist: pytest>=8.0.0; extra == "testing"
|
|
41
66
|
Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
|
|
42
67
|
Provides-Extra: all
|
|
@@ -49,9 +74,10 @@ Dynamic: license-file
|
|
|
49
74
|
|
|
50
75
|
<h1 align="center">Macrodata Refiner</h1>
|
|
51
76
|
|
|
52
|
-
Refiner is an open-source engine for turning raw
|
|
77
|
+
Refiner is an open-source engine for turning raw robotics and multimodal data into **high-quality datasets** for model training.
|
|
53
78
|
|
|
54
|
-
It
|
|
79
|
+
It gives training-data teams one pipeline model for multimodal data, robotics
|
|
80
|
+
workflows, and model-based processing.
|
|
55
81
|
|
|
56
82
|
It also plugs into the Macrodata platform, which gives you visibility into what is happening to your data while pipelines run: job and shard lifecycle, logs, metrics, manifests, and pipeline behavior. The same code can run locally for development and then scale out through Macrodata's elastic serverless cloud.
|
|
57
83
|
|
|
@@ -90,7 +116,7 @@ import refiner as mdr
|
|
|
90
116
|
pad_frames=5,
|
|
91
117
|
)
|
|
92
118
|
)
|
|
93
|
-
.write_lerobot("hf://buckets/
|
|
119
|
+
.write_lerobot("hf://buckets/acme-robotics/aloha_motion")
|
|
94
120
|
.launch_cloud(
|
|
95
121
|
name="motion_trim",
|
|
96
122
|
num_workers=4,
|
|
@@ -98,7 +124,7 @@ import refiner as mdr
|
|
|
98
124
|
)
|
|
99
125
|
```
|
|
100
126
|
|
|
101
|
-
Need cloud GPUs? See [
|
|
127
|
+
Need cloud GPUs? See [Resources, GPUs, and Services](docs/running-pipelines/resources-gpus-and-services.md).
|
|
102
128
|
|
|
103
129
|
### Local example
|
|
104
130
|
|
|
@@ -137,31 +163,33 @@ def add_preview(row):
|
|
|
137
163
|
|
|
138
164
|
- training-data-first pipeline primitives instead of generic ETL abstractions
|
|
139
165
|
- multimodal processing, with robotics support today
|
|
140
|
-
-
|
|
166
|
+
- built-in readers, transforms, sinks, and runtime machinery for common dataset work
|
|
141
167
|
- access to any storage backend supported by `fsspec` (S3, GCP, Hugging Face, etc.)
|
|
142
168
|
- local execution for development and elastic cloud execution for large runs
|
|
143
|
-
- built-in observability through the Macrodata platform
|
|
169
|
+
- built-in observability through the Macrodata platform for job state, logs, metrics, and manifests
|
|
144
170
|
|
|
145
171
|
## Docs
|
|
146
172
|
|
|
147
|
-
|
|
173
|
+
Start here:
|
|
148
174
|
|
|
149
|
-
- [
|
|
150
|
-
- [
|
|
151
|
-
- [
|
|
175
|
+
- [Docs index](docs/index.md)
|
|
176
|
+
- [Quickstart](docs/quickstart.md)
|
|
177
|
+
- [Running pipelines](docs/running-pipelines/index.md)
|
|
152
178
|
|
|
153
|
-
|
|
179
|
+
Build a dataset:
|
|
154
180
|
|
|
155
|
-
- [Reading
|
|
156
|
-
- [
|
|
157
|
-
- [
|
|
158
|
-
- [
|
|
159
|
-
- [
|
|
181
|
+
- [Reading data](docs/reading-data/index.md)
|
|
182
|
+
- [Episode data](docs/episode-data/index.md)
|
|
183
|
+
- [Transforms](docs/transforms/index.md)
|
|
184
|
+
- [Episode operations](docs/episode-operations/index.md)
|
|
185
|
+
- [Writing data](docs/writing-data/index.md)
|
|
186
|
+
- [Examples](docs/examples/index.md)
|
|
160
187
|
|
|
161
|
-
|
|
188
|
+
Operate jobs:
|
|
162
189
|
|
|
163
|
-
- [
|
|
164
|
-
- [
|
|
190
|
+
- [Platform](docs/platform/index.md)
|
|
191
|
+
- [CLI](docs/cli/index.md)
|
|
192
|
+
- [Reference](docs/reference/index.md)
|
|
165
193
|
|
|
166
194
|
## Community
|
|
167
195
|
|
|
@@ -4,9 +4,10 @@
|
|
|
4
4
|
|
|
5
5
|
<h1 align="center">Macrodata Refiner</h1>
|
|
6
6
|
|
|
7
|
-
Refiner is an open-source engine for turning raw
|
|
7
|
+
Refiner is an open-source engine for turning raw robotics and multimodal data into **high-quality datasets** for model training.
|
|
8
8
|
|
|
9
|
-
It
|
|
9
|
+
It gives training-data teams one pipeline model for multimodal data, robotics
|
|
10
|
+
workflows, and model-based processing.
|
|
10
11
|
|
|
11
12
|
It also plugs into the Macrodata platform, which gives you visibility into what is happening to your data while pipelines run: job and shard lifecycle, logs, metrics, manifests, and pipeline behavior. The same code can run locally for development and then scale out through Macrodata's elastic serverless cloud.
|
|
12
13
|
|
|
@@ -45,7 +46,7 @@ import refiner as mdr
|
|
|
45
46
|
pad_frames=5,
|
|
46
47
|
)
|
|
47
48
|
)
|
|
48
|
-
.write_lerobot("hf://buckets/
|
|
49
|
+
.write_lerobot("hf://buckets/acme-robotics/aloha_motion")
|
|
49
50
|
.launch_cloud(
|
|
50
51
|
name="motion_trim",
|
|
51
52
|
num_workers=4,
|
|
@@ -53,7 +54,7 @@ import refiner as mdr
|
|
|
53
54
|
)
|
|
54
55
|
```
|
|
55
56
|
|
|
56
|
-
Need cloud GPUs? See [
|
|
57
|
+
Need cloud GPUs? See [Resources, GPUs, and Services](docs/running-pipelines/resources-gpus-and-services.md).
|
|
57
58
|
|
|
58
59
|
### Local example
|
|
59
60
|
|
|
@@ -92,31 +93,33 @@ def add_preview(row):
|
|
|
92
93
|
|
|
93
94
|
- training-data-first pipeline primitives instead of generic ETL abstractions
|
|
94
95
|
- multimodal processing, with robotics support today
|
|
95
|
-
-
|
|
96
|
+
- built-in readers, transforms, sinks, and runtime machinery for common dataset work
|
|
96
97
|
- access to any storage backend supported by `fsspec` (S3, GCP, Hugging Face, etc.)
|
|
97
98
|
- local execution for development and elastic cloud execution for large runs
|
|
98
|
-
- built-in observability through the Macrodata platform
|
|
99
|
+
- built-in observability through the Macrodata platform for job state, logs, metrics, and manifests
|
|
99
100
|
|
|
100
101
|
## Docs
|
|
101
102
|
|
|
102
|
-
|
|
103
|
+
Start here:
|
|
103
104
|
|
|
104
|
-
- [
|
|
105
|
-
- [
|
|
106
|
-
- [
|
|
105
|
+
- [Docs index](docs/index.md)
|
|
106
|
+
- [Quickstart](docs/quickstart.md)
|
|
107
|
+
- [Running pipelines](docs/running-pipelines/index.md)
|
|
107
108
|
|
|
108
|
-
|
|
109
|
+
Build a dataset:
|
|
109
110
|
|
|
110
|
-
- [Reading
|
|
111
|
-
- [
|
|
112
|
-
- [
|
|
113
|
-
- [
|
|
114
|
-
- [
|
|
111
|
+
- [Reading data](docs/reading-data/index.md)
|
|
112
|
+
- [Episode data](docs/episode-data/index.md)
|
|
113
|
+
- [Transforms](docs/transforms/index.md)
|
|
114
|
+
- [Episode operations](docs/episode-operations/index.md)
|
|
115
|
+
- [Writing data](docs/writing-data/index.md)
|
|
116
|
+
- [Examples](docs/examples/index.md)
|
|
115
117
|
|
|
116
|
-
|
|
118
|
+
Operate jobs:
|
|
117
119
|
|
|
118
|
-
- [
|
|
119
|
-
- [
|
|
120
|
+
- [Platform](docs/platform/index.md)
|
|
121
|
+
- [CLI](docs/cli/index.md)
|
|
122
|
+
- [Reference](docs/reference/index.md)
|
|
120
123
|
|
|
121
124
|
## Community
|
|
122
125
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "macrodata-refiner"
|
|
3
|
-
version = "0.2.2"
|
|
3
|
+
version = "0.3.0"
|
|
4
4
|
description = "Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -17,37 +17,68 @@ authors = [
|
|
|
17
17
|
requires-python = ">=3.10"
|
|
18
18
|
dependencies = [
|
|
19
19
|
"cloudpickle==3.1.2",
|
|
20
|
-
"fsspec",
|
|
20
|
+
"fsspec[http]",
|
|
21
21
|
"httpx",
|
|
22
22
|
"loguru",
|
|
23
|
-
"opentelemetry-exporter-otlp-proto-http",
|
|
24
|
-
"opentelemetry-sdk",
|
|
25
23
|
"numpy",
|
|
26
|
-
"psutil",
|
|
27
24
|
"orjson",
|
|
25
|
+
"packaging",
|
|
28
26
|
"pyarrow",
|
|
29
27
|
"msgspec>=0.20.0",
|
|
28
|
+
"pydantic>=2.0.0",
|
|
30
29
|
]
|
|
31
30
|
|
|
32
31
|
[project.optional-dependencies]
|
|
33
32
|
video = [
|
|
34
33
|
"av",
|
|
34
|
+
"pillow",
|
|
35
35
|
]
|
|
36
|
-
|
|
37
|
-
"
|
|
36
|
+
hf = [
|
|
37
|
+
"datasets>=3.0.0",
|
|
38
38
|
"huggingface-hub>=1.4.1",
|
|
39
39
|
"hf>=1.7.1",
|
|
40
40
|
]
|
|
41
|
+
egocentric = [
|
|
42
|
+
"macrodata-refiner[hf]",
|
|
43
|
+
"macrodata-refiner[video]",
|
|
44
|
+
"ego-vision[models]>=0.1.8",
|
|
45
|
+
]
|
|
41
46
|
text = [
|
|
42
47
|
"warcio",
|
|
43
48
|
]
|
|
49
|
+
hdf5 = [
|
|
50
|
+
"h5py",
|
|
51
|
+
]
|
|
52
|
+
zarr = [
|
|
53
|
+
"zarr>=2.18,<3",
|
|
54
|
+
"numcodecs<0.16",
|
|
55
|
+
]
|
|
56
|
+
mcap = [
|
|
57
|
+
"av",
|
|
58
|
+
"mcap",
|
|
59
|
+
"mcap-protobuf-support",
|
|
60
|
+
"mcap-ros2-support",
|
|
61
|
+
"pillow",
|
|
62
|
+
]
|
|
44
63
|
s3 = [
|
|
45
64
|
"s3fs",
|
|
46
65
|
]
|
|
66
|
+
tensorflow = [
|
|
67
|
+
"tensorflow",
|
|
68
|
+
]
|
|
69
|
+
tfds = [
|
|
70
|
+
"macrodata-refiner[tensorflow]",
|
|
71
|
+
"tensorflow-datasets",
|
|
72
|
+
]
|
|
47
73
|
testing = [
|
|
48
|
-
"macrodata-refiner[
|
|
74
|
+
"macrodata-refiner[hdf5]",
|
|
75
|
+
"macrodata-refiner[hf]",
|
|
76
|
+
"macrodata-refiner[mcap]",
|
|
77
|
+
"macrodata-refiner[video]",
|
|
78
|
+
"macrodata-refiner[zarr]",
|
|
49
79
|
"macrodata-refiner[text]",
|
|
50
80
|
"macrodata-refiner[s3]",
|
|
81
|
+
"macrodata-refiner[tfds]",
|
|
51
82
|
"pytest>=8.0.0",
|
|
52
83
|
"pytest-cov>=5.0.0",
|
|
53
84
|
]
|
|
@@ -57,6 +88,7 @@ all = [
|
|
|
57
88
|
|
|
58
89
|
[project.scripts]
|
|
59
90
|
macrodata = "refiner.cli.main:main"
|
|
91
|
+
mdr = "refiner.cli.main:main"
|
|
60
92
|
|
|
61
93
|
[build-system]
|
|
62
94
|
requires = ["setuptools>=77", "wheel"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrodata-refiner
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
5
|
Author: Macrodata Labs
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -13,30 +13,55 @@ Requires-Python: >=3.10
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
15
|
Requires-Dist: cloudpickle==3.1.2
|
|
16
|
-
Requires-Dist: fsspec
|
|
16
|
+
Requires-Dist: fsspec[http]
|
|
17
17
|
Requires-Dist: httpx
|
|
18
18
|
Requires-Dist: loguru
|
|
19
|
-
Requires-Dist: opentelemetry-exporter-otlp-proto-http
|
|
20
|
-
Requires-Dist: opentelemetry-sdk
|
|
21
19
|
Requires-Dist: numpy
|
|
22
|
-
Requires-Dist: psutil
|
|
23
20
|
Requires-Dist: orjson
|
|
21
|
+
Requires-Dist: packaging
|
|
24
22
|
Requires-Dist: pyarrow
|
|
25
23
|
Requires-Dist: msgspec>=0.20.0
|
|
24
|
+
Requires-Dist: pydantic>=2.0.0
|
|
26
25
|
Provides-Extra: video
|
|
27
26
|
Requires-Dist: av; extra == "video"
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
Requires-Dist:
|
|
31
|
-
Requires-Dist:
|
|
27
|
+
Requires-Dist: pillow; extra == "video"
|
|
28
|
+
Provides-Extra: hf
|
|
29
|
+
Requires-Dist: datasets>=3.0.0; extra == "hf"
|
|
30
|
+
Requires-Dist: huggingface-hub>=1.4.1; extra == "hf"
|
|
31
|
+
Requires-Dist: hf>=1.7.1; extra == "hf"
|
|
32
|
+
Provides-Extra: egocentric
|
|
33
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "egocentric"
|
|
34
|
+
Requires-Dist: macrodata-refiner[video]; extra == "egocentric"
|
|
35
|
+
Requires-Dist: ego-vision[models]>=0.1.8; extra == "egocentric"
|
|
32
36
|
Provides-Extra: text
|
|
33
37
|
Requires-Dist: warcio; extra == "text"
|
|
38
|
+
Provides-Extra: hdf5
|
|
39
|
+
Requires-Dist: h5py; extra == "hdf5"
|
|
40
|
+
Provides-Extra: zarr
|
|
41
|
+
Requires-Dist: zarr<3,>=2.18; extra == "zarr"
|
|
42
|
+
Requires-Dist: numcodecs<0.16; extra == "zarr"
|
|
43
|
+
Provides-Extra: mcap
|
|
44
|
+
Requires-Dist: av; extra == "mcap"
|
|
45
|
+
Requires-Dist: mcap; extra == "mcap"
|
|
46
|
+
Requires-Dist: mcap-protobuf-support; extra == "mcap"
|
|
47
|
+
Requires-Dist: mcap-ros2-support; extra == "mcap"
|
|
48
|
+
Requires-Dist: pillow; extra == "mcap"
|
|
34
49
|
Provides-Extra: s3
|
|
35
50
|
Requires-Dist: s3fs; extra == "s3"
|
|
51
|
+
Provides-Extra: tensorflow
|
|
52
|
+
Requires-Dist: tensorflow; extra == "tensorflow"
|
|
53
|
+
Provides-Extra: tfds
|
|
54
|
+
Requires-Dist: macrodata-refiner[tensorflow]; extra == "tfds"
|
|
55
|
+
Requires-Dist: tensorflow-datasets; extra == "tfds"
|
|
36
56
|
Provides-Extra: testing
|
|
37
|
-
Requires-Dist: macrodata-refiner[
|
|
57
|
+
Requires-Dist: macrodata-refiner[hdf5]; extra == "testing"
|
|
58
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "testing"
|
|
59
|
+
Requires-Dist: macrodata-refiner[mcap]; extra == "testing"
|
|
60
|
+
Requires-Dist: macrodata-refiner[video]; extra == "testing"
|
|
61
|
+
Requires-Dist: macrodata-refiner[zarr]; extra == "testing"
|
|
38
62
|
Requires-Dist: macrodata-refiner[text]; extra == "testing"
|
|
39
63
|
Requires-Dist: macrodata-refiner[s3]; extra == "testing"
|
|
64
|
+
Requires-Dist: macrodata-refiner[tfds]; extra == "testing"
|
|
40
65
|
Requires-Dist: pytest>=8.0.0; extra == "testing"
|
|
41
66
|
Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
|
|
42
67
|
Provides-Extra: all
|
|
@@ -49,9 +74,10 @@ Dynamic: license-file
|
|
|
49
74
|
|
|
50
75
|
<h1 align="center">Macrodata Refiner</h1>
|
|
51
76
|
|
|
52
|
-
Refiner is an open-source engine for turning raw
|
|
77
|
+
Refiner is an open-source engine for turning raw robotics and multimodal data into **high-quality datasets** for model training.
|
|
53
78
|
|
|
54
|
-
It
|
|
79
|
+
It gives training-data teams one pipeline model for multimodal data, robotics
|
|
80
|
+
workflows, and model-based processing.
|
|
55
81
|
|
|
56
82
|
It also plugs into the Macrodata platform, which gives you visibility into what is happening to your data while pipelines run: job and shard lifecycle, logs, metrics, manifests, and pipeline behavior. The same code can run locally for development and then scale out through Macrodata's elastic serverless cloud.
|
|
57
83
|
|
|
@@ -90,7 +116,7 @@ import refiner as mdr
|
|
|
90
116
|
pad_frames=5,
|
|
91
117
|
)
|
|
92
118
|
)
|
|
93
|
-
.write_lerobot("hf://buckets/
|
|
119
|
+
.write_lerobot("hf://buckets/acme-robotics/aloha_motion")
|
|
94
120
|
.launch_cloud(
|
|
95
121
|
name="motion_trim",
|
|
96
122
|
num_workers=4,
|
|
@@ -98,7 +124,7 @@ import refiner as mdr
|
|
|
98
124
|
)
|
|
99
125
|
```
|
|
100
126
|
|
|
101
|
-
Need cloud GPUs? See [
|
|
127
|
+
Need cloud GPUs? See [Resources, GPUs, and Services](docs/running-pipelines/resources-gpus-and-services.md).
|
|
102
128
|
|
|
103
129
|
### Local example
|
|
104
130
|
|
|
@@ -137,31 +163,33 @@ def add_preview(row):
|
|
|
137
163
|
|
|
138
164
|
- training-data-first pipeline primitives instead of generic ETL abstractions
|
|
139
165
|
- multimodal processing, with robotics support today
|
|
140
|
-
-
|
|
166
|
+
- built-in readers, transforms, sinks, and runtime machinery for common dataset work
|
|
141
167
|
- access to any storage backend supported by `fsspec` (S3, GCP, Hugging Face, etc.)
|
|
142
168
|
- local execution for development and elastic cloud execution for large runs
|
|
143
|
-
- built-in observability through the Macrodata platform
|
|
169
|
+
- built-in observability through the Macrodata platform for job state, logs, metrics, and manifests
|
|
144
170
|
|
|
145
171
|
## Docs
|
|
146
172
|
|
|
147
|
-
|
|
173
|
+
Start here:
|
|
148
174
|
|
|
149
|
-
- [
|
|
150
|
-
- [
|
|
151
|
-
- [
|
|
175
|
+
- [Docs index](docs/index.md)
|
|
176
|
+
- [Quickstart](docs/quickstart.md)
|
|
177
|
+
- [Running pipelines](docs/running-pipelines/index.md)
|
|
152
178
|
|
|
153
|
-
|
|
179
|
+
Build a dataset:
|
|
154
180
|
|
|
155
|
-
- [Reading
|
|
156
|
-
- [
|
|
157
|
-
- [
|
|
158
|
-
- [
|
|
159
|
-
- [
|
|
181
|
+
- [Reading data](docs/reading-data/index.md)
|
|
182
|
+
- [Episode data](docs/episode-data/index.md)
|
|
183
|
+
- [Transforms](docs/transforms/index.md)
|
|
184
|
+
- [Episode operations](docs/episode-operations/index.md)
|
|
185
|
+
- [Writing data](docs/writing-data/index.md)
|
|
186
|
+
- [Examples](docs/examples/index.md)
|
|
160
187
|
|
|
161
|
-
|
|
188
|
+
Operate jobs:
|
|
162
189
|
|
|
163
|
-
- [
|
|
164
|
-
- [
|
|
190
|
+
- [Platform](docs/platform/index.md)
|
|
191
|
+
- [CLI](docs/cli/index.md)
|
|
192
|
+
- [Reference](docs/reference/index.md)
|
|
165
193
|
|
|
166
194
|
## Community
|
|
167
195
|
|
{macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/macrodata_refiner.egg-info/SOURCES.txt
RENAMED
|
@@ -8,11 +8,37 @@ src/macrodata_refiner.egg-info/entry_points.txt
|
|
|
8
8
|
src/macrodata_refiner.egg-info/requires.txt
|
|
9
9
|
src/macrodata_refiner.egg-info/top_level.txt
|
|
10
10
|
src/refiner/__init__.py
|
|
11
|
+
src/refiner/job_urls.py
|
|
11
12
|
src/refiner/py.typed
|
|
12
13
|
src/refiner/cli/__init__.py
|
|
13
14
|
src/refiner/cli/auth.py
|
|
15
|
+
src/refiner/cli/common.py
|
|
14
16
|
src/refiner/cli/main.py
|
|
15
|
-
src/refiner/cli/
|
|
17
|
+
src/refiner/cli/secrets.py
|
|
18
|
+
src/refiner/cli/commands/__init__.py
|
|
19
|
+
src/refiner/cli/commands/auth.py
|
|
20
|
+
src/refiner/cli/commands/jobs.py
|
|
21
|
+
src/refiner/cli/commands/run.py
|
|
22
|
+
src/refiner/cli/commands/secrets.py
|
|
23
|
+
src/refiner/cli/jobs/__init__.py
|
|
24
|
+
src/refiner/cli/jobs/attach.py
|
|
25
|
+
src/refiner/cli/jobs/common.py
|
|
26
|
+
src/refiner/cli/jobs/control.py
|
|
27
|
+
src/refiner/cli/jobs/follow.py
|
|
28
|
+
src/refiner/cli/jobs/get.py
|
|
29
|
+
src/refiner/cli/jobs/list.py
|
|
30
|
+
src/refiner/cli/jobs/logs.py
|
|
31
|
+
src/refiner/cli/jobs/manifest.py
|
|
32
|
+
src/refiner/cli/jobs/metrics.py
|
|
33
|
+
src/refiner/cli/jobs/workers.py
|
|
34
|
+
src/refiner/cli/run/__init__.py
|
|
35
|
+
src/refiner/cli/run/cloud.py
|
|
36
|
+
src/refiner/cli/run/command.py
|
|
37
|
+
src/refiner/cli/run/local.py
|
|
38
|
+
src/refiner/cli/run/modes.py
|
|
39
|
+
src/refiner/cli/ui/__init__.py
|
|
40
|
+
src/refiner/cli/ui/console.py
|
|
41
|
+
src/refiner/cli/ui/terminal.py
|
|
16
42
|
src/refiner/execution/__init__.py
|
|
17
43
|
src/refiner/execution/buffer.py
|
|
18
44
|
src/refiner/execution/engine.py
|
|
@@ -24,6 +50,25 @@ src/refiner/execution/operators/row.py
|
|
|
24
50
|
src/refiner/execution/operators/vectorized.py
|
|
25
51
|
src/refiner/execution/tracking/__init__.py
|
|
26
52
|
src/refiner/execution/tracking/shards.py
|
|
53
|
+
src/refiner/inference/__init__.py
|
|
54
|
+
src/refiner/inference/capabilities.py
|
|
55
|
+
src/refiner/inference/generate_pooling.py
|
|
56
|
+
src/refiner/inference/generate_text.py
|
|
57
|
+
src/refiner/inference/types.py
|
|
58
|
+
src/refiner/inference/internal/__init__.py
|
|
59
|
+
src/refiner/inference/internal/media.py
|
|
60
|
+
src/refiner/inference/internal/message_conversion.py
|
|
61
|
+
src/refiner/inference/internal/response.py
|
|
62
|
+
src/refiner/inference/internal/runtime.py
|
|
63
|
+
src/refiner/inference/internal/schema.py
|
|
64
|
+
src/refiner/inference/internal/transport.py
|
|
65
|
+
src/refiner/inference/internal/usage.py
|
|
66
|
+
src/refiner/inference/providers/__init__.py
|
|
67
|
+
src/refiner/inference/providers/anthropic.py
|
|
68
|
+
src/refiner/inference/providers/base.py
|
|
69
|
+
src/refiner/inference/providers/google.py
|
|
70
|
+
src/refiner/inference/providers/openai.py
|
|
71
|
+
src/refiner/inference/providers/warnings.py
|
|
27
72
|
src/refiner/io/__init__.py
|
|
28
73
|
src/refiner/io/datafile.py
|
|
29
74
|
src/refiner/io/datafolder.py
|
|
@@ -32,21 +77,29 @@ src/refiner/launchers/__init__.py
|
|
|
32
77
|
src/refiner/launchers/base.py
|
|
33
78
|
src/refiner/launchers/cloud.py
|
|
34
79
|
src/refiner/launchers/local.py
|
|
80
|
+
src/refiner/launchers/secrets.py
|
|
35
81
|
src/refiner/pipeline/__init__.py
|
|
36
82
|
src/refiner/pipeline/expressions.py
|
|
37
83
|
src/refiner/pipeline/pipeline.py
|
|
38
84
|
src/refiner/pipeline/planning.py
|
|
85
|
+
src/refiner/pipeline/resources.py
|
|
39
86
|
src/refiner/pipeline/steps.py
|
|
40
87
|
src/refiner/pipeline/data/block.py
|
|
88
|
+
src/refiner/pipeline/data/datatype.py
|
|
41
89
|
src/refiner/pipeline/data/row.py
|
|
42
90
|
src/refiner/pipeline/data/shard.py
|
|
43
91
|
src/refiner/pipeline/data/tabular.py
|
|
44
92
|
src/refiner/pipeline/sinks/__init__.py
|
|
93
|
+
src/refiner/pipeline/sinks/assets.py
|
|
45
94
|
src/refiner/pipeline/sinks/base.py
|
|
46
95
|
src/refiner/pipeline/sinks/jsonl.py
|
|
47
96
|
src/refiner/pipeline/sinks/lerobot.py
|
|
48
|
-
src/refiner/pipeline/sinks/lerobot_reducer.py
|
|
49
97
|
src/refiner/pipeline/sinks/parquet.py
|
|
98
|
+
src/refiner/pipeline/sinks/zarr.py
|
|
99
|
+
src/refiner/pipeline/sinks/reducer/__init__.py
|
|
100
|
+
src/refiner/pipeline/sinks/reducer/file.py
|
|
101
|
+
src/refiner/pipeline/sinks/reducer/lerobot.py
|
|
102
|
+
src/refiner/pipeline/sinks/reducer/zarr.py
|
|
50
103
|
src/refiner/pipeline/sources/__init__.py
|
|
51
104
|
src/refiner/pipeline/sources/base.py
|
|
52
105
|
src/refiner/pipeline/sources/items.py
|
|
@@ -54,10 +107,17 @@ src/refiner/pipeline/sources/task.py
|
|
|
54
107
|
src/refiner/pipeline/sources/readers/__init__.py
|
|
55
108
|
src/refiner/pipeline/sources/readers/base.py
|
|
56
109
|
src/refiner/pipeline/sources/readers/csv.py
|
|
57
|
-
src/refiner/pipeline/sources/readers/
|
|
110
|
+
src/refiner/pipeline/sources/readers/files.py
|
|
111
|
+
src/refiner/pipeline/sources/readers/hdf5.py
|
|
112
|
+
src/refiner/pipeline/sources/readers/hf_dataset.py
|
|
113
|
+
src/refiner/pipeline/sources/readers/json.py
|
|
58
114
|
src/refiner/pipeline/sources/readers/lerobot.py
|
|
115
|
+
src/refiner/pipeline/sources/readers/mcap.py
|
|
59
116
|
src/refiner/pipeline/sources/readers/parquet.py
|
|
117
|
+
src/refiner/pipeline/sources/readers/tfds.py
|
|
118
|
+
src/refiner/pipeline/sources/readers/tfrecord.py
|
|
60
119
|
src/refiner/pipeline/sources/readers/utils.py
|
|
120
|
+
src/refiner/pipeline/sources/readers/zarr.py
|
|
61
121
|
src/refiner/pipeline/utils/__init__.py
|
|
62
122
|
src/refiner/pipeline/utils/cache/__init__.py
|
|
63
123
|
src/refiner/pipeline/utils/cache/decoder_cache.py
|
|
@@ -68,11 +128,16 @@ src/refiner/platform/auth.py
|
|
|
68
128
|
src/refiner/platform/manifest.py
|
|
69
129
|
src/refiner/platform/client/__init__.py
|
|
70
130
|
src/refiner/platform/client/api.py
|
|
71
|
-
src/refiner/platform/client/http.py
|
|
72
131
|
src/refiner/platform/client/models.py
|
|
73
132
|
src/refiner/platform/client/serialize.py
|
|
74
133
|
src/refiner/robotics/__init__.py
|
|
134
|
+
src/refiner/robotics/egocentric.py
|
|
75
135
|
src/refiner/robotics/motion.py
|
|
136
|
+
src/refiner/robotics/reward.py
|
|
137
|
+
src/refiner/robotics/row.py
|
|
138
|
+
src/refiner/robotics/subtask_annotation.py
|
|
139
|
+
src/refiner/robotics/synchronization.py
|
|
140
|
+
src/refiner/robotics/tabular.py
|
|
76
141
|
src/refiner/robotics/lerobot_format/__init__.py
|
|
77
142
|
src/refiner/robotics/lerobot_format/row.py
|
|
78
143
|
src/refiner/robotics/lerobot_format/tabular.py
|
|
@@ -81,11 +146,17 @@ src/refiner/robotics/lerobot_format/metadata/info.py
|
|
|
81
146
|
src/refiner/robotics/lerobot_format/metadata/metadata.py
|
|
82
147
|
src/refiner/robotics/lerobot_format/metadata/stats.py
|
|
83
148
|
src/refiner/robotics/lerobot_format/metadata/tasks.py
|
|
149
|
+
src/refiner/services/__init__.py
|
|
150
|
+
src/refiner/services/base.py
|
|
151
|
+
src/refiner/services/discovery.py
|
|
152
|
+
src/refiner/services/manager.py
|
|
153
|
+
src/refiner/services/vllm.py
|
|
84
154
|
src/refiner/text/__init__.py
|
|
85
155
|
src/refiner/text/commoncrawl.py
|
|
86
156
|
src/refiner/utils/__init__.py
|
|
87
157
|
src/refiner/utils/imports.py
|
|
88
158
|
src/refiner/video/__init__.py
|
|
159
|
+
src/refiner/video/decode.py
|
|
89
160
|
src/refiner/video/remux.py
|
|
90
161
|
src/refiner/video/transcode.py
|
|
91
162
|
src/refiner/video/types.py
|
|
@@ -93,25 +164,17 @@ src/refiner/video/writer.py
|
|
|
93
164
|
src/refiner/worker/__init__.py
|
|
94
165
|
src/refiner/worker/context.py
|
|
95
166
|
src/refiner/worker/entrypoint.py
|
|
167
|
+
src/refiner/worker/lifecycle.py
|
|
96
168
|
src/refiner/worker/runner.py
|
|
97
169
|
src/refiner/worker/workdir.py
|
|
98
|
-
src/refiner/worker/lifecycle/__init__.py
|
|
99
|
-
src/refiner/worker/lifecycle/base.py
|
|
100
|
-
src/refiner/worker/lifecycle/platform.py
|
|
101
|
-
src/refiner/worker/lifecycle/local/__init__.py
|
|
102
|
-
src/refiner/worker/lifecycle/local/claim.py
|
|
103
|
-
src/refiner/worker/lifecycle/local/files.py
|
|
104
|
-
src/refiner/worker/lifecycle/local/lifecycle.py
|
|
105
170
|
src/refiner/worker/metrics/__init__.py
|
|
106
171
|
src/refiner/worker/metrics/api.py
|
|
107
|
-
src/refiner/worker/metrics/
|
|
108
|
-
src/refiner/worker/metrics/otel.py
|
|
172
|
+
src/refiner/worker/metrics/emitter.py
|
|
109
173
|
src/refiner/worker/resources/__init__.py
|
|
110
174
|
src/refiner/worker/resources/cpu.py
|
|
111
175
|
src/refiner/worker/resources/gpu.py
|
|
112
|
-
src/refiner/worker/resources/memory.py
|
|
113
|
-
src/refiner/worker/resources/network.py
|
|
114
176
|
tests/test_cache.py
|
|
115
177
|
tests/test_commoncrawl_text.py
|
|
116
178
|
tests/test_expressions.py
|
|
117
|
-
tests/test_optional_dependencies.py
|
|
179
|
+
tests/test_optional_dependencies.py
|
|
180
|
+
tests/test_video_decode.py
|