macrodata-refiner 0.3.1__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/PKG-INFO +8 -2
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/pyproject.toml +10 -2
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/PKG-INFO +8 -2
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/SOURCES.txt +1 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/requires.txt +9 -1
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/io/datafile.py +4 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/io/datafolder.py +4 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/io/fileset.py +20 -1
- macrodata_refiner-0.3.2/src/refiner/io/utils.py +29 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/launchers/cloud.py +18 -9
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/pipeline.py +11 -6
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/planning.py +11 -2
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/base.py +22 -1
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/lerobot.py +3 -2
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/reducer/zarr.py +3 -2
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/zarr.py +5 -1
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/base.py +19 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/base.py +3 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/hdf5.py +3 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/hf_dataset.py +46 -20
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/mcap.py +3 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/tfds.py +3 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/tfrecord.py +3 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/zarr.py +9 -2
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/platform/manifest.py +76 -17
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/hand_tracking.py +8 -7
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/text/commoncrawl.py +46 -21
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/tests/test_commoncrawl_text.py +25 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/LICENSE +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/README.md +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/setup.cfg +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/dependency_links.txt +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/entry_points.txt +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/top_level.txt +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/auth.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/commands/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/commands/auth.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/commands/jobs.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/commands/run.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/commands/secrets.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/common.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/attach.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/common.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/control.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/follow.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/get.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/list.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/logs.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/manifest.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/metrics.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/workers.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/main.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/run/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/run/cloud.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/run/command.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/run/local.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/run/modes.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/secrets.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/ui/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/ui/console.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/cli/ui/terminal.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/execution/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/execution/asyncio/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/execution/asyncio/runtime.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/execution/asyncio/window.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/execution/buffer.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/execution/engine.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/execution/operators/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/execution/operators/row.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/execution/operators/vectorized.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/execution/tracking/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/execution/tracking/shards.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/capabilities.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/generate_pooling.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/generate_text.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/media.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/message_conversion.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/response.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/runtime.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/schema.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/transport.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/usage.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/providers/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/providers/anthropic.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/providers/base.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/providers/google.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/providers/openai.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/providers/warnings.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/inference/types.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/io/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/job_urls.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/launchers/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/launchers/base.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/launchers/local.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/launchers/secrets.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/data/block.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/data/datatype.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/data/row.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/data/shard.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/data/tabular.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/expressions.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/resources.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/assets.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/jsonl.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/parquet.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/reducer/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/reducer/file.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/reducer/lerobot.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/items.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/csv.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/files.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/json.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/lerobot.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/parquet.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/utils.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/task.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/steps.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/utils/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/utils/cache/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/utils/cache/decoder_cache.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/utils/cache/file_cache.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/utils/cache/lease_cache.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/platform/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/platform/auth.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/platform/client/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/platform/client/api.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/platform/client/models.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/platform/client/serialize.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/py.typed +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/metadata/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/metadata/info.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/metadata/metadata.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/metadata/stats.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/metadata/tasks.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/row.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/tabular.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/motion.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/reward.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/row.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/subtask_annotation.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/synchronization.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/robotics/tabular.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/services/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/services/base.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/services/discovery.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/services/manager.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/services/vllm.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/text/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/utils/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/utils/imports.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/video/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/video/decode.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/video/remux.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/video/transcode.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/video/types.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/video/writer.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/worker/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/worker/context.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/worker/entrypoint.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/worker/lifecycle.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/worker/metrics/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/worker/metrics/api.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/worker/metrics/emitter.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/worker/resources/__init__.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/worker/resources/cpu.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/worker/resources/gpu.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/worker/runner.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/worker/workdir.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/tests/test_cache.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/tests/test_expressions.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/tests/test_optional_dependencies.py +0 -0
- {macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/tests/test_video_decode.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrodata-refiner
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
5
|
Author: Macrodata Labs
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -27,9 +27,11 @@ Provides-Extra: video
|
|
|
27
27
|
Requires-Dist: av; extra == "video"
|
|
28
28
|
Requires-Dist: pillow; extra == "video"
|
|
29
29
|
Provides-Extra: hf
|
|
30
|
-
Requires-Dist: datasets>=3.0.0; extra == "hf"
|
|
31
30
|
Requires-Dist: huggingface-hub>=1.4.1; extra == "hf"
|
|
32
31
|
Requires-Dist: hf>=1.7.1; extra == "hf"
|
|
32
|
+
Provides-Extra: datasets
|
|
33
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "datasets"
|
|
34
|
+
Requires-Dist: datasets>=3.0.0; extra == "datasets"
|
|
33
35
|
Provides-Extra: hand-tracking
|
|
34
36
|
Requires-Dist: macrodata-refiner[hf]; extra == "hand-tracking"
|
|
35
37
|
Requires-Dist: macrodata-refiner[video]; extra == "hand-tracking"
|
|
@@ -49,6 +51,8 @@ Requires-Dist: mcap-ros2-support; extra == "mcap"
|
|
|
49
51
|
Requires-Dist: pillow; extra == "mcap"
|
|
50
52
|
Provides-Extra: s3
|
|
51
53
|
Requires-Dist: s3fs; extra == "s3"
|
|
54
|
+
Provides-Extra: gcs
|
|
55
|
+
Requires-Dist: gcsfs; extra == "gcs"
|
|
52
56
|
Provides-Extra: tensorflow
|
|
53
57
|
Requires-Dist: tensorflow; extra == "tensorflow"
|
|
54
58
|
Provides-Extra: tfds
|
|
@@ -59,6 +63,7 @@ Requires-Dist: macrodata-refiner[all]; extra == "testing"
|
|
|
59
63
|
Requires-Dist: pytest>=8.0.0; extra == "testing"
|
|
60
64
|
Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
|
|
61
65
|
Provides-Extra: all
|
|
66
|
+
Requires-Dist: macrodata-refiner[datasets]; extra == "all"
|
|
62
67
|
Requires-Dist: macrodata-refiner[hdf5]; extra == "all"
|
|
63
68
|
Requires-Dist: macrodata-refiner[hf]; extra == "all"
|
|
64
69
|
Requires-Dist: macrodata-refiner[mcap]; extra == "all"
|
|
@@ -66,6 +71,7 @@ Requires-Dist: macrodata-refiner[video]; extra == "all"
|
|
|
66
71
|
Requires-Dist: macrodata-refiner[zarr]; extra == "all"
|
|
67
72
|
Requires-Dist: macrodata-refiner[text]; extra == "all"
|
|
68
73
|
Requires-Dist: macrodata-refiner[s3]; extra == "all"
|
|
74
|
+
Requires-Dist: macrodata-refiner[gcs]; extra == "all"
|
|
69
75
|
Requires-Dist: macrodata-refiner[tfds]; extra == "all"
|
|
70
76
|
Dynamic: license-file
|
|
71
77
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "macrodata-refiner"
|
|
3
|
-
version = "0.3.1"
|
|
3
|
+
version = "0.3.2"
|
|
4
4
|
description = "Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -35,10 +35,13 @@ video = [
|
|
|
35
35
|
"pillow",
|
|
36
36
|
]
|
|
37
37
|
hf = [
|
|
38
|
-
"datasets>=3.0.0",
|
|
39
38
|
"huggingface-hub>=1.4.1",
|
|
40
39
|
"hf>=1.7.1",
|
|
41
40
|
]
|
|
41
|
+
datasets = [
|
|
42
|
+
"macrodata-refiner[hf]",
|
|
43
|
+
"datasets>=3.0.0",
|
|
44
|
+
]
|
|
42
45
|
hand_tracking = [
|
|
43
46
|
"macrodata-refiner[hf]",
|
|
44
47
|
"macrodata-refiner[video]",
|
|
@@ -64,6 +67,9 @@ mcap = [
|
|
|
64
67
|
s3 = [
|
|
65
68
|
"s3fs",
|
|
66
69
|
]
|
|
70
|
+
gcs = [
|
|
71
|
+
"gcsfs",
|
|
72
|
+
]
|
|
67
73
|
tensorflow = [
|
|
68
74
|
"tensorflow",
|
|
69
75
|
]
|
|
@@ -77,6 +83,7 @@ testing = [
|
|
|
77
83
|
"pytest-cov>=5.0.0",
|
|
78
84
|
]
|
|
79
85
|
all = [
|
|
86
|
+
"macrodata-refiner[datasets]",
|
|
80
87
|
"macrodata-refiner[hdf5]",
|
|
81
88
|
"macrodata-refiner[hf]",
|
|
82
89
|
"macrodata-refiner[mcap]",
|
|
@@ -84,6 +91,7 @@ all = [
|
|
|
84
91
|
"macrodata-refiner[zarr]",
|
|
85
92
|
"macrodata-refiner[text]",
|
|
86
93
|
"macrodata-refiner[s3]",
|
|
94
|
+
"macrodata-refiner[gcs]",
|
|
87
95
|
"macrodata-refiner[tfds]",
|
|
88
96
|
]
|
|
89
97
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrodata-refiner
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
5
|
Author: Macrodata Labs
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -27,9 +27,11 @@ Provides-Extra: video
|
|
|
27
27
|
Requires-Dist: av; extra == "video"
|
|
28
28
|
Requires-Dist: pillow; extra == "video"
|
|
29
29
|
Provides-Extra: hf
|
|
30
|
-
Requires-Dist: datasets>=3.0.0; extra == "hf"
|
|
31
30
|
Requires-Dist: huggingface-hub>=1.4.1; extra == "hf"
|
|
32
31
|
Requires-Dist: hf>=1.7.1; extra == "hf"
|
|
32
|
+
Provides-Extra: datasets
|
|
33
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "datasets"
|
|
34
|
+
Requires-Dist: datasets>=3.0.0; extra == "datasets"
|
|
33
35
|
Provides-Extra: hand-tracking
|
|
34
36
|
Requires-Dist: macrodata-refiner[hf]; extra == "hand-tracking"
|
|
35
37
|
Requires-Dist: macrodata-refiner[video]; extra == "hand-tracking"
|
|
@@ -49,6 +51,8 @@ Requires-Dist: mcap-ros2-support; extra == "mcap"
|
|
|
49
51
|
Requires-Dist: pillow; extra == "mcap"
|
|
50
52
|
Provides-Extra: s3
|
|
51
53
|
Requires-Dist: s3fs; extra == "s3"
|
|
54
|
+
Provides-Extra: gcs
|
|
55
|
+
Requires-Dist: gcsfs; extra == "gcs"
|
|
52
56
|
Provides-Extra: tensorflow
|
|
53
57
|
Requires-Dist: tensorflow; extra == "tensorflow"
|
|
54
58
|
Provides-Extra: tfds
|
|
@@ -59,6 +63,7 @@ Requires-Dist: macrodata-refiner[all]; extra == "testing"
|
|
|
59
63
|
Requires-Dist: pytest>=8.0.0; extra == "testing"
|
|
60
64
|
Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
|
|
61
65
|
Provides-Extra: all
|
|
66
|
+
Requires-Dist: macrodata-refiner[datasets]; extra == "all"
|
|
62
67
|
Requires-Dist: macrodata-refiner[hdf5]; extra == "all"
|
|
63
68
|
Requires-Dist: macrodata-refiner[hf]; extra == "all"
|
|
64
69
|
Requires-Dist: macrodata-refiner[mcap]; extra == "all"
|
|
@@ -66,6 +71,7 @@ Requires-Dist: macrodata-refiner[video]; extra == "all"
|
|
|
66
71
|
Requires-Dist: macrodata-refiner[zarr]; extra == "all"
|
|
67
72
|
Requires-Dist: macrodata-refiner[text]; extra == "all"
|
|
68
73
|
Requires-Dist: macrodata-refiner[s3]; extra == "all"
|
|
74
|
+
Requires-Dist: macrodata-refiner[gcs]; extra == "all"
|
|
69
75
|
Requires-Dist: macrodata-refiner[tfds]; extra == "all"
|
|
70
76
|
Dynamic: license-file
|
|
71
77
|
|
{macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/requires.txt
RENAMED
|
@@ -11,6 +11,7 @@ msgspec>=0.20.0
|
|
|
11
11
|
pydantic>=2.0.0
|
|
12
12
|
|
|
13
13
|
[all]
|
|
14
|
+
macrodata-refiner[datasets]
|
|
14
15
|
macrodata-refiner[hdf5]
|
|
15
16
|
macrodata-refiner[hf]
|
|
16
17
|
macrodata-refiner[mcap]
|
|
@@ -18,8 +19,16 @@ macrodata-refiner[video]
|
|
|
18
19
|
macrodata-refiner[zarr]
|
|
19
20
|
macrodata-refiner[text]
|
|
20
21
|
macrodata-refiner[s3]
|
|
22
|
+
macrodata-refiner[gcs]
|
|
21
23
|
macrodata-refiner[tfds]
|
|
22
24
|
|
|
25
|
+
[datasets]
|
|
26
|
+
macrodata-refiner[hf]
|
|
27
|
+
datasets>=3.0.0
|
|
28
|
+
|
|
29
|
+
[gcs]
|
|
30
|
+
gcsfs
|
|
31
|
+
|
|
23
32
|
[hand_tracking]
|
|
24
33
|
macrodata-refiner[hf]
|
|
25
34
|
macrodata-refiner[video]
|
|
@@ -29,7 +38,6 @@ ego-vision[models]>=0.1.25
|
|
|
29
38
|
h5py
|
|
30
39
|
|
|
31
40
|
[hf]
|
|
32
|
-
datasets>=3.0.0
|
|
33
41
|
huggingface-hub>=1.4.1
|
|
34
42
|
hf>=1.7.1
|
|
35
43
|
|
|
@@ -11,6 +11,7 @@ from typing import Any, TypeAlias, Union, cast
|
|
|
11
11
|
from fsspec import AbstractFileSystem, url_to_fs
|
|
12
12
|
from fsspec.implementations.http import HTTPFileSystem
|
|
13
13
|
from fsspec.implementations.local import LocalFileSystem
|
|
14
|
+
from refiner.io.utils import required_refiner_extras
|
|
14
15
|
|
|
15
16
|
DataFilePath: TypeAlias = str | PathLike[str]
|
|
16
17
|
DataFileSpec: TypeAlias = tuple[DataFilePath, AbstractFileSystem]
|
|
@@ -146,6 +147,9 @@ class DataFile:
|
|
|
146
147
|
def abs_path(self) -> str:
|
|
147
148
|
return self.fs.unstrip_protocol(self.path).removeprefix("file://")
|
|
148
149
|
|
|
150
|
+
def required_refiner_extras(self) -> tuple[str, ...]:
|
|
151
|
+
return required_refiner_extras(self.path, self.fs)
|
|
152
|
+
|
|
149
153
|
@property
|
|
150
154
|
def is_local(self) -> bool:
|
|
151
155
|
return isinstance(self.fs, LocalFileSystem)
|
|
@@ -7,6 +7,7 @@ from fsspec.implementations.dirfs import DirFileSystem
|
|
|
7
7
|
from fsspec.implementations.local import LocalFileSystem
|
|
8
8
|
|
|
9
9
|
from refiner.io.datafile import DataFile, _storage_options_for_path
|
|
10
|
+
from refiner.io.utils import required_refiner_extras
|
|
10
11
|
|
|
11
12
|
DataFolderPath: TypeAlias = str | PathLike[str]
|
|
12
13
|
DataFolderSpec: TypeAlias = tuple[DataFolderPath, AbstractFileSystem]
|
|
@@ -102,6 +103,9 @@ class DataFolder(DirFileSystem):
|
|
|
102
103
|
# make sure we strip file:// and similar
|
|
103
104
|
return self.fs.unstrip_protocol(self._join(path)).removeprefix("file://")
|
|
104
105
|
|
|
106
|
+
def required_refiner_extras(self) -> tuple[str, ...]:
|
|
107
|
+
return required_refiner_extras(self.path, self.fs)
|
|
108
|
+
|
|
105
109
|
def abs_paths(self, paths: str | Iterable[str]) -> str | list[str]:
|
|
106
110
|
"""
|
|
107
111
|
Transform a list of relative paths into a list of complete paths (including fs protocol and base path)
|
|
@@ -8,8 +8,13 @@ from typing import Any, Literal, TypeAlias, Union, cast
|
|
|
8
8
|
|
|
9
9
|
from fsspec import AbstractFileSystem, url_to_fs
|
|
10
10
|
|
|
11
|
-
from refiner.io.datafile import
|
|
11
|
+
from refiner.io.datafile import (
|
|
12
|
+
DataFile,
|
|
13
|
+
DataFileSpec,
|
|
14
|
+
_storage_options_for_path,
|
|
15
|
+
)
|
|
12
16
|
from refiner.io.datafolder import DataFolder, DataFolderSpec
|
|
17
|
+
from refiner.io.utils import required_refiner_extras
|
|
13
18
|
|
|
14
19
|
DataFileSetInput: TypeAlias = Union[
|
|
15
20
|
str, PathLike[str], DataFileSpec, DataFolderSpec, DataFile, DataFolder
|
|
@@ -22,6 +27,9 @@ class _PathSource:
|
|
|
22
27
|
path: str
|
|
23
28
|
fs: AbstractFileSystem
|
|
24
29
|
|
|
30
|
+
def required_refiner_extras(self) -> tuple[str, ...]:
|
|
31
|
+
return required_refiner_extras(self.path, self.fs)
|
|
32
|
+
|
|
25
33
|
|
|
26
34
|
@dataclass(frozen=True, slots=True)
|
|
27
35
|
class DataFileSet:
|
|
@@ -174,6 +182,17 @@ class DataFileSet:
|
|
|
174
182
|
raise TypeError("DataFileSet entries are not all folders")
|
|
175
183
|
return cast(tuple[DataFolder, ...], entries)
|
|
176
184
|
|
|
185
|
+
def required_refiner_extras(self) -> tuple[str, ...]:
|
|
186
|
+
return tuple(
|
|
187
|
+
sorted(
|
|
188
|
+
{
|
|
189
|
+
extra
|
|
190
|
+
for entry in self.entries
|
|
191
|
+
for extra in entry.required_refiner_extras()
|
|
192
|
+
}
|
|
193
|
+
)
|
|
194
|
+
)
|
|
195
|
+
|
|
177
196
|
@property
|
|
178
197
|
def resolved_entries(self) -> tuple[DataFile | DataFolder, ...]:
|
|
179
198
|
entries: list[DataFile | DataFolder] = []
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fsspec import AbstractFileSystem
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
_PROTOCOL_REFINER_EXTRAS = {
|
|
7
|
+
"s3": "s3",
|
|
8
|
+
"s3a": "s3",
|
|
9
|
+
"hf": "hf",
|
|
10
|
+
"gcs": "gcs",
|
|
11
|
+
"gs": "gcs",
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def required_refiner_extras(path: str, fs: AbstractFileSystem) -> tuple[str, ...]:
|
|
16
|
+
protocol = fs.protocol
|
|
17
|
+
protocols = (protocol,) if isinstance(protocol, str) else tuple(protocol)
|
|
18
|
+
path_protocol, sep, _rest = path.partition("://")
|
|
19
|
+
return tuple(
|
|
20
|
+
sorted(
|
|
21
|
+
{
|
|
22
|
+
extra
|
|
23
|
+
for item in (*protocols, path_protocol if sep else None)
|
|
24
|
+
if item is not None
|
|
25
|
+
and (extra := _PROTOCOL_REFINER_EXTRAS.get(str(item).lower()))
|
|
26
|
+
is not None
|
|
27
|
+
}
|
|
28
|
+
)
|
|
29
|
+
)
|
|
@@ -103,9 +103,11 @@ class CloudLauncher(BaseLauncher):
|
|
|
103
103
|
gpu: Optional GPU runtime request for cloud scheduling.
|
|
104
104
|
sync_local_dependencies: Whether to include packages detected from the
|
|
105
105
|
local environment in the cloud runtime.
|
|
106
|
-
|
|
107
|
-
Entries are requirement strings.
|
|
108
|
-
|
|
106
|
+
dependencies: Additional packages to install in the cloud runtime.
|
|
107
|
+
Entries are requirement strings.
|
|
108
|
+
refiner_extras: Additional macrodata-refiner extras to install in the
|
|
109
|
+
cloud runtime. Built-in blocks automatically declare the extras they
|
|
110
|
+
require; pass this for extras used outside those blocks.
|
|
109
111
|
secrets: Optional secret sources mounted into the cloud runtime.
|
|
110
112
|
env: Optional plain environment variables mounted into the cloud runtime.
|
|
111
113
|
"""
|
|
@@ -119,8 +121,9 @@ class CloudLauncher(BaseLauncher):
|
|
|
119
121
|
cpus_per_worker: int | None = None,
|
|
120
122
|
mem_mb_per_worker: int | None = None,
|
|
121
123
|
gpu: GPU | None = None,
|
|
122
|
-
sync_local_dependencies: bool =
|
|
123
|
-
|
|
124
|
+
sync_local_dependencies: bool = False,
|
|
125
|
+
dependencies: Sequence[str] | None = None,
|
|
126
|
+
refiner_extras: Sequence[str] | None = None,
|
|
124
127
|
secrets: SecretInput | None = None,
|
|
125
128
|
env: dict[str, object | None] | None = None,
|
|
126
129
|
continue_from_job: str | None = None,
|
|
@@ -141,7 +144,8 @@ class CloudLauncher(BaseLauncher):
|
|
|
141
144
|
self.cpus_per_worker = cpus_per_worker
|
|
142
145
|
self.mem_mb_per_worker = mem_mb_per_worker
|
|
143
146
|
self.sync_local_dependencies = sync_local_dependencies
|
|
144
|
-
self.
|
|
147
|
+
self.dependencies = dependencies
|
|
148
|
+
self.refiner_extras = refiner_extras
|
|
145
149
|
self.secrets = normalize_secret_sources(secrets)
|
|
146
150
|
self.env = env
|
|
147
151
|
self.continue_from_job = normalized_continue_from_job
|
|
@@ -153,12 +157,14 @@ class CloudLauncher(BaseLauncher):
|
|
|
153
157
|
return raw.strip().lower() in {"1", "true", "yes", "on"}
|
|
154
158
|
|
|
155
159
|
def _resolve_cloud_manifest(
|
|
156
|
-
self, *, secret_values: tuple[str, ...]
|
|
160
|
+
self, *, secret_values: tuple[str, ...], stages: list[PlannedStage]
|
|
157
161
|
) -> dict[str, object]:
|
|
158
162
|
manifest = build_run_manifest(
|
|
159
163
|
secret_values=secret_values,
|
|
160
164
|
capture_dependencies=self.sync_local_dependencies,
|
|
161
|
-
|
|
165
|
+
dependencies=self.dependencies,
|
|
166
|
+
refiner_extras=self.refiner_extras,
|
|
167
|
+
pipeline_stages=stages,
|
|
162
168
|
)
|
|
163
169
|
environment = manifest.get("environment")
|
|
164
170
|
if environment is None:
|
|
@@ -286,7 +292,10 @@ class CloudLauncher(BaseLauncher):
|
|
|
286
292
|
resolved_secret_sources, secret_values = resolve_secret_sources(self.secrets)
|
|
287
293
|
resolved_env = resolve_env_mapping(self.env) if self.env else None
|
|
288
294
|
stages = self._resolved_stages()
|
|
289
|
-
manifest = self._resolve_cloud_manifest(
|
|
295
|
+
manifest = self._resolve_cloud_manifest(
|
|
296
|
+
secret_values=secret_values,
|
|
297
|
+
stages=stages,
|
|
298
|
+
)
|
|
290
299
|
plan = self._compiled_plan(stages, secret_values=secret_values)
|
|
291
300
|
try:
|
|
292
301
|
pipeline_payloads = self._upload_stage_payloads(
|
|
@@ -708,8 +708,9 @@ class RefinerPipeline:
|
|
|
708
708
|
cpus_per_worker: int | None = None,
|
|
709
709
|
mem_mb_per_worker: int | None = None,
|
|
710
710
|
gpu: GPU | None = None,
|
|
711
|
-
sync_local_dependencies: bool =
|
|
712
|
-
|
|
711
|
+
sync_local_dependencies: bool = False,
|
|
712
|
+
dependencies: Sequence[str] | None = None,
|
|
713
|
+
refiner_extras: Sequence[str] | None = None,
|
|
713
714
|
secrets: SecretInput | None = None,
|
|
714
715
|
env: Mapping[str, object | None] | None = None,
|
|
715
716
|
continue_from_job: str | None = None,
|
|
@@ -725,10 +726,13 @@ class RefinerPipeline:
|
|
|
725
726
|
gpu: Optional structured GPU request.
|
|
726
727
|
sync_local_dependencies: Include packages detected from the local
|
|
727
728
|
environment in the cloud runtime.
|
|
728
|
-
|
|
729
|
+
dependencies: Additional packages to install in the cloud runtime.
|
|
729
730
|
Entries are requirement strings such as `"torch"` or
|
|
730
|
-
`"ego-vision[models]==0.1.2"`.
|
|
731
|
-
|
|
731
|
+
`"ego-vision[models]==0.1.2"`.
|
|
732
|
+
refiner_extras: Additional macrodata-refiner extras to install in
|
|
733
|
+
the cloud runtime. Built-in blocks automatically declare the
|
|
734
|
+
extras they require; pass this for extras used outside those
|
|
735
|
+
blocks.
|
|
732
736
|
secrets: Secret sources to mount inside the cloud image. A mapping keeps
|
|
733
737
|
the legacy behavior; `None` values are loaded from the submitting
|
|
734
738
|
environment. `Secrets.env(...)` references stored workspace secrets.
|
|
@@ -750,7 +754,8 @@ class RefinerPipeline:
|
|
|
750
754
|
mem_mb_per_worker=mem_mb_per_worker,
|
|
751
755
|
gpu=gpu,
|
|
752
756
|
sync_local_dependencies=sync_local_dependencies,
|
|
753
|
-
|
|
757
|
+
dependencies=dependencies,
|
|
758
|
+
refiner_extras=refiner_extras,
|
|
754
759
|
secrets=secrets,
|
|
755
760
|
env=dict(env) if env is not None else None,
|
|
756
761
|
continue_from_job=continue_from_job,
|
|
@@ -329,10 +329,19 @@ def _builtin_description(fn: Any) -> dict[str, Any] | None:
|
|
|
329
329
|
return {"name": name, "args": args, "services": tuple(parsed_services)}
|
|
330
330
|
|
|
331
331
|
|
|
332
|
-
def describe_builtin(
|
|
332
|
+
def describe_builtin(
|
|
333
|
+
name: str, *, refiner_extras: tuple[str, ...] = (), **args: Any
|
|
334
|
+
) -> Any:
|
|
333
335
|
def _decorate(fn: Any) -> Any:
|
|
334
336
|
setattr(
|
|
335
|
-
fn,
|
|
337
|
+
fn,
|
|
338
|
+
_REFINER_BUILTIN_CALL_ATTR,
|
|
339
|
+
{
|
|
340
|
+
"name": name,
|
|
341
|
+
"args": args,
|
|
342
|
+
"services": (),
|
|
343
|
+
"refiner_extras": refiner_extras,
|
|
344
|
+
},
|
|
336
345
|
)
|
|
337
346
|
return fn
|
|
338
347
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from abc import ABC
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any, cast
|
|
5
5
|
|
|
6
6
|
import pyarrow as pa
|
|
7
7
|
|
|
@@ -56,6 +56,27 @@ class BaseSink(ABC):
|
|
|
56
56
|
"""
|
|
57
57
|
return None
|
|
58
58
|
|
|
59
|
+
def required_refiner_extras(self) -> tuple[str, ...]:
|
|
60
|
+
"""macrodata-refiner extras required by this sink."""
|
|
61
|
+
return tuple(
|
|
62
|
+
sorted(
|
|
63
|
+
{
|
|
64
|
+
*self._declared_refiner_extras(),
|
|
65
|
+
*self._io_refiner_extras(),
|
|
66
|
+
}
|
|
67
|
+
)
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
def _declared_refiner_extras(self) -> tuple[str, ...]:
|
|
71
|
+
"""Feature extras declared by this sink."""
|
|
72
|
+
return ()
|
|
73
|
+
|
|
74
|
+
def _io_refiner_extras(self) -> tuple[str, ...]:
|
|
75
|
+
"""Storage extras required by this sink's output, if it has one."""
|
|
76
|
+
if not hasattr(self, "output"):
|
|
77
|
+
return ()
|
|
78
|
+
return cast(Any, self).output.required_refiner_extras()
|
|
79
|
+
|
|
59
80
|
def build_reducer(self) -> "BaseSink | None":
|
|
60
81
|
"""Return an optional 1-worker reducer sink for launched execution.
|
|
61
82
|
|
|
@@ -36,7 +36,6 @@ from refiner.robotics.lerobot_format import (
|
|
|
36
36
|
infer_feature_info,
|
|
37
37
|
)
|
|
38
38
|
from refiner.robotics.row import RoboticsRow
|
|
39
|
-
from refiner.utils import check_required_dependencies
|
|
40
39
|
from refiner.worker.context import get_active_worker_token
|
|
41
40
|
from refiner.worker.metrics.api import register_gauge
|
|
42
41
|
|
|
@@ -101,7 +100,6 @@ class LeRobotWriterSink(BaseSink):
|
|
|
101
100
|
quantile_bins: int = 5000,
|
|
102
101
|
force_recompute_video_stats: bool = False,
|
|
103
102
|
):
|
|
104
|
-
check_required_dependencies("write_lerobot", ["av"], dist="robotics")
|
|
105
103
|
self.output = DataFolder.resolve(output)
|
|
106
104
|
self.data_files_size_in_mb = data_files_size_in_mb
|
|
107
105
|
self.video_files_size_in_mb = video_files_size_in_mb
|
|
@@ -123,6 +121,9 @@ class LeRobotWriterSink(BaseSink):
|
|
|
123
121
|
)
|
|
124
122
|
self._episodes_in_flight_registered = False
|
|
125
123
|
|
|
124
|
+
def _declared_refiner_extras(self) -> tuple[str, ...]:
|
|
125
|
+
return ("video",)
|
|
126
|
+
|
|
126
127
|
def write_shard_block(self, shard_id: str, block: Block) -> None:
|
|
127
128
|
"""Submit one async write task per episode row in the shard-local block."""
|
|
128
129
|
if not self._episodes_in_flight_registered:
|
{macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/reducer/zarr.py
RENAMED
|
@@ -15,7 +15,6 @@ from refiner.pipeline.sinks.zarr import (
|
|
|
15
15
|
_render_store_relpath,
|
|
16
16
|
_zarr_store,
|
|
17
17
|
)
|
|
18
|
-
from refiner.utils import check_required_dependencies
|
|
19
18
|
from refiner.worker.context import get_active_stage_index, get_finalized_workers
|
|
20
19
|
from refiner.worker.lifecycle import sort_finalized_workers
|
|
21
20
|
|
|
@@ -30,7 +29,6 @@ class ZarrReducerSink(FileCleanupReducerSink):
|
|
|
30
29
|
array_chunk_bytes: int = _DEFAULT_ARRAY_CHUNK_BYTES,
|
|
31
30
|
reduce_to_single_store: bool = True,
|
|
32
31
|
) -> None:
|
|
33
|
-
check_required_dependencies("write_zarr", ["zarr"], dist="zarr")
|
|
34
32
|
super().__init__(
|
|
35
33
|
output=output,
|
|
36
34
|
filename_template=(
|
|
@@ -43,6 +41,9 @@ class ZarrReducerSink(FileCleanupReducerSink):
|
|
|
43
41
|
self.array_chunk_bytes = array_chunk_bytes
|
|
44
42
|
self.reduce_to_single_store = reduce_to_single_store
|
|
45
43
|
|
|
44
|
+
def _declared_refiner_extras(self) -> tuple[str, ...]:
|
|
45
|
+
return ("zarr",)
|
|
46
|
+
|
|
46
47
|
def write_shard_block(self, shard_id: str, block: Block) -> None:
|
|
47
48
|
self._run_cleanup()
|
|
48
49
|
if self.reduce_to_single_store:
|
|
@@ -42,7 +42,6 @@ class ZarrSink(BaseSink):
|
|
|
42
42
|
array_chunk_bytes: int = _DEFAULT_ARRAY_CHUNK_BYTES,
|
|
43
43
|
reduce_to_single_store: bool = True,
|
|
44
44
|
):
|
|
45
|
-
check_required_dependencies("write_zarr", ["zarr"], dist="zarr")
|
|
46
45
|
if video_frame_batch_size <= 0:
|
|
47
46
|
raise ValueError("video_frame_batch_size must be greater than zero")
|
|
48
47
|
if array_chunk_bytes <= 0:
|
|
@@ -70,6 +69,9 @@ class ZarrSink(BaseSink):
|
|
|
70
69
|
self._stores: dict[str, _ZarrWriteState] = {}
|
|
71
70
|
self._default_arrays: dict[str, str] | None = None
|
|
72
71
|
|
|
72
|
+
def _declared_refiner_extras(self) -> tuple[str, ...]:
|
|
73
|
+
return ("zarr",)
|
|
74
|
+
|
|
73
75
|
def write_shard_block(self, shard_id: str, block: Block) -> int:
|
|
74
76
|
count = 0
|
|
75
77
|
pending_arrays: dict[str, list[np.ndarray]] = {}
|
|
@@ -311,6 +313,7 @@ class ZarrSink(BaseSink):
|
|
|
311
313
|
store = self._stores.get(relpath)
|
|
312
314
|
if store is not None:
|
|
313
315
|
return store
|
|
316
|
+
check_required_dependencies("write_zarr", ["zarr"], dist="zarr")
|
|
314
317
|
import zarr
|
|
315
318
|
|
|
316
319
|
store = _ZarrWriteState(
|
|
@@ -535,6 +538,7 @@ def _matching_length(lengths: list[int]) -> int | None:
|
|
|
535
538
|
|
|
536
539
|
|
|
537
540
|
def _zarr_store(output: DataFolder, path: str = "", *, mode: str = "r"):
|
|
541
|
+
check_required_dependencies("write_zarr", ["zarr"], dist="zarr")
|
|
538
542
|
import zarr
|
|
539
543
|
|
|
540
544
|
return zarr.storage.FSStore(
|
|
@@ -47,6 +47,25 @@ class BaseSource(ABC):
|
|
|
47
47
|
"""Optional source metadata for planning/observability."""
|
|
48
48
|
return {}
|
|
49
49
|
|
|
50
|
+
def required_refiner_extras(self) -> tuple[str, ...]:
|
|
51
|
+
"""macrodata-refiner extras required by this source."""
|
|
52
|
+
return tuple(
|
|
53
|
+
sorted(
|
|
54
|
+
{
|
|
55
|
+
*self._declared_refiner_extras(),
|
|
56
|
+
*self._io_refiner_extras(),
|
|
57
|
+
}
|
|
58
|
+
)
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
def _declared_refiner_extras(self) -> tuple[str, ...]:
|
|
62
|
+
"""Feature extras declared by this source."""
|
|
63
|
+
return ()
|
|
64
|
+
|
|
65
|
+
def _io_refiner_extras(self) -> tuple[str, ...]:
|
|
66
|
+
"""Storage extras required by this source's normalized IO handles."""
|
|
67
|
+
return ()
|
|
68
|
+
|
|
50
69
|
|
|
51
70
|
__all__ = ["BaseSource"]
|
|
52
71
|
|
{macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/base.py
RENAMED
|
@@ -117,6 +117,9 @@ class BaseReader(BaseSource):
|
|
|
117
117
|
"file_path_column": self.file_path_column,
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
+
def _io_refiner_extras(self) -> tuple[str, ...]:
|
|
121
|
+
return self.fileset.required_refiner_extras()
|
|
122
|
+
|
|
120
123
|
def _with_file_path(
|
|
121
124
|
self, row: dict[str, Any], source_file: DataFile
|
|
122
125
|
) -> dict[str, Any]:
|
{macrodata_refiner-0.3.1 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/hdf5.py
RENAMED
|
@@ -119,6 +119,9 @@ class Hdf5Reader(BaseReader):
|
|
|
119
119
|
)
|
|
120
120
|
return description
|
|
121
121
|
|
|
122
|
+
def _declared_refiner_extras(self) -> tuple[str, ...]:
|
|
123
|
+
return ("hdf5",)
|
|
124
|
+
|
|
122
125
|
def _validate_column_names(self) -> None:
|
|
123
126
|
for name, path in self.datasets.items():
|
|
124
127
|
if path.startswith("/"):
|