macrodata-refiner 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/PKG-INFO +22 -15
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/pyproject.toml +19 -10
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/PKG-INFO +22 -15
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/SOURCES.txt +2 -1
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/requires.txt +21 -12
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/execution/asyncio/window.py +16 -5
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/execution/operators/row.py +36 -10
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/__init__.py +2 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/capabilities.py +9 -25
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/generate_text.py +34 -17
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/message_conversion.py +1 -1
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/runtime.py +44 -27
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/transport.py +82 -47
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/providers/anthropic.py +19 -12
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/providers/google.py +14 -7
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/providers/openai.py +31 -16
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/providers/warnings.py +2 -2
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/types.py +16 -1
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/io/datafile.py +4 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/io/datafolder.py +4 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/io/fileset.py +20 -1
- macrodata_refiner-0.3.2/src/refiner/io/utils.py +29 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/launchers/cloud.py +18 -9
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/pipeline.py +241 -22
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/planning.py +11 -2
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/base.py +22 -1
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/lerobot.py +82 -22
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/reducer/zarr.py +3 -2
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/zarr.py +5 -1
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/base.py +19 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/base.py +3 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/hdf5.py +3 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/hf_dataset.py +46 -20
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/lerobot.py +70 -20
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/mcap.py +3 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/tfds.py +3 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/tfrecord.py +3 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/zarr.py +9 -2
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/platform/manifest.py +76 -17
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/__init__.py +1 -3
- macrodata_refiner-0.3.2/src/refiner/robotics/hand_tracking.py +152 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/row.py +100 -5
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/reward.py +51 -31
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/row.py +187 -12
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/subtask_annotation.py +234 -160
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/text/commoncrawl.py +46 -21
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/tests/test_commoncrawl_text.py +25 -0
- macrodata_refiner-0.3.0/src/refiner/robotics/egocentric.py +0 -99
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/LICENSE +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/README.md +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/setup.cfg +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/dependency_links.txt +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/entry_points.txt +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/top_level.txt +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/auth.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/commands/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/commands/auth.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/commands/jobs.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/commands/run.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/commands/secrets.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/common.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/attach.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/common.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/control.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/follow.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/get.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/list.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/logs.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/manifest.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/metrics.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/jobs/workers.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/main.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/run/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/run/cloud.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/run/command.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/run/local.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/run/modes.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/secrets.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/ui/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/ui/console.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/cli/ui/terminal.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/execution/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/execution/asyncio/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/execution/asyncio/runtime.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/execution/buffer.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/execution/engine.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/execution/operators/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/execution/operators/vectorized.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/execution/tracking/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/execution/tracking/shards.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/generate_pooling.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/media.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/response.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/schema.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/internal/usage.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/providers/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/inference/providers/base.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/io/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/job_urls.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/launchers/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/launchers/base.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/launchers/local.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/launchers/secrets.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/data/block.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/data/datatype.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/data/row.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/data/shard.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/data/tabular.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/expressions.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/resources.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/assets.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/jsonl.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/parquet.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/reducer/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/reducer/file.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sinks/reducer/lerobot.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/items.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/csv.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/files.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/json.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/parquet.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/readers/utils.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/sources/task.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/steps.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/utils/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/utils/cache/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/utils/cache/decoder_cache.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/utils/cache/file_cache.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/pipeline/utils/cache/lease_cache.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/platform/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/platform/auth.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/platform/client/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/platform/client/api.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/platform/client/models.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/platform/client/serialize.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/py.typed +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/metadata/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/metadata/info.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/metadata/metadata.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/metadata/stats.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/metadata/tasks.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/lerobot_format/tabular.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/motion.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/synchronization.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/robotics/tabular.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/services/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/services/base.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/services/discovery.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/services/manager.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/services/vllm.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/text/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/utils/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/utils/imports.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/video/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/video/decode.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/video/remux.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/video/transcode.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/video/types.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/video/writer.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/worker/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/worker/context.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/worker/entrypoint.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/worker/lifecycle.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/worker/metrics/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/worker/metrics/api.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/worker/metrics/emitter.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/worker/resources/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/worker/resources/cpu.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/worker/resources/gpu.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/worker/runner.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/refiner/worker/workdir.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/tests/test_cache.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/tests/test_expressions.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/tests/test_optional_dependencies.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/tests/test_video_decode.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrodata-refiner
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
5
|
Author: Macrodata Labs
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
|
+
Requires-Dist: aiohttp
|
|
15
16
|
Requires-Dist: cloudpickle==3.1.2
|
|
16
17
|
Requires-Dist: fsspec[http]
|
|
17
18
|
Requires-Dist: httpx
|
|
@@ -26,13 +27,15 @@ Provides-Extra: video
|
|
|
26
27
|
Requires-Dist: av; extra == "video"
|
|
27
28
|
Requires-Dist: pillow; extra == "video"
|
|
28
29
|
Provides-Extra: hf
|
|
29
|
-
Requires-Dist: datasets>=3.0.0; extra == "hf"
|
|
30
30
|
Requires-Dist: huggingface-hub>=1.4.1; extra == "hf"
|
|
31
31
|
Requires-Dist: hf>=1.7.1; extra == "hf"
|
|
32
|
-
Provides-Extra:
|
|
33
|
-
Requires-Dist: macrodata-refiner[hf]; extra == "
|
|
34
|
-
Requires-Dist:
|
|
35
|
-
|
|
32
|
+
Provides-Extra: datasets
|
|
33
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "datasets"
|
|
34
|
+
Requires-Dist: datasets>=3.0.0; extra == "datasets"
|
|
35
|
+
Provides-Extra: hand-tracking
|
|
36
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "hand-tracking"
|
|
37
|
+
Requires-Dist: macrodata-refiner[video]; extra == "hand-tracking"
|
|
38
|
+
Requires-Dist: ego-vision[models]>=0.1.25; extra == "hand-tracking"
|
|
36
39
|
Provides-Extra: text
|
|
37
40
|
Requires-Dist: warcio; extra == "text"
|
|
38
41
|
Provides-Extra: hdf5
|
|
@@ -48,24 +51,28 @@ Requires-Dist: mcap-ros2-support; extra == "mcap"
|
|
|
48
51
|
Requires-Dist: pillow; extra == "mcap"
|
|
49
52
|
Provides-Extra: s3
|
|
50
53
|
Requires-Dist: s3fs; extra == "s3"
|
|
54
|
+
Provides-Extra: gcs
|
|
55
|
+
Requires-Dist: gcsfs; extra == "gcs"
|
|
51
56
|
Provides-Extra: tensorflow
|
|
52
57
|
Requires-Dist: tensorflow; extra == "tensorflow"
|
|
53
58
|
Provides-Extra: tfds
|
|
54
59
|
Requires-Dist: macrodata-refiner[tensorflow]; extra == "tfds"
|
|
55
60
|
Requires-Dist: tensorflow-datasets; extra == "tfds"
|
|
56
61
|
Provides-Extra: testing
|
|
57
|
-
Requires-Dist: macrodata-refiner[hdf5]; extra == "testing"
|
|
58
|
-
Requires-Dist: macrodata-refiner[hf]; extra == "testing"
|
|
59
|
-
Requires-Dist: macrodata-refiner[mcap]; extra == "testing"
|
|
60
|
-
Requires-Dist: macrodata-refiner[video]; extra == "testing"
|
|
61
|
-
Requires-Dist: macrodata-refiner[zarr]; extra == "testing"
|
|
62
|
-
Requires-Dist: macrodata-refiner[text]; extra == "testing"
|
|
63
|
-
Requires-Dist: macrodata-refiner[s3]; extra == "testing"
|
|
64
|
-
Requires-Dist: macrodata-refiner[tfds]; extra == "testing"
|
|
62
|
+
Requires-Dist: macrodata-refiner[all]; extra == "testing"
|
|
65
63
|
Requires-Dist: pytest>=8.0.0; extra == "testing"
|
|
66
64
|
Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
|
|
67
65
|
Provides-Extra: all
|
|
68
|
-
Requires-Dist: macrodata-refiner[testing]; extra == "all"
|
|
66
|
+
Requires-Dist: macrodata-refiner[datasets]; extra == "all"
|
|
67
|
+
Requires-Dist: macrodata-refiner[hdf5]; extra == "all"
|
|
68
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "all"
|
|
69
|
+
Requires-Dist: macrodata-refiner[mcap]; extra == "all"
|
|
70
|
+
Requires-Dist: macrodata-refiner[video]; extra == "all"
|
|
71
|
+
Requires-Dist: macrodata-refiner[zarr]; extra == "all"
|
|
72
|
+
Requires-Dist: macrodata-refiner[text]; extra == "all"
|
|
73
|
+
Requires-Dist: macrodata-refiner[s3]; extra == "all"
|
|
74
|
+
Requires-Dist: macrodata-refiner[gcs]; extra == "all"
|
|
75
|
+
Requires-Dist: macrodata-refiner[tfds]; extra == "all"
|
|
69
76
|
Dynamic: license-file
|
|
70
77
|
|
|
71
78
|
<p align="center">
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "macrodata-refiner"
|
|
3
|
-
version = "0.3.0"
|
|
3
|
+
version = "0.3.2"
|
|
4
4
|
description = "Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -16,6 +16,7 @@ authors = [
|
|
|
16
16
|
]
|
|
17
17
|
requires-python = ">=3.10"
|
|
18
18
|
dependencies = [
|
|
19
|
+
"aiohttp",
|
|
19
20
|
"cloudpickle==3.1.2",
|
|
20
21
|
"fsspec[http]",
|
|
21
22
|
"httpx",
|
|
@@ -34,14 +35,17 @@ video = [
|
|
|
34
35
|
"pillow",
|
|
35
36
|
]
|
|
36
37
|
hf = [
|
|
37
|
-
"datasets>=3.0.0",
|
|
38
38
|
"huggingface-hub>=1.4.1",
|
|
39
39
|
"hf>=1.7.1",
|
|
40
40
|
]
|
|
41
|
-
|
|
41
|
+
datasets = [
|
|
42
|
+
"macrodata-refiner[hf]",
|
|
43
|
+
"datasets>=3.0.0",
|
|
44
|
+
]
|
|
45
|
+
hand_tracking = [
|
|
42
46
|
"macrodata-refiner[hf]",
|
|
43
47
|
"macrodata-refiner[video]",
|
|
44
|
-
"ego-vision[models]>=0.1.
|
|
48
|
+
"ego-vision[models]>=0.1.25",
|
|
45
49
|
]
|
|
46
50
|
text = [
|
|
47
51
|
"warcio",
|
|
@@ -63,6 +67,9 @@ mcap = [
|
|
|
63
67
|
s3 = [
|
|
64
68
|
"s3fs",
|
|
65
69
|
]
|
|
70
|
+
gcs = [
|
|
71
|
+
"gcsfs",
|
|
72
|
+
]
|
|
66
73
|
tensorflow = [
|
|
67
74
|
"tensorflow",
|
|
68
75
|
]
|
|
@@ -71,6 +78,12 @@ tfds = [
|
|
|
71
78
|
"tensorflow-datasets",
|
|
72
79
|
]
|
|
73
80
|
testing = [
|
|
81
|
+
"macrodata-refiner[all]",
|
|
82
|
+
"pytest>=8.0.0",
|
|
83
|
+
"pytest-cov>=5.0.0",
|
|
84
|
+
]
|
|
85
|
+
all = [
|
|
86
|
+
"macrodata-refiner[datasets]",
|
|
74
87
|
"macrodata-refiner[hdf5]",
|
|
75
88
|
"macrodata-refiner[hf]",
|
|
76
89
|
"macrodata-refiner[mcap]",
|
|
@@ -78,12 +91,8 @@ testing = [
|
|
|
78
91
|
"macrodata-refiner[zarr]",
|
|
79
92
|
"macrodata-refiner[text]",
|
|
80
93
|
"macrodata-refiner[s3]",
|
|
94
|
+
"macrodata-refiner[gcs]",
|
|
81
95
|
"macrodata-refiner[tfds]",
|
|
82
|
-
"pytest>=8.0.0",
|
|
83
|
-
"pytest-cov>=5.0.0",
|
|
84
|
-
]
|
|
85
|
-
all = [
|
|
86
|
-
"macrodata-refiner[testing]",
|
|
87
96
|
]
|
|
88
97
|
|
|
89
98
|
[project.scripts]
|
|
@@ -102,7 +111,7 @@ refiner = ["py.typed"]
|
|
|
102
111
|
|
|
103
112
|
[dependency-groups]
|
|
104
113
|
dev = [
|
|
105
|
-
"macrodata-refiner[
|
|
114
|
+
"macrodata-refiner[testing]",
|
|
106
115
|
"pre-commit>=4.0.0",
|
|
107
116
|
"ruff>=0.14.10",
|
|
108
117
|
"ty>=0.0.7",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrodata-refiner
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
5
|
Author: Macrodata Labs
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
|
+
Requires-Dist: aiohttp
|
|
15
16
|
Requires-Dist: cloudpickle==3.1.2
|
|
16
17
|
Requires-Dist: fsspec[http]
|
|
17
18
|
Requires-Dist: httpx
|
|
@@ -26,13 +27,15 @@ Provides-Extra: video
|
|
|
26
27
|
Requires-Dist: av; extra == "video"
|
|
27
28
|
Requires-Dist: pillow; extra == "video"
|
|
28
29
|
Provides-Extra: hf
|
|
29
|
-
Requires-Dist: datasets>=3.0.0; extra == "hf"
|
|
30
30
|
Requires-Dist: huggingface-hub>=1.4.1; extra == "hf"
|
|
31
31
|
Requires-Dist: hf>=1.7.1; extra == "hf"
|
|
32
|
-
Provides-Extra:
|
|
33
|
-
Requires-Dist: macrodata-refiner[hf]; extra == "
|
|
34
|
-
Requires-Dist:
|
|
35
|
-
|
|
32
|
+
Provides-Extra: datasets
|
|
33
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "datasets"
|
|
34
|
+
Requires-Dist: datasets>=3.0.0; extra == "datasets"
|
|
35
|
+
Provides-Extra: hand-tracking
|
|
36
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "hand-tracking"
|
|
37
|
+
Requires-Dist: macrodata-refiner[video]; extra == "hand-tracking"
|
|
38
|
+
Requires-Dist: ego-vision[models]>=0.1.25; extra == "hand-tracking"
|
|
36
39
|
Provides-Extra: text
|
|
37
40
|
Requires-Dist: warcio; extra == "text"
|
|
38
41
|
Provides-Extra: hdf5
|
|
@@ -48,24 +51,28 @@ Requires-Dist: mcap-ros2-support; extra == "mcap"
|
|
|
48
51
|
Requires-Dist: pillow; extra == "mcap"
|
|
49
52
|
Provides-Extra: s3
|
|
50
53
|
Requires-Dist: s3fs; extra == "s3"
|
|
54
|
+
Provides-Extra: gcs
|
|
55
|
+
Requires-Dist: gcsfs; extra == "gcs"
|
|
51
56
|
Provides-Extra: tensorflow
|
|
52
57
|
Requires-Dist: tensorflow; extra == "tensorflow"
|
|
53
58
|
Provides-Extra: tfds
|
|
54
59
|
Requires-Dist: macrodata-refiner[tensorflow]; extra == "tfds"
|
|
55
60
|
Requires-Dist: tensorflow-datasets; extra == "tfds"
|
|
56
61
|
Provides-Extra: testing
|
|
57
|
-
Requires-Dist: macrodata-refiner[hdf5]; extra == "testing"
|
|
58
|
-
Requires-Dist: macrodata-refiner[hf]; extra == "testing"
|
|
59
|
-
Requires-Dist: macrodata-refiner[mcap]; extra == "testing"
|
|
60
|
-
Requires-Dist: macrodata-refiner[video]; extra == "testing"
|
|
61
|
-
Requires-Dist: macrodata-refiner[zarr]; extra == "testing"
|
|
62
|
-
Requires-Dist: macrodata-refiner[text]; extra == "testing"
|
|
63
|
-
Requires-Dist: macrodata-refiner[s3]; extra == "testing"
|
|
64
|
-
Requires-Dist: macrodata-refiner[tfds]; extra == "testing"
|
|
62
|
+
Requires-Dist: macrodata-refiner[all]; extra == "testing"
|
|
65
63
|
Requires-Dist: pytest>=8.0.0; extra == "testing"
|
|
66
64
|
Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
|
|
67
65
|
Provides-Extra: all
|
|
68
|
-
Requires-Dist: macrodata-refiner[testing]; extra == "all"
|
|
66
|
+
Requires-Dist: macrodata-refiner[datasets]; extra == "all"
|
|
67
|
+
Requires-Dist: macrodata-refiner[hdf5]; extra == "all"
|
|
68
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "all"
|
|
69
|
+
Requires-Dist: macrodata-refiner[mcap]; extra == "all"
|
|
70
|
+
Requires-Dist: macrodata-refiner[video]; extra == "all"
|
|
71
|
+
Requires-Dist: macrodata-refiner[zarr]; extra == "all"
|
|
72
|
+
Requires-Dist: macrodata-refiner[text]; extra == "all"
|
|
73
|
+
Requires-Dist: macrodata-refiner[s3]; extra == "all"
|
|
74
|
+
Requires-Dist: macrodata-refiner[gcs]; extra == "all"
|
|
75
|
+
Requires-Dist: macrodata-refiner[tfds]; extra == "all"
|
|
69
76
|
Dynamic: license-file
|
|
70
77
|
|
|
71
78
|
<p align="center">
|
{macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/SOURCES.txt
RENAMED
|
@@ -73,6 +73,7 @@ src/refiner/io/__init__.py
|
|
|
73
73
|
src/refiner/io/datafile.py
|
|
74
74
|
src/refiner/io/datafolder.py
|
|
75
75
|
src/refiner/io/fileset.py
|
|
76
|
+
src/refiner/io/utils.py
|
|
76
77
|
src/refiner/launchers/__init__.py
|
|
77
78
|
src/refiner/launchers/base.py
|
|
78
79
|
src/refiner/launchers/cloud.py
|
|
@@ -131,7 +132,7 @@ src/refiner/platform/client/api.py
|
|
|
131
132
|
src/refiner/platform/client/models.py
|
|
132
133
|
src/refiner/platform/client/serialize.py
|
|
133
134
|
src/refiner/robotics/__init__.py
|
|
134
|
-
src/refiner/robotics/egocentric.py
|
|
135
|
+
src/refiner/robotics/hand_tracking.py
|
|
135
136
|
src/refiner/robotics/motion.py
|
|
136
137
|
src/refiner/robotics/reward.py
|
|
137
138
|
src/refiner/robotics/row.py
|
{macrodata_refiner-0.3.0 → macrodata_refiner-0.3.2}/src/macrodata_refiner.egg-info/requires.txt
RENAMED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
aiohttp
|
|
1
2
|
cloudpickle==3.1.2
|
|
2
3
|
fsspec[http]
|
|
3
4
|
httpx
|
|
@@ -10,18 +11,33 @@ msgspec>=0.20.0
|
|
|
10
11
|
pydantic>=2.0.0
|
|
11
12
|
|
|
12
13
|
[all]
|
|
13
|
-
macrodata-refiner[testing]
|
|
14
|
+
macrodata-refiner[datasets]
|
|
15
|
+
macrodata-refiner[hdf5]
|
|
16
|
+
macrodata-refiner[hf]
|
|
17
|
+
macrodata-refiner[mcap]
|
|
18
|
+
macrodata-refiner[video]
|
|
19
|
+
macrodata-refiner[zarr]
|
|
20
|
+
macrodata-refiner[text]
|
|
21
|
+
macrodata-refiner[s3]
|
|
22
|
+
macrodata-refiner[gcs]
|
|
23
|
+
macrodata-refiner[tfds]
|
|
14
24
|
|
|
15
|
-
[
|
|
25
|
+
[datasets]
|
|
26
|
+
macrodata-refiner[hf]
|
|
27
|
+
datasets>=3.0.0
|
|
28
|
+
|
|
29
|
+
[gcs]
|
|
30
|
+
gcsfs
|
|
31
|
+
|
|
32
|
+
[hand_tracking]
|
|
16
33
|
macrodata-refiner[hf]
|
|
17
34
|
macrodata-refiner[video]
|
|
18
|
-
ego-vision[models]>=0.1.
|
|
35
|
+
ego-vision[models]>=0.1.25
|
|
19
36
|
|
|
20
37
|
[hdf5]
|
|
21
38
|
h5py
|
|
22
39
|
|
|
23
40
|
[hf]
|
|
24
|
-
datasets>=3.0.0
|
|
25
41
|
huggingface-hub>=1.4.1
|
|
26
42
|
hf>=1.7.1
|
|
27
43
|
|
|
@@ -39,14 +55,7 @@ s3fs
|
|
|
39
55
|
tensorflow
|
|
40
56
|
|
|
41
57
|
[testing]
|
|
42
|
-
macrodata-refiner[hdf5]
|
|
43
|
-
macrodata-refiner[hf]
|
|
44
|
-
macrodata-refiner[mcap]
|
|
45
|
-
macrodata-refiner[video]
|
|
46
|
-
macrodata-refiner[zarr]
|
|
47
|
-
macrodata-refiner[text]
|
|
48
|
-
macrodata-refiner[s3]
|
|
49
|
-
macrodata-refiner[tfds]
|
|
58
|
+
macrodata-refiner[all]
|
|
50
59
|
pytest>=8.0.0
|
|
51
60
|
pytest-cov>=5.0.0
|
|
52
61
|
|
|
@@ -2,7 +2,13 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import heapq
|
|
4
4
|
from collections.abc import Coroutine
|
|
5
|
-
from concurrent.futures import
|
|
5
|
+
from concurrent.futures import (
|
|
6
|
+
ALL_COMPLETED,
|
|
7
|
+
FIRST_COMPLETED,
|
|
8
|
+
FIRST_EXCEPTION,
|
|
9
|
+
Future,
|
|
10
|
+
wait,
|
|
11
|
+
)
|
|
6
12
|
from dataclasses import dataclass, field
|
|
7
13
|
from typing import Generic, TypeVar
|
|
8
14
|
|
|
@@ -65,8 +71,8 @@ class AsyncWindow(Generic[T]):
|
|
|
65
71
|
return self._take_ready()
|
|
66
72
|
|
|
67
73
|
def drain(self) -> list[T]:
|
|
68
|
-
"""Wait for
|
|
69
|
-
self._wait_until(return_when=
|
|
74
|
+
"""Wait for in-flight work, failing fast if any future raises."""
|
|
75
|
+
self._wait_until(return_when=FIRST_EXCEPTION)
|
|
70
76
|
return self._take_ready()
|
|
71
77
|
|
|
72
78
|
def cancel_pending(self) -> None:
|
|
@@ -83,13 +89,18 @@ class AsyncWindow(Generic[T]):
|
|
|
83
89
|
return
|
|
84
90
|
self._futures.difference_update(done)
|
|
85
91
|
for future in done:
|
|
86
|
-
|
|
92
|
+
try:
|
|
93
|
+
idx, value = future.result()
|
|
94
|
+
except BaseException:
|
|
95
|
+
self.cancel_pending()
|
|
96
|
+
raise
|
|
87
97
|
self._store_result(idx, value)
|
|
88
98
|
|
|
89
99
|
def _wait_until(self, *, return_when: str) -> None:
|
|
90
100
|
"""Block until the requested completion condition and collect results."""
|
|
91
101
|
while self._futures and (
|
|
92
|
-
return_when
|
|
102
|
+
return_when in {ALL_COMPLETED, FIRST_EXCEPTION}
|
|
103
|
+
or len(self._futures) >= self.max_in_flight
|
|
93
104
|
):
|
|
94
105
|
done, _ = wait(self._futures, return_when=return_when)
|
|
95
106
|
self._collect_done(done)
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
from collections.abc import Iterable, Iterator, Sequence
|
|
4
|
+
from collections.abc import Callable, Coroutine, Iterable, Iterator, Sequence
|
|
5
5
|
from typing import cast
|
|
6
6
|
|
|
7
7
|
from refiner.execution.asyncio.window import AsyncWindow
|
|
8
|
+
from refiner.execution.asyncio.runtime import submit
|
|
8
9
|
from refiner.execution.buffer import RowBuffer
|
|
9
10
|
from refiner.execution.tracking.shards import ShardDeltaFn, ShardDeltaTracker
|
|
10
11
|
from refiner.pipeline.data.row import Row
|
|
@@ -20,6 +21,8 @@ from refiner.pipeline.steps import (
|
|
|
20
21
|
from refiner.worker.context import set_active_step_index
|
|
21
22
|
from refiner.worker.metrics.api import register_gauge
|
|
22
23
|
|
|
24
|
+
AsyncCloseFn = Callable[[], Coroutine[object, object, None]]
|
|
25
|
+
|
|
23
26
|
|
|
24
27
|
def execute_row_steps(
|
|
25
28
|
rows: Iterable[Row],
|
|
@@ -95,11 +98,16 @@ def execute_row_steps(
|
|
|
95
98
|
if window is None:
|
|
96
99
|
return
|
|
97
100
|
for row in inp.take_all():
|
|
98
|
-
row.log_throughput("rows_processed", 1, unit="rows")
|
|
99
101
|
window.submit_blocking(_run_async_step(step=step, row=row))
|
|
100
|
-
|
|
102
|
+
completed_rows = window.take_completed()
|
|
103
|
+
for row in completed_rows:
|
|
104
|
+
row.log_throughput("rows_processed", 1, unit="rows")
|
|
105
|
+
out.extend(completed_rows)
|
|
101
106
|
if flush_all:
|
|
102
|
-
|
|
107
|
+
drained_rows = window.drain()
|
|
108
|
+
for row in drained_rows:
|
|
109
|
+
row.log_throughput("rows_processed", 1, unit="rows")
|
|
110
|
+
out.extend(drained_rows)
|
|
103
111
|
return
|
|
104
112
|
|
|
105
113
|
if isinstance(step, FilterRowStep):
|
|
@@ -169,19 +177,37 @@ def execute_row_steps(
|
|
|
169
177
|
for i in range(len(ordered)):
|
|
170
178
|
_run_step(i, flush_all=flush_all)
|
|
171
179
|
|
|
180
|
+
def _close_async_steps() -> None:
|
|
181
|
+
for window in async_windows:
|
|
182
|
+
if window is not None:
|
|
183
|
+
window.cancel_pending()
|
|
184
|
+
close_fns: list[AsyncCloseFn] = []
|
|
185
|
+
for step in ordered:
|
|
186
|
+
if not isinstance(step, AsyncRowStep):
|
|
187
|
+
continue
|
|
188
|
+
fn = getattr(step, "fn", None)
|
|
189
|
+
close = getattr(fn, "aclose", None)
|
|
190
|
+
if close is not None:
|
|
191
|
+
close_fns.append(cast(AsyncCloseFn, close))
|
|
192
|
+
for close in close_fns:
|
|
193
|
+
submit(close()).result()
|
|
194
|
+
|
|
172
195
|
def _drain_output() -> Iterator[Row]:
|
|
173
196
|
outq = queues[-1]
|
|
174
197
|
if not outq:
|
|
175
198
|
return
|
|
176
199
|
yield from outq.take_all()
|
|
177
200
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
201
|
+
try:
|
|
202
|
+
for row in rows:
|
|
203
|
+
queues[0].append(row)
|
|
204
|
+
_pump(flush_all=False)
|
|
205
|
+
yield from _drain_output()
|
|
182
206
|
|
|
183
|
-
|
|
184
|
-
|
|
207
|
+
_pump(flush_all=True)
|
|
208
|
+
yield from _drain_output()
|
|
209
|
+
finally:
|
|
210
|
+
_close_async_steps()
|
|
185
211
|
|
|
186
212
|
|
|
187
213
|
__all__ = ["execute_row_steps", "ShardDeltaFn"]
|
|
@@ -38,6 +38,7 @@ from refiner.inference.types import (
|
|
|
38
38
|
AnthropicFilePartProviderOptions,
|
|
39
39
|
AnthropicProviderOptions,
|
|
40
40
|
GoogleProviderOptions,
|
|
41
|
+
InferenceProvider,
|
|
41
42
|
InferenceWarning,
|
|
42
43
|
OpenAIProviderOptions,
|
|
43
44
|
)
|
|
@@ -49,6 +50,7 @@ __all__ = [
|
|
|
49
50
|
"GeneratePoolingMapFn",
|
|
50
51
|
"GeneratePoolingPayload",
|
|
51
52
|
"InferenceResponse",
|
|
53
|
+
"InferenceProvider",
|
|
52
54
|
"InferenceAPICallError",
|
|
53
55
|
"InferenceRetryError",
|
|
54
56
|
"InferenceSchemaValidationError",
|
|
@@ -9,12 +9,16 @@ from refiner.inference.providers import (
|
|
|
9
9
|
GoogleEndpointProvider,
|
|
10
10
|
OpenAIEndpointProvider,
|
|
11
11
|
OpenAIResponsesProvider,
|
|
12
|
-
VLLMProvider,
|
|
13
12
|
)
|
|
14
13
|
from refiner.inference.providers import anthropic as anthropic_provider
|
|
15
14
|
from refiner.inference.providers import google as google_provider
|
|
16
15
|
from refiner.inference.providers import openai as openai_provider
|
|
17
|
-
from refiner.inference.types import
|
|
16
|
+
from refiner.inference.types import (
|
|
17
|
+
InferenceProvider,
|
|
18
|
+
InferenceWarning,
|
|
19
|
+
Message,
|
|
20
|
+
ModelCapabilities,
|
|
21
|
+
)
|
|
18
22
|
|
|
19
23
|
_MEDIA_WARNING_BYTES = 20 * 1024 * 1024
|
|
20
24
|
_BASE64_CHARS = frozenset(
|
|
@@ -29,15 +33,7 @@ _TOOL_SETTINGS = {
|
|
|
29
33
|
}
|
|
30
34
|
|
|
31
35
|
|
|
32
|
-
def model_capabilities(
|
|
33
|
-
provider: (
|
|
34
|
-
AnthropicEndpointProvider
|
|
35
|
-
| GoogleEndpointProvider
|
|
36
|
-
| OpenAIEndpointProvider
|
|
37
|
-
| OpenAIResponsesProvider
|
|
38
|
-
| VLLMProvider
|
|
39
|
-
),
|
|
40
|
-
) -> ModelCapabilities:
|
|
36
|
+
def model_capabilities(provider: InferenceProvider) -> ModelCapabilities:
|
|
41
37
|
model = provider.model.lower()
|
|
42
38
|
if isinstance(provider, GoogleEndpointProvider):
|
|
43
39
|
return google_provider.model_capabilities(model)
|
|
@@ -52,13 +48,7 @@ def model_capabilities(
|
|
|
52
48
|
|
|
53
49
|
def capability_warnings(
|
|
54
50
|
*,
|
|
55
|
-
provider:
|
|
56
|
-
AnthropicEndpointProvider
|
|
57
|
-
| GoogleEndpointProvider
|
|
58
|
-
| OpenAIEndpointProvider
|
|
59
|
-
| OpenAIResponsesProvider
|
|
60
|
-
| VLLMProvider
|
|
61
|
-
),
|
|
51
|
+
provider: InferenceProvider,
|
|
62
52
|
messages: Sequence[Message],
|
|
63
53
|
params: Mapping[str, Any],
|
|
64
54
|
provider_options: Mapping[str, Mapping[str, Any]] | None,
|
|
@@ -137,13 +127,7 @@ def capability_warnings(
|
|
|
137
127
|
|
|
138
128
|
def _model_setting_warnings(
|
|
139
129
|
*,
|
|
140
|
-
provider:
|
|
141
|
-
AnthropicEndpointProvider
|
|
142
|
-
| GoogleEndpointProvider
|
|
143
|
-
| OpenAIEndpointProvider
|
|
144
|
-
| OpenAIResponsesProvider
|
|
145
|
-
| VLLMProvider
|
|
146
|
-
),
|
|
130
|
+
provider: InferenceProvider,
|
|
147
131
|
params: Mapping[str, Any],
|
|
148
132
|
provider_options: Mapping[str, Mapping[str, Any]] | None,
|
|
149
133
|
) -> list[InferenceWarning]:
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import inspect
|
|
4
4
|
from collections.abc import Awaitable, Callable, Mapping, Sequence
|
|
5
5
|
from dataclasses import replace
|
|
6
|
-
from typing import Any, Protocol,
|
|
6
|
+
from typing import Any, Protocol, cast
|
|
7
7
|
|
|
8
8
|
from pydantic import BaseModel
|
|
9
9
|
|
|
@@ -33,18 +33,16 @@ from refiner.inference.providers.openai import (
|
|
|
33
33
|
_OpenAIEndpointClient,
|
|
34
34
|
_OpenAIResponsesClient,
|
|
35
35
|
)
|
|
36
|
-
from refiner.inference.types import
|
|
36
|
+
from refiner.inference.types import (
|
|
37
|
+
InferenceProvider,
|
|
38
|
+
InferenceWarning,
|
|
39
|
+
Message,
|
|
40
|
+
ProviderOptions,
|
|
41
|
+
)
|
|
37
42
|
from refiner.pipeline.data.row import Row
|
|
43
|
+
from refiner.pipeline.planning import describe_builtin
|
|
38
44
|
from refiner.pipeline.steps import MapResult
|
|
39
45
|
|
|
40
|
-
_InferenceProvider: TypeAlias = (
|
|
41
|
-
AnthropicEndpointProvider
|
|
42
|
-
| GoogleEndpointProvider
|
|
43
|
-
| OpenAIEndpointProvider
|
|
44
|
-
| OpenAIResponsesProvider
|
|
45
|
-
| VLLMProvider
|
|
46
|
-
)
|
|
47
|
-
|
|
48
46
|
|
|
49
47
|
class GenerateTextFn(Protocol):
|
|
50
48
|
def __call__(
|
|
@@ -52,7 +50,7 @@ class GenerateTextFn(Protocol):
|
|
|
52
50
|
*,
|
|
53
51
|
messages: Sequence[Message] | None = None,
|
|
54
52
|
raw_payload: Mapping[str, Any] | None = None,
|
|
55
|
-
|
|
53
|
+
provider_options: ProviderOptions | None = None,
|
|
56
54
|
maxRetries: int | None = None,
|
|
57
55
|
schema: type[BaseModel] | None = None,
|
|
58
56
|
schemaStrict: bool = True,
|
|
@@ -66,16 +64,36 @@ GenerateTextMapFn = Callable[[Row, GenerateTextFn], Awaitable[MapResult] | MapRe
|
|
|
66
64
|
def generate_text(
|
|
67
65
|
*,
|
|
68
66
|
fn: GenerateTextMapFn,
|
|
69
|
-
provider:
|
|
67
|
+
provider: InferenceProvider,
|
|
70
68
|
default_generation_params: Mapping[str, Any] | None = None,
|
|
71
69
|
max_concurrent_requests: int = 256,
|
|
72
70
|
) -> Callable[[Row], Awaitable[MapResult]]:
|
|
71
|
+
"""Return an async row mapper that issues text-generation requests.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
fn: Row-level function that receives the input row and a request function.
|
|
75
|
+
The request function accepts either typed messages or a raw provider
|
|
76
|
+
payload and returns an ``InferenceResponse``.
|
|
77
|
+
provider: Endpoint or runtime-service provider used to execute requests.
|
|
78
|
+
default_generation_params: Parameters merged into each typed message or
|
|
79
|
+
raw payload request unless overridden by that individual request.
|
|
80
|
+
max_concurrent_requests: Maximum number of provider requests allowed to
|
|
81
|
+
run at once per worker.
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
@describe_builtin(
|
|
85
|
+
"inference.generate_text",
|
|
86
|
+
fn=fn,
|
|
87
|
+
provider=provider.to_builtin_args(),
|
|
88
|
+
max_concurrent_requests=max_concurrent_requests,
|
|
89
|
+
default_generation_params=dict(default_generation_params or {}),
|
|
90
|
+
)
|
|
73
91
|
async def _map(row: Row, request: RequestFn) -> MapResult:
|
|
74
92
|
async def _generate_text(
|
|
75
93
|
*,
|
|
76
94
|
messages: Sequence[Message] | None = None,
|
|
77
95
|
raw_payload: Mapping[str, Any] | None = None,
|
|
78
|
-
|
|
96
|
+
provider_options: ProviderOptions | None = None,
|
|
79
97
|
maxRetries: int | None = None,
|
|
80
98
|
schema: type[BaseModel] | None = None,
|
|
81
99
|
schemaStrict: bool = True,
|
|
@@ -83,7 +101,6 @@ def generate_text(
|
|
|
83
101
|
) -> InferenceResponse:
|
|
84
102
|
if (messages is None) == (raw_payload is None):
|
|
85
103
|
raise ValueError("pass exactly one of messages or raw_payload")
|
|
86
|
-
provider_options = providerOptions
|
|
87
104
|
max_retries = maxRetries
|
|
88
105
|
schema_strict = schemaStrict
|
|
89
106
|
schema_info = normalize_schema(
|
|
@@ -95,7 +112,7 @@ def generate_text(
|
|
|
95
112
|
if raw_payload is not None:
|
|
96
113
|
if provider_options is not None:
|
|
97
114
|
raise ValueError(
|
|
98
|
-
"
|
|
115
|
+
"provider_options are not supported with raw_payload"
|
|
99
116
|
)
|
|
100
117
|
if schema is not None:
|
|
101
118
|
raise ValueError("schema is not supported with raw_payload")
|
|
@@ -174,7 +191,7 @@ async def _generate(
|
|
|
174
191
|
|
|
175
192
|
def _build_payload(
|
|
176
193
|
*,
|
|
177
|
-
provider:
|
|
194
|
+
provider: InferenceProvider,
|
|
178
195
|
messages: Sequence[Message],
|
|
179
196
|
params: Mapping[str, Any],
|
|
180
197
|
provider_options: ProviderOptions | None,
|
|
@@ -211,7 +228,7 @@ def _build_payload(
|
|
|
211
228
|
|
|
212
229
|
|
|
213
230
|
def _provider_warnings(
|
|
214
|
-
provider:
|
|
231
|
+
provider: InferenceProvider,
|
|
215
232
|
provider_options: ProviderOptions | None,
|
|
216
233
|
) -> list[InferenceWarning]:
|
|
217
234
|
if isinstance(provider, OpenAIResponsesProvider):
|
|
@@ -20,7 +20,7 @@ def _custom_provider_data(
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def _provider_option(part: Mapping[str, Any], provider: str, key: str) -> object:
|
|
23
|
-
provider_options = part.get("
|
|
23
|
+
provider_options = part.get("provider_options")
|
|
24
24
|
if not isinstance(provider_options, Mapping):
|
|
25
25
|
return None
|
|
26
26
|
options = cast(Mapping[str, Any], provider_options).get(provider)
|