macrodata-refiner 0.3.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/PKG-INFO +15 -14
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/pyproject.toml +10 -9
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/PKG-INFO +15 -14
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/SOURCES.txt +1 -1
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/requires.txt +12 -11
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/execution/asyncio/window.py +16 -5
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/execution/operators/row.py +36 -10
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/__init__.py +2 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/capabilities.py +9 -25
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/generate_text.py +34 -17
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/internal/message_conversion.py +1 -1
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/internal/runtime.py +44 -27
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/internal/transport.py +82 -47
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/providers/anthropic.py +19 -12
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/providers/google.py +14 -7
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/providers/openai.py +31 -16
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/providers/warnings.py +2 -2
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/types.py +16 -1
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/pipeline.py +230 -16
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/lerobot.py +79 -20
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/lerobot.py +70 -20
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/__init__.py +1 -3
- macrodata_refiner-0.3.1/src/refiner/robotics/hand_tracking.py +151 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/row.py +100 -5
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/reward.py +51 -31
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/row.py +187 -12
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/subtask_annotation.py +234 -160
- macrodata_refiner-0.3.0/src/refiner/robotics/egocentric.py +0 -99
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/LICENSE +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/README.md +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/setup.cfg +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/dependency_links.txt +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/entry_points.txt +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/top_level.txt +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/auth.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/commands/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/commands/auth.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/commands/jobs.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/commands/run.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/commands/secrets.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/common.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/jobs/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/jobs/attach.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/jobs/common.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/jobs/control.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/jobs/follow.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/jobs/get.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/jobs/list.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/jobs/logs.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/jobs/manifest.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/jobs/metrics.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/jobs/workers.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/main.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/run/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/run/cloud.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/run/command.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/run/local.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/run/modes.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/secrets.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/ui/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/ui/console.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/cli/ui/terminal.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/execution/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/execution/asyncio/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/execution/asyncio/runtime.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/execution/buffer.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/execution/engine.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/execution/operators/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/execution/operators/vectorized.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/execution/tracking/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/execution/tracking/shards.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/generate_pooling.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/internal/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/internal/media.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/internal/response.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/internal/schema.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/internal/usage.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/providers/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/inference/providers/base.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/io/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/io/datafile.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/io/datafolder.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/io/fileset.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/job_urls.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/launchers/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/launchers/base.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/launchers/cloud.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/launchers/local.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/launchers/secrets.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/data/block.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/data/datatype.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/data/row.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/data/shard.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/data/tabular.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/expressions.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/planning.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/resources.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/assets.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/base.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/jsonl.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/parquet.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/reducer/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/reducer/file.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/reducer/lerobot.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/reducer/zarr.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sinks/zarr.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/base.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/items.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/base.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/csv.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/files.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/hdf5.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/hf_dataset.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/json.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/mcap.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/parquet.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/tfds.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/tfrecord.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/utils.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/readers/zarr.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/sources/task.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/steps.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/utils/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/utils/cache/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/utils/cache/decoder_cache.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/utils/cache/file_cache.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/pipeline/utils/cache/lease_cache.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/platform/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/platform/auth.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/platform/client/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/platform/client/api.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/platform/client/models.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/platform/client/serialize.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/platform/manifest.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/py.typed +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/metadata/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/metadata/info.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/metadata/metadata.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/metadata/stats.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/metadata/tasks.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/lerobot_format/tabular.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/motion.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/synchronization.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/robotics/tabular.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/services/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/services/base.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/services/discovery.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/services/manager.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/services/vllm.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/text/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/text/commoncrawl.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/utils/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/utils/imports.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/video/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/video/decode.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/video/remux.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/video/transcode.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/video/types.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/video/writer.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/worker/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/worker/context.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/worker/entrypoint.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/worker/lifecycle.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/worker/metrics/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/worker/metrics/api.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/worker/metrics/emitter.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/worker/resources/__init__.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/worker/resources/cpu.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/worker/resources/gpu.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/worker/runner.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/refiner/worker/workdir.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/tests/test_cache.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/tests/test_commoncrawl_text.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/tests/test_expressions.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/tests/test_optional_dependencies.py +0 -0
- {macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/tests/test_video_decode.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrodata-refiner
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
5
|
Author: Macrodata Labs
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
|
+
Requires-Dist: aiohttp
|
|
15
16
|
Requires-Dist: cloudpickle==3.1.2
|
|
16
17
|
Requires-Dist: fsspec[http]
|
|
17
18
|
Requires-Dist: httpx
|
|
@@ -29,10 +30,10 @@ Provides-Extra: hf
|
|
|
29
30
|
Requires-Dist: datasets>=3.0.0; extra == "hf"
|
|
30
31
|
Requires-Dist: huggingface-hub>=1.4.1; extra == "hf"
|
|
31
32
|
Requires-Dist: hf>=1.7.1; extra == "hf"
|
|
32
|
-
Provides-Extra:
|
|
33
|
-
Requires-Dist: macrodata-refiner[hf]; extra == "
|
|
34
|
-
Requires-Dist: macrodata-refiner[video]; extra == "
|
|
35
|
-
Requires-Dist: ego-vision[models]>=0.1.
|
|
33
|
+
Provides-Extra: hand-tracking
|
|
34
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "hand-tracking"
|
|
35
|
+
Requires-Dist: macrodata-refiner[video]; extra == "hand-tracking"
|
|
36
|
+
Requires-Dist: ego-vision[models]>=0.1.25; extra == "hand-tracking"
|
|
36
37
|
Provides-Extra: text
|
|
37
38
|
Requires-Dist: warcio; extra == "text"
|
|
38
39
|
Provides-Extra: hdf5
|
|
@@ -54,18 +55,18 @@ Provides-Extra: tfds
|
|
|
54
55
|
Requires-Dist: macrodata-refiner[tensorflow]; extra == "tfds"
|
|
55
56
|
Requires-Dist: tensorflow-datasets; extra == "tfds"
|
|
56
57
|
Provides-Extra: testing
|
|
57
|
-
Requires-Dist: macrodata-refiner[hdf5]; extra == "testing"
|
|
58
|
-
Requires-Dist: macrodata-refiner[hf]; extra == "testing"
|
|
59
|
-
Requires-Dist: macrodata-refiner[mcap]; extra == "testing"
|
|
60
|
-
Requires-Dist: macrodata-refiner[video]; extra == "testing"
|
|
61
|
-
Requires-Dist: macrodata-refiner[zarr]; extra == "testing"
|
|
62
|
-
Requires-Dist: macrodata-refiner[text]; extra == "testing"
|
|
63
|
-
Requires-Dist: macrodata-refiner[s3]; extra == "testing"
|
|
64
|
-
Requires-Dist: macrodata-refiner[tfds]; extra == "testing"
|
|
58
|
+
Requires-Dist: macrodata-refiner[all]; extra == "testing"
|
|
65
59
|
Requires-Dist: pytest>=8.0.0; extra == "testing"
|
|
66
60
|
Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
|
|
67
61
|
Provides-Extra: all
|
|
68
|
-
Requires-Dist: macrodata-refiner[testing]; extra == "all"
|
|
62
|
+
Requires-Dist: macrodata-refiner[hdf5]; extra == "all"
|
|
63
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "all"
|
|
64
|
+
Requires-Dist: macrodata-refiner[mcap]; extra == "all"
|
|
65
|
+
Requires-Dist: macrodata-refiner[video]; extra == "all"
|
|
66
|
+
Requires-Dist: macrodata-refiner[zarr]; extra == "all"
|
|
67
|
+
Requires-Dist: macrodata-refiner[text]; extra == "all"
|
|
68
|
+
Requires-Dist: macrodata-refiner[s3]; extra == "all"
|
|
69
|
+
Requires-Dist: macrodata-refiner[tfds]; extra == "all"
|
|
69
70
|
Dynamic: license-file
|
|
70
71
|
|
|
71
72
|
<p align="center">
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "macrodata-refiner"
|
|
3
|
-
version = "0.3.0"
|
|
3
|
+
version = "0.3.1"
|
|
4
4
|
description = "Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -16,6 +16,7 @@ authors = [
|
|
|
16
16
|
]
|
|
17
17
|
requires-python = ">=3.10"
|
|
18
18
|
dependencies = [
|
|
19
|
+
"aiohttp",
|
|
19
20
|
"cloudpickle==3.1.2",
|
|
20
21
|
"fsspec[http]",
|
|
21
22
|
"httpx",
|
|
@@ -38,10 +39,10 @@ hf = [
|
|
|
38
39
|
"huggingface-hub>=1.4.1",
|
|
39
40
|
"hf>=1.7.1",
|
|
40
41
|
]
|
|
41
|
-
|
|
42
|
+
hand_tracking = [
|
|
42
43
|
"macrodata-refiner[hf]",
|
|
43
44
|
"macrodata-refiner[video]",
|
|
44
|
-
"ego-vision[models]>=0.1.
|
|
45
|
+
"ego-vision[models]>=0.1.25",
|
|
45
46
|
]
|
|
46
47
|
text = [
|
|
47
48
|
"warcio",
|
|
@@ -71,6 +72,11 @@ tfds = [
|
|
|
71
72
|
"tensorflow-datasets",
|
|
72
73
|
]
|
|
73
74
|
testing = [
|
|
75
|
+
"macrodata-refiner[all]",
|
|
76
|
+
"pytest>=8.0.0",
|
|
77
|
+
"pytest-cov>=5.0.0",
|
|
78
|
+
]
|
|
79
|
+
all = [
|
|
74
80
|
"macrodata-refiner[hdf5]",
|
|
75
81
|
"macrodata-refiner[hf]",
|
|
76
82
|
"macrodata-refiner[mcap]",
|
|
@@ -79,11 +85,6 @@ testing = [
|
|
|
79
85
|
"macrodata-refiner[text]",
|
|
80
86
|
"macrodata-refiner[s3]",
|
|
81
87
|
"macrodata-refiner[tfds]",
|
|
82
|
-
"pytest>=8.0.0",
|
|
83
|
-
"pytest-cov>=5.0.0",
|
|
84
|
-
]
|
|
85
|
-
all = [
|
|
86
|
-
"macrodata-refiner[testing]",
|
|
87
88
|
]
|
|
88
89
|
|
|
89
90
|
[project.scripts]
|
|
@@ -102,7 +103,7 @@ refiner = ["py.typed"]
|
|
|
102
103
|
|
|
103
104
|
[dependency-groups]
|
|
104
105
|
dev = [
|
|
105
|
-
"macrodata-refiner[
|
|
106
|
+
"macrodata-refiner[testing]",
|
|
106
107
|
"pre-commit>=4.0.0",
|
|
107
108
|
"ruff>=0.14.10",
|
|
108
109
|
"ty>=0.0.7",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrodata-refiner
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
|
|
5
5
|
Author: Macrodata Labs
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
|
+
Requires-Dist: aiohttp
|
|
15
16
|
Requires-Dist: cloudpickle==3.1.2
|
|
16
17
|
Requires-Dist: fsspec[http]
|
|
17
18
|
Requires-Dist: httpx
|
|
@@ -29,10 +30,10 @@ Provides-Extra: hf
|
|
|
29
30
|
Requires-Dist: datasets>=3.0.0; extra == "hf"
|
|
30
31
|
Requires-Dist: huggingface-hub>=1.4.1; extra == "hf"
|
|
31
32
|
Requires-Dist: hf>=1.7.1; extra == "hf"
|
|
32
|
-
Provides-Extra:
|
|
33
|
-
Requires-Dist: macrodata-refiner[hf]; extra == "
|
|
34
|
-
Requires-Dist: macrodata-refiner[video]; extra == "
|
|
35
|
-
Requires-Dist: ego-vision[models]>=0.1.
|
|
33
|
+
Provides-Extra: hand-tracking
|
|
34
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "hand-tracking"
|
|
35
|
+
Requires-Dist: macrodata-refiner[video]; extra == "hand-tracking"
|
|
36
|
+
Requires-Dist: ego-vision[models]>=0.1.25; extra == "hand-tracking"
|
|
36
37
|
Provides-Extra: text
|
|
37
38
|
Requires-Dist: warcio; extra == "text"
|
|
38
39
|
Provides-Extra: hdf5
|
|
@@ -54,18 +55,18 @@ Provides-Extra: tfds
|
|
|
54
55
|
Requires-Dist: macrodata-refiner[tensorflow]; extra == "tfds"
|
|
55
56
|
Requires-Dist: tensorflow-datasets; extra == "tfds"
|
|
56
57
|
Provides-Extra: testing
|
|
57
|
-
Requires-Dist: macrodata-refiner[hdf5]; extra == "testing"
|
|
58
|
-
Requires-Dist: macrodata-refiner[hf]; extra == "testing"
|
|
59
|
-
Requires-Dist: macrodata-refiner[mcap]; extra == "testing"
|
|
60
|
-
Requires-Dist: macrodata-refiner[video]; extra == "testing"
|
|
61
|
-
Requires-Dist: macrodata-refiner[zarr]; extra == "testing"
|
|
62
|
-
Requires-Dist: macrodata-refiner[text]; extra == "testing"
|
|
63
|
-
Requires-Dist: macrodata-refiner[s3]; extra == "testing"
|
|
64
|
-
Requires-Dist: macrodata-refiner[tfds]; extra == "testing"
|
|
58
|
+
Requires-Dist: macrodata-refiner[all]; extra == "testing"
|
|
65
59
|
Requires-Dist: pytest>=8.0.0; extra == "testing"
|
|
66
60
|
Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
|
|
67
61
|
Provides-Extra: all
|
|
68
|
-
Requires-Dist: macrodata-refiner[testing]; extra == "all"
|
|
62
|
+
Requires-Dist: macrodata-refiner[hdf5]; extra == "all"
|
|
63
|
+
Requires-Dist: macrodata-refiner[hf]; extra == "all"
|
|
64
|
+
Requires-Dist: macrodata-refiner[mcap]; extra == "all"
|
|
65
|
+
Requires-Dist: macrodata-refiner[video]; extra == "all"
|
|
66
|
+
Requires-Dist: macrodata-refiner[zarr]; extra == "all"
|
|
67
|
+
Requires-Dist: macrodata-refiner[text]; extra == "all"
|
|
68
|
+
Requires-Dist: macrodata-refiner[s3]; extra == "all"
|
|
69
|
+
Requires-Dist: macrodata-refiner[tfds]; extra == "all"
|
|
69
70
|
Dynamic: license-file
|
|
70
71
|
|
|
71
72
|
<p align="center">
|
{macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/SOURCES.txt
RENAMED
|
@@ -131,7 +131,7 @@ src/refiner/platform/client/api.py
|
|
|
131
131
|
src/refiner/platform/client/models.py
|
|
132
132
|
src/refiner/platform/client/serialize.py
|
|
133
133
|
src/refiner/robotics/__init__.py
|
|
134
|
-
src/refiner/robotics/egocentric.py
|
|
134
|
+
src/refiner/robotics/hand_tracking.py
|
|
135
135
|
src/refiner/robotics/motion.py
|
|
136
136
|
src/refiner/robotics/reward.py
|
|
137
137
|
src/refiner/robotics/row.py
|
{macrodata_refiner-0.3.0 → macrodata_refiner-0.3.1}/src/macrodata_refiner.egg-info/requires.txt
RENAMED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
aiohttp
|
|
1
2
|
cloudpickle==3.1.2
|
|
2
3
|
fsspec[http]
|
|
3
4
|
httpx
|
|
@@ -10,12 +11,19 @@ msgspec>=0.20.0
|
|
|
10
11
|
pydantic>=2.0.0
|
|
11
12
|
|
|
12
13
|
[all]
|
|
13
|
-
macrodata-refiner[testing]
|
|
14
|
+
macrodata-refiner[hdf5]
|
|
15
|
+
macrodata-refiner[hf]
|
|
16
|
+
macrodata-refiner[mcap]
|
|
17
|
+
macrodata-refiner[video]
|
|
18
|
+
macrodata-refiner[zarr]
|
|
19
|
+
macrodata-refiner[text]
|
|
20
|
+
macrodata-refiner[s3]
|
|
21
|
+
macrodata-refiner[tfds]
|
|
14
22
|
|
|
15
|
-
[
|
|
23
|
+
[hand_tracking]
|
|
16
24
|
macrodata-refiner[hf]
|
|
17
25
|
macrodata-refiner[video]
|
|
18
|
-
ego-vision[models]>=0.1.
|
|
26
|
+
ego-vision[models]>=0.1.25
|
|
19
27
|
|
|
20
28
|
[hdf5]
|
|
21
29
|
h5py
|
|
@@ -39,14 +47,7 @@ s3fs
|
|
|
39
47
|
tensorflow
|
|
40
48
|
|
|
41
49
|
[testing]
|
|
42
|
-
macrodata-refiner[hdf5]
|
|
43
|
-
macrodata-refiner[hf]
|
|
44
|
-
macrodata-refiner[mcap]
|
|
45
|
-
macrodata-refiner[video]
|
|
46
|
-
macrodata-refiner[zarr]
|
|
47
|
-
macrodata-refiner[text]
|
|
48
|
-
macrodata-refiner[s3]
|
|
49
|
-
macrodata-refiner[tfds]
|
|
50
|
+
macrodata-refiner[all]
|
|
50
51
|
pytest>=8.0.0
|
|
51
52
|
pytest-cov>=5.0.0
|
|
52
53
|
|
|
@@ -2,7 +2,13 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import heapq
|
|
4
4
|
from collections.abc import Coroutine
|
|
5
|
-
from concurrent.futures import
|
|
5
|
+
from concurrent.futures import (
|
|
6
|
+
ALL_COMPLETED,
|
|
7
|
+
FIRST_COMPLETED,
|
|
8
|
+
FIRST_EXCEPTION,
|
|
9
|
+
Future,
|
|
10
|
+
wait,
|
|
11
|
+
)
|
|
6
12
|
from dataclasses import dataclass, field
|
|
7
13
|
from typing import Generic, TypeVar
|
|
8
14
|
|
|
@@ -65,8 +71,8 @@ class AsyncWindow(Generic[T]):
|
|
|
65
71
|
return self._take_ready()
|
|
66
72
|
|
|
67
73
|
def drain(self) -> list[T]:
|
|
68
|
-
"""Wait for
|
|
69
|
-
self._wait_until(return_when=
|
|
74
|
+
"""Wait for in-flight work, failing fast if any future raises."""
|
|
75
|
+
self._wait_until(return_when=FIRST_EXCEPTION)
|
|
70
76
|
return self._take_ready()
|
|
71
77
|
|
|
72
78
|
def cancel_pending(self) -> None:
|
|
@@ -83,13 +89,18 @@ class AsyncWindow(Generic[T]):
|
|
|
83
89
|
return
|
|
84
90
|
self._futures.difference_update(done)
|
|
85
91
|
for future in done:
|
|
86
|
-
|
|
92
|
+
try:
|
|
93
|
+
idx, value = future.result()
|
|
94
|
+
except BaseException:
|
|
95
|
+
self.cancel_pending()
|
|
96
|
+
raise
|
|
87
97
|
self._store_result(idx, value)
|
|
88
98
|
|
|
89
99
|
def _wait_until(self, *, return_when: str) -> None:
|
|
90
100
|
"""Block until the requested completion condition and collect results."""
|
|
91
101
|
while self._futures and (
|
|
92
|
-
return_when
|
|
102
|
+
return_when in {ALL_COMPLETED, FIRST_EXCEPTION}
|
|
103
|
+
or len(self._futures) >= self.max_in_flight
|
|
93
104
|
):
|
|
94
105
|
done, _ = wait(self._futures, return_when=return_when)
|
|
95
106
|
self._collect_done(done)
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
from collections.abc import Iterable, Iterator, Sequence
|
|
4
|
+
from collections.abc import Callable, Coroutine, Iterable, Iterator, Sequence
|
|
5
5
|
from typing import cast
|
|
6
6
|
|
|
7
7
|
from refiner.execution.asyncio.window import AsyncWindow
|
|
8
|
+
from refiner.execution.asyncio.runtime import submit
|
|
8
9
|
from refiner.execution.buffer import RowBuffer
|
|
9
10
|
from refiner.execution.tracking.shards import ShardDeltaFn, ShardDeltaTracker
|
|
10
11
|
from refiner.pipeline.data.row import Row
|
|
@@ -20,6 +21,8 @@ from refiner.pipeline.steps import (
|
|
|
20
21
|
from refiner.worker.context import set_active_step_index
|
|
21
22
|
from refiner.worker.metrics.api import register_gauge
|
|
22
23
|
|
|
24
|
+
AsyncCloseFn = Callable[[], Coroutine[object, object, None]]
|
|
25
|
+
|
|
23
26
|
|
|
24
27
|
def execute_row_steps(
|
|
25
28
|
rows: Iterable[Row],
|
|
@@ -95,11 +98,16 @@ def execute_row_steps(
|
|
|
95
98
|
if window is None:
|
|
96
99
|
return
|
|
97
100
|
for row in inp.take_all():
|
|
98
|
-
row.log_throughput("rows_processed", 1, unit="rows")
|
|
99
101
|
window.submit_blocking(_run_async_step(step=step, row=row))
|
|
100
|
-
|
|
102
|
+
completed_rows = window.take_completed()
|
|
103
|
+
for row in completed_rows:
|
|
104
|
+
row.log_throughput("rows_processed", 1, unit="rows")
|
|
105
|
+
out.extend(completed_rows)
|
|
101
106
|
if flush_all:
|
|
102
|
-
|
|
107
|
+
drained_rows = window.drain()
|
|
108
|
+
for row in drained_rows:
|
|
109
|
+
row.log_throughput("rows_processed", 1, unit="rows")
|
|
110
|
+
out.extend(drained_rows)
|
|
103
111
|
return
|
|
104
112
|
|
|
105
113
|
if isinstance(step, FilterRowStep):
|
|
@@ -169,19 +177,37 @@ def execute_row_steps(
|
|
|
169
177
|
for i in range(len(ordered)):
|
|
170
178
|
_run_step(i, flush_all=flush_all)
|
|
171
179
|
|
|
180
|
+
def _close_async_steps() -> None:
|
|
181
|
+
for window in async_windows:
|
|
182
|
+
if window is not None:
|
|
183
|
+
window.cancel_pending()
|
|
184
|
+
close_fns: list[AsyncCloseFn] = []
|
|
185
|
+
for step in ordered:
|
|
186
|
+
if not isinstance(step, AsyncRowStep):
|
|
187
|
+
continue
|
|
188
|
+
fn = getattr(step, "fn", None)
|
|
189
|
+
close = getattr(fn, "aclose", None)
|
|
190
|
+
if close is not None:
|
|
191
|
+
close_fns.append(cast(AsyncCloseFn, close))
|
|
192
|
+
for close in close_fns:
|
|
193
|
+
submit(close()).result()
|
|
194
|
+
|
|
172
195
|
def _drain_output() -> Iterator[Row]:
|
|
173
196
|
outq = queues[-1]
|
|
174
197
|
if not outq:
|
|
175
198
|
return
|
|
176
199
|
yield from outq.take_all()
|
|
177
200
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
201
|
+
try:
|
|
202
|
+
for row in rows:
|
|
203
|
+
queues[0].append(row)
|
|
204
|
+
_pump(flush_all=False)
|
|
205
|
+
yield from _drain_output()
|
|
182
206
|
|
|
183
|
-
|
|
184
|
-
|
|
207
|
+
_pump(flush_all=True)
|
|
208
|
+
yield from _drain_output()
|
|
209
|
+
finally:
|
|
210
|
+
_close_async_steps()
|
|
185
211
|
|
|
186
212
|
|
|
187
213
|
__all__ = ["execute_row_steps", "ShardDeltaFn"]
|
|
@@ -38,6 +38,7 @@ from refiner.inference.types import (
|
|
|
38
38
|
AnthropicFilePartProviderOptions,
|
|
39
39
|
AnthropicProviderOptions,
|
|
40
40
|
GoogleProviderOptions,
|
|
41
|
+
InferenceProvider,
|
|
41
42
|
InferenceWarning,
|
|
42
43
|
OpenAIProviderOptions,
|
|
43
44
|
)
|
|
@@ -49,6 +50,7 @@ __all__ = [
|
|
|
49
50
|
"GeneratePoolingMapFn",
|
|
50
51
|
"GeneratePoolingPayload",
|
|
51
52
|
"InferenceResponse",
|
|
53
|
+
"InferenceProvider",
|
|
52
54
|
"InferenceAPICallError",
|
|
53
55
|
"InferenceRetryError",
|
|
54
56
|
"InferenceSchemaValidationError",
|
|
@@ -9,12 +9,16 @@ from refiner.inference.providers import (
|
|
|
9
9
|
GoogleEndpointProvider,
|
|
10
10
|
OpenAIEndpointProvider,
|
|
11
11
|
OpenAIResponsesProvider,
|
|
12
|
-
VLLMProvider,
|
|
13
12
|
)
|
|
14
13
|
from refiner.inference.providers import anthropic as anthropic_provider
|
|
15
14
|
from refiner.inference.providers import google as google_provider
|
|
16
15
|
from refiner.inference.providers import openai as openai_provider
|
|
17
|
-
from refiner.inference.types import
|
|
16
|
+
from refiner.inference.types import (
|
|
17
|
+
InferenceProvider,
|
|
18
|
+
InferenceWarning,
|
|
19
|
+
Message,
|
|
20
|
+
ModelCapabilities,
|
|
21
|
+
)
|
|
18
22
|
|
|
19
23
|
_MEDIA_WARNING_BYTES = 20 * 1024 * 1024
|
|
20
24
|
_BASE64_CHARS = frozenset(
|
|
@@ -29,15 +33,7 @@ _TOOL_SETTINGS = {
|
|
|
29
33
|
}
|
|
30
34
|
|
|
31
35
|
|
|
32
|
-
def model_capabilities(
|
|
33
|
-
provider: (
|
|
34
|
-
AnthropicEndpointProvider
|
|
35
|
-
| GoogleEndpointProvider
|
|
36
|
-
| OpenAIEndpointProvider
|
|
37
|
-
| OpenAIResponsesProvider
|
|
38
|
-
| VLLMProvider
|
|
39
|
-
),
|
|
40
|
-
) -> ModelCapabilities:
|
|
36
|
+
def model_capabilities(provider: InferenceProvider) -> ModelCapabilities:
|
|
41
37
|
model = provider.model.lower()
|
|
42
38
|
if isinstance(provider, GoogleEndpointProvider):
|
|
43
39
|
return google_provider.model_capabilities(model)
|
|
@@ -52,13 +48,7 @@ def model_capabilities(
|
|
|
52
48
|
|
|
53
49
|
def capability_warnings(
|
|
54
50
|
*,
|
|
55
|
-
provider:
|
|
56
|
-
AnthropicEndpointProvider
|
|
57
|
-
| GoogleEndpointProvider
|
|
58
|
-
| OpenAIEndpointProvider
|
|
59
|
-
| OpenAIResponsesProvider
|
|
60
|
-
| VLLMProvider
|
|
61
|
-
),
|
|
51
|
+
provider: InferenceProvider,
|
|
62
52
|
messages: Sequence[Message],
|
|
63
53
|
params: Mapping[str, Any],
|
|
64
54
|
provider_options: Mapping[str, Mapping[str, Any]] | None,
|
|
@@ -137,13 +127,7 @@ def capability_warnings(
|
|
|
137
127
|
|
|
138
128
|
def _model_setting_warnings(
|
|
139
129
|
*,
|
|
140
|
-
provider:
|
|
141
|
-
AnthropicEndpointProvider
|
|
142
|
-
| GoogleEndpointProvider
|
|
143
|
-
| OpenAIEndpointProvider
|
|
144
|
-
| OpenAIResponsesProvider
|
|
145
|
-
| VLLMProvider
|
|
146
|
-
),
|
|
130
|
+
provider: InferenceProvider,
|
|
147
131
|
params: Mapping[str, Any],
|
|
148
132
|
provider_options: Mapping[str, Mapping[str, Any]] | None,
|
|
149
133
|
) -> list[InferenceWarning]:
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import inspect
|
|
4
4
|
from collections.abc import Awaitable, Callable, Mapping, Sequence
|
|
5
5
|
from dataclasses import replace
|
|
6
|
-
from typing import Any, Protocol,
|
|
6
|
+
from typing import Any, Protocol, cast
|
|
7
7
|
|
|
8
8
|
from pydantic import BaseModel
|
|
9
9
|
|
|
@@ -33,18 +33,16 @@ from refiner.inference.providers.openai import (
|
|
|
33
33
|
_OpenAIEndpointClient,
|
|
34
34
|
_OpenAIResponsesClient,
|
|
35
35
|
)
|
|
36
|
-
from refiner.inference.types import
|
|
36
|
+
from refiner.inference.types import (
|
|
37
|
+
InferenceProvider,
|
|
38
|
+
InferenceWarning,
|
|
39
|
+
Message,
|
|
40
|
+
ProviderOptions,
|
|
41
|
+
)
|
|
37
42
|
from refiner.pipeline.data.row import Row
|
|
43
|
+
from refiner.pipeline.planning import describe_builtin
|
|
38
44
|
from refiner.pipeline.steps import MapResult
|
|
39
45
|
|
|
40
|
-
_InferenceProvider: TypeAlias = (
|
|
41
|
-
AnthropicEndpointProvider
|
|
42
|
-
| GoogleEndpointProvider
|
|
43
|
-
| OpenAIEndpointProvider
|
|
44
|
-
| OpenAIResponsesProvider
|
|
45
|
-
| VLLMProvider
|
|
46
|
-
)
|
|
47
|
-
|
|
48
46
|
|
|
49
47
|
class GenerateTextFn(Protocol):
|
|
50
48
|
def __call__(
|
|
@@ -52,7 +50,7 @@ class GenerateTextFn(Protocol):
|
|
|
52
50
|
*,
|
|
53
51
|
messages: Sequence[Message] | None = None,
|
|
54
52
|
raw_payload: Mapping[str, Any] | None = None,
|
|
55
|
-
|
|
53
|
+
provider_options: ProviderOptions | None = None,
|
|
56
54
|
maxRetries: int | None = None,
|
|
57
55
|
schema: type[BaseModel] | None = None,
|
|
58
56
|
schemaStrict: bool = True,
|
|
@@ -66,16 +64,36 @@ GenerateTextMapFn = Callable[[Row, GenerateTextFn], Awaitable[MapResult] | MapRe
|
|
|
66
64
|
def generate_text(
|
|
67
65
|
*,
|
|
68
66
|
fn: GenerateTextMapFn,
|
|
69
|
-
provider:
|
|
67
|
+
provider: InferenceProvider,
|
|
70
68
|
default_generation_params: Mapping[str, Any] | None = None,
|
|
71
69
|
max_concurrent_requests: int = 256,
|
|
72
70
|
) -> Callable[[Row], Awaitable[MapResult]]:
|
|
71
|
+
"""Return an async row mapper that issues text-generation requests.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
fn: Row-level function that receives the input row and a request function.
|
|
75
|
+
The request function accepts either typed messages or a raw provider
|
|
76
|
+
payload and returns an ``InferenceResponse``.
|
|
77
|
+
provider: Endpoint or runtime-service provider used to execute requests.
|
|
78
|
+
default_generation_params: Parameters merged into each typed message or
|
|
79
|
+
raw payload request unless overridden by that individual request.
|
|
80
|
+
max_concurrent_requests: Maximum number of provider requests allowed to
|
|
81
|
+
run at once per worker.
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
@describe_builtin(
|
|
85
|
+
"inference.generate_text",
|
|
86
|
+
fn=fn,
|
|
87
|
+
provider=provider.to_builtin_args(),
|
|
88
|
+
max_concurrent_requests=max_concurrent_requests,
|
|
89
|
+
default_generation_params=dict(default_generation_params or {}),
|
|
90
|
+
)
|
|
73
91
|
async def _map(row: Row, request: RequestFn) -> MapResult:
|
|
74
92
|
async def _generate_text(
|
|
75
93
|
*,
|
|
76
94
|
messages: Sequence[Message] | None = None,
|
|
77
95
|
raw_payload: Mapping[str, Any] | None = None,
|
|
78
|
-
|
|
96
|
+
provider_options: ProviderOptions | None = None,
|
|
79
97
|
maxRetries: int | None = None,
|
|
80
98
|
schema: type[BaseModel] | None = None,
|
|
81
99
|
schemaStrict: bool = True,
|
|
@@ -83,7 +101,6 @@ def generate_text(
|
|
|
83
101
|
) -> InferenceResponse:
|
|
84
102
|
if (messages is None) == (raw_payload is None):
|
|
85
103
|
raise ValueError("pass exactly one of messages or raw_payload")
|
|
86
|
-
provider_options = providerOptions
|
|
87
104
|
max_retries = maxRetries
|
|
88
105
|
schema_strict = schemaStrict
|
|
89
106
|
schema_info = normalize_schema(
|
|
@@ -95,7 +112,7 @@ def generate_text(
|
|
|
95
112
|
if raw_payload is not None:
|
|
96
113
|
if provider_options is not None:
|
|
97
114
|
raise ValueError(
|
|
98
|
-
"
|
|
115
|
+
"provider_options are not supported with raw_payload"
|
|
99
116
|
)
|
|
100
117
|
if schema is not None:
|
|
101
118
|
raise ValueError("schema is not supported with raw_payload")
|
|
@@ -174,7 +191,7 @@ async def _generate(
|
|
|
174
191
|
|
|
175
192
|
def _build_payload(
|
|
176
193
|
*,
|
|
177
|
-
provider:
|
|
194
|
+
provider: InferenceProvider,
|
|
178
195
|
messages: Sequence[Message],
|
|
179
196
|
params: Mapping[str, Any],
|
|
180
197
|
provider_options: ProviderOptions | None,
|
|
@@ -211,7 +228,7 @@ def _build_payload(
|
|
|
211
228
|
|
|
212
229
|
|
|
213
230
|
def _provider_warnings(
|
|
214
|
-
provider:
|
|
231
|
+
provider: InferenceProvider,
|
|
215
232
|
provider_options: ProviderOptions | None,
|
|
216
233
|
) -> list[InferenceWarning]:
|
|
217
234
|
if isinstance(provider, OpenAIResponsesProvider):
|
|
@@ -20,7 +20,7 @@ def _custom_provider_data(
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def _provider_option(part: Mapping[str, Any], provider: str, key: str) -> object:
|
|
23
|
-
provider_options = part.get("
|
|
23
|
+
provider_options = part.get("provider_options")
|
|
24
24
|
if not isinstance(provider_options, Mapping):
|
|
25
25
|
return None
|
|
26
26
|
options = cast(Mapping[str, Any], provider_options).get(provider)
|