datachain 0.25.0__tar.gz → 0.25.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.25.0 → datachain-0.25.1}/.github/workflows/benchmarks.yml +0 -3
- {datachain-0.25.0 → datachain-0.25.1}/.github/workflows/tests-studio.yml +15 -2
- {datachain-0.25.0 → datachain-0.25.1}/.github/workflows/tests.yml +16 -12
- {datachain-0.25.0 → datachain-0.25.1}/PKG-INFO +3 -2
- {datachain-0.25.0 → datachain-0.25.1}/pyproject.toml +3 -1
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/arrow.py +9 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/hf.py +18 -21
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain.egg-info/PKG-INFO +3 -2
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain.egg-info/requires.txt +4 -1
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_hf.py +16 -1
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_datachain.py +16 -0
- {datachain-0.25.0 → datachain-0.25.1}/.cruft.json +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/.gitattributes +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/.github/codecov.yaml +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/.github/dependabot.yml +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/.github/workflows/release.yml +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/.gitignore +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/.pre-commit-config.yaml +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/LICENSE +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/README.rst +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/assets/datachain.svg +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/commands/auth/login.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/commands/auth/logout.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/commands/auth/team.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/commands/auth/token.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/commands/index.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/commands/job/cancel.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/commands/job/clusters.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/commands/job/logs.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/commands/job/ls.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/commands/job/run.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/contributing.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/examples.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/guide/db_migrations.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/guide/delta.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/guide/env.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/guide/index.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/guide/namespaces.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/guide/processing.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/guide/remotes.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/guide/retry.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/index.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/overrides/main.html +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/quick-start.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/data-types/file.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/data-types/index.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/data-types/pose.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/data-types/segment.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/datachain.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/func.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/index.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/toolkit.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/torch.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/references/udf.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/docs/tutorials.md +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/multimodal/wds.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/mkdocs.yml +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/noxfile.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/setup.cfg +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/__main__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/asyn.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cache.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/cli/utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/client/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/client/azure.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/client/gcs.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/client/hf.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/client/local.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/client/s3.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/config.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/dataset.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/delta.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/error.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/fs/reference.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/fs/utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/func/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/func/array.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/func/base.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/func/conditional.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/func/func.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/func/numeric.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/func/path.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/func/random.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/func/string.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/func/window.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/job.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/clip.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/file.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/image.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/listing.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/projects.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/settings.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/tar.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/text.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/udf.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/video.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/listing.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/model/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/model/bbox.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/model/pose.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/model/segment.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/model/utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/namespace.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/node.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/progress.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/project.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/py.typed +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/query/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/query/batch.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/query/dataset.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/query/metrics.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/query/params.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/query/queue.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/query/schema.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/query/session.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/query/udf.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/query/utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/remote/studio.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/script_meta.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/semver.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/types.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/sql/utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/studio.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/telemetry.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain/utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/conftest.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/data.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/examples/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/examples/test_examples.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/examples/wds_data.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/data/lena.jpg +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/functions/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/functions/test_array.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/functions/test_path.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/functions/test_random.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/functions/test_string.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/model/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_batching.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_catalog.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_client.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_data_storage.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_datachain.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_datasets.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_delta.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_file.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_image.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_listing.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_ls.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_metastore.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_metrics.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_pull.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_pytorch.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_query.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_read_database.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_retry.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_session.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_toolkit.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_video.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/func/test_warehouse.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/scripts/feature_class.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/test_atomicity.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/test_cli_e2e.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/test_cli_studio.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/test_import_time.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/test_query_e2e.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/test_telemetry.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/model/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_asyn.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_cache.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_catalog.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_client.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_config.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_dataset.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_func.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_listing.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_metastore.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_query.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_query_params.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_semver.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_serializer.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_session.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_utils.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.25.0 → datachain-0.25.1}/tests/utils.py +0 -0
|
@@ -30,9 +30,6 @@ jobs:
|
|
|
30
30
|
enable-cache: true
|
|
31
31
|
cache-suffix: benchmarks
|
|
32
32
|
cache-dependency-glob: pyproject.toml
|
|
33
|
-
# revert after this is fixed
|
|
34
|
-
# https://github.com/wntrblm/nox/issues/953
|
|
35
|
-
version: ">=0.6,<0.7"
|
|
36
33
|
|
|
37
34
|
- name: Install nox and dvc
|
|
38
35
|
run: uv pip install dvc[gs] nox --system
|
|
@@ -75,8 +75,21 @@ jobs:
|
|
|
75
75
|
path: './backend/datachain'
|
|
76
76
|
fetch-depth: 0
|
|
77
77
|
|
|
78
|
-
- name:
|
|
79
|
-
|
|
78
|
+
- name: Install FFmpeg on Windows
|
|
79
|
+
if: runner.os == 'Windows'
|
|
80
|
+
run: choco install ffmpeg
|
|
81
|
+
|
|
82
|
+
- name: Install FFmpeg on macOS
|
|
83
|
+
if: runner.os == 'macOS'
|
|
84
|
+
run: |
|
|
85
|
+
brew install ffmpeg
|
|
86
|
+
echo 'DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib' >> "$GITHUB_ENV"
|
|
87
|
+
|
|
88
|
+
- name: Install FFmpeg on Ubuntu
|
|
89
|
+
if: runner.os == 'Linux'
|
|
90
|
+
run: |
|
|
91
|
+
sudo apt update
|
|
92
|
+
sudo apt install -y ffmpeg
|
|
80
93
|
|
|
81
94
|
- name: Set up Python ${{ matrix.pyv }}
|
|
82
95
|
uses: actions/setup-python@v5
|
|
@@ -34,9 +34,6 @@ jobs:
|
|
|
34
34
|
enable-cache: true
|
|
35
35
|
cache-suffix: lint
|
|
36
36
|
cache-dependency-glob: pyproject.toml
|
|
37
|
-
# revert after this is fixed
|
|
38
|
-
# https://github.com/wntrblm/nox/issues/953
|
|
39
|
-
version: ">=0.6,<0.7"
|
|
40
37
|
|
|
41
38
|
- name: Install nox
|
|
42
39
|
run: uv pip install nox --system
|
|
@@ -81,9 +78,6 @@ jobs:
|
|
|
81
78
|
fetch-depth: 0
|
|
82
79
|
ref: ${{ github.event.pull_request.head.sha || github.ref }}
|
|
83
80
|
|
|
84
|
-
- name: Set up FFmpeg
|
|
85
|
-
uses: AnimMouse/setup-ffmpeg@v1
|
|
86
|
-
|
|
87
81
|
- name: Set up Python ${{ matrix.pyv }}
|
|
88
82
|
uses: actions/setup-python@v5
|
|
89
83
|
with:
|
|
@@ -95,9 +89,22 @@ jobs:
|
|
|
95
89
|
enable-cache: true
|
|
96
90
|
cache-suffix: tests-${{ matrix.pyv }}
|
|
97
91
|
cache-dependency-glob: pyproject.toml
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
92
|
+
|
|
93
|
+
- name: Install FFmpeg on Windows
|
|
94
|
+
if: runner.os == 'Windows'
|
|
95
|
+
run: choco install ffmpeg
|
|
96
|
+
|
|
97
|
+
- name: Install FFmpeg on macOS
|
|
98
|
+
if: runner.os == 'macOS'
|
|
99
|
+
run: |
|
|
100
|
+
brew install ffmpeg
|
|
101
|
+
echo 'DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib' >> "$GITHUB_ENV"
|
|
102
|
+
|
|
103
|
+
- name: Install FFmpeg on Ubuntu
|
|
104
|
+
if: runner.os == 'Linux'
|
|
105
|
+
run: |
|
|
106
|
+
sudo apt update
|
|
107
|
+
sudo apt install -y ffmpeg
|
|
101
108
|
|
|
102
109
|
- name: Install nox
|
|
103
110
|
run: uv pip install nox --system
|
|
@@ -165,9 +172,6 @@ jobs:
|
|
|
165
172
|
enable-cache: true
|
|
166
173
|
cache-suffix: examples-${{ matrix.pyv }}
|
|
167
174
|
cache-dependency-glob: pyproject.toml
|
|
168
|
-
# revert after this is fixed
|
|
169
|
-
# https://github.com/wntrblm/nox/issues/953
|
|
170
|
-
version: ">=0.6,<0.7"
|
|
171
175
|
|
|
172
176
|
- name: Install nox
|
|
173
177
|
run: uv pip install nox --system
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.25.0
|
|
3
|
+
Version: 0.25.1
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -70,7 +70,8 @@ Provides-Extra: vector
|
|
|
70
70
|
Requires-Dist: usearch; extra == "vector"
|
|
71
71
|
Provides-Extra: hf
|
|
72
72
|
Requires-Dist: numba>=0.60.0; extra == "hf"
|
|
73
|
-
Requires-Dist: datasets[
|
|
73
|
+
Requires-Dist: datasets[vision]>=4.0.0; extra == "hf"
|
|
74
|
+
Requires-Dist: datasets[audio]>=4.0.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "hf"
|
|
74
75
|
Requires-Dist: fsspec>=2024.12.0; extra == "hf"
|
|
75
76
|
Provides-Extra: video
|
|
76
77
|
Requires-Dist: ffmpeg-python; extra == "video"
|
|
@@ -81,7 +81,9 @@ vector = [
|
|
|
81
81
|
]
|
|
82
82
|
hf = [
|
|
83
83
|
"numba>=0.60.0",
|
|
84
|
-
"datasets[
|
|
84
|
+
"datasets[vision]>=4.0.0",
|
|
85
|
+
# https://github.com/pytorch/torchcodec/issues/640
|
|
86
|
+
"datasets[audio]>=4.0.0 ; (sys_platform == 'linux' or sys_platform == 'darwin')",
|
|
85
87
|
"fsspec>=2024.12.0"
|
|
86
88
|
]
|
|
87
89
|
video = [
|
|
@@ -126,7 +126,16 @@ class ArrowGenerator(Generator):
|
|
|
126
126
|
if isinstance(kwargs.get("format"), CsvFileFormat):
|
|
127
127
|
kwargs["format"] = "csv"
|
|
128
128
|
arrow_file = ArrowRow(file=file, index=index, kwargs=kwargs)
|
|
129
|
+
|
|
130
|
+
if self.output_schema and hasattr(vals[0], "source"):
|
|
131
|
+
# if we are reading parquet file written by datachain it might have
|
|
132
|
+
# source inside of it already, so we should not duplicate it, instead
|
|
133
|
+
# we are re-creating it of the self.source flag
|
|
134
|
+
vals[0].source = arrow_file # type: ignore[attr-defined]
|
|
135
|
+
|
|
136
|
+
return vals
|
|
129
137
|
return [arrow_file, *vals]
|
|
138
|
+
|
|
130
139
|
return vals
|
|
131
140
|
|
|
132
141
|
def _process_non_datachain_record(
|
|
@@ -11,7 +11,7 @@ try:
|
|
|
11
11
|
Image,
|
|
12
12
|
IterableDataset,
|
|
13
13
|
IterableDatasetDict,
|
|
14
|
-
|
|
14
|
+
List,
|
|
15
15
|
Value,
|
|
16
16
|
load_dataset,
|
|
17
17
|
)
|
|
@@ -59,7 +59,6 @@ class HFImage(DataModel):
|
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
class HFAudio(DataModel):
|
|
62
|
-
path: str
|
|
63
62
|
array: list[float]
|
|
64
63
|
sampling_rate: int
|
|
65
64
|
|
|
@@ -116,26 +115,24 @@ def stream_splits(ds: Union[str, HFDatasetType], *args, **kwargs):
|
|
|
116
115
|
return {"": ds}
|
|
117
116
|
|
|
118
117
|
|
|
119
|
-
def convert_feature(val: Any, feat: Any, anno: Any) -> Any:
|
|
120
|
-
if isinstance(feat, (Value, Array2D, Array3D, Array4D, Array5D)):
|
|
118
|
+
def convert_feature(val: Any, feat: Any, anno: Any) -> Any:
|
|
119
|
+
if isinstance(feat, (Value, Array2D, Array3D, Array4D, Array5D, List)):
|
|
121
120
|
return val
|
|
122
121
|
if isinstance(feat, ClassLabel):
|
|
123
122
|
return HFClassLabel(string=feat.names[val], integer=val)
|
|
124
|
-
if isinstance(feat,
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
return anno(**sdict)
|
|
132
|
-
return val
|
|
123
|
+
if isinstance(feat, dict):
|
|
124
|
+
sdict = {}
|
|
125
|
+
for sname in val:
|
|
126
|
+
sfeat = feat[sname]
|
|
127
|
+
sanno = anno.model_fields[sname].annotation
|
|
128
|
+
sdict[sname] = [convert_feature(v, sfeat, sanno) for v in val[sname]]
|
|
129
|
+
return anno(**sdict)
|
|
133
130
|
if isinstance(feat, Image):
|
|
134
131
|
if isinstance(val, dict):
|
|
135
132
|
return HFImage(img=val["bytes"])
|
|
136
133
|
return HFImage(img=image_to_bytes(val))
|
|
137
134
|
if isinstance(feat, Audio):
|
|
138
|
-
return HFAudio(
|
|
135
|
+
return HFAudio(array=val["array"], sampling_rate=val["sampling_rate"])
|
|
139
136
|
|
|
140
137
|
|
|
141
138
|
def get_output_schema(features: Features) -> dict[str, DataType]:
|
|
@@ -151,13 +148,13 @@ def _feature_to_chain_type(name: str, val: Any) -> DataType: # noqa: PLR0911
|
|
|
151
148
|
return arrow_type_mapper(val.pa_type)
|
|
152
149
|
if isinstance(val, ClassLabel):
|
|
153
150
|
return HFClassLabel
|
|
154
|
-
if isinstance(val,
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
151
|
+
if isinstance(val, dict):
|
|
152
|
+
sequence_dict = {}
|
|
153
|
+
for sname, sval in val.items():
|
|
154
|
+
dtype = _feature_to_chain_type(sname, sval)
|
|
155
|
+
sequence_dict[sname] = dtype # type: ignore[valid-type]
|
|
156
|
+
return dict_to_data_model(name, sequence_dict) # type: ignore[arg-type]
|
|
157
|
+
if isinstance(val, List):
|
|
161
158
|
return list[_feature_to_chain_type(name, val.feature)] # type: ignore[arg-type,misc,return-value]
|
|
162
159
|
if isinstance(val, Array2D):
|
|
163
160
|
dtype = arrow_type_mapper(string_to_arrow(val.dtype))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.25.0
|
|
3
|
+
Version: 0.25.1
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -70,7 +70,8 @@ Provides-Extra: vector
|
|
|
70
70
|
Requires-Dist: usearch; extra == "vector"
|
|
71
71
|
Provides-Extra: hf
|
|
72
72
|
Requires-Dist: numba>=0.60.0; extra == "hf"
|
|
73
|
-
Requires-Dist: datasets[
|
|
73
|
+
Requires-Dist: datasets[vision]>=4.0.0; extra == "hf"
|
|
74
|
+
Requires-Dist: datasets[audio]>=4.0.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "hf"
|
|
74
75
|
Requires-Dist: fsspec>=2024.12.0; extra == "hf"
|
|
75
76
|
Provides-Extra: video
|
|
76
77
|
Requires-Dist: ffmpeg-python; extra == "video"
|
|
@@ -63,9 +63,12 @@ open_clip_torch
|
|
|
63
63
|
|
|
64
64
|
[hf]
|
|
65
65
|
numba>=0.60.0
|
|
66
|
-
datasets[
|
|
66
|
+
datasets[vision]>=4.0.0
|
|
67
67
|
fsspec>=2024.12.0
|
|
68
68
|
|
|
69
|
+
[hf:sys_platform == "linux" or sys_platform == "darwin"]
|
|
70
|
+
datasets[audio]>=4.0.0
|
|
71
|
+
|
|
69
72
|
[remote]
|
|
70
73
|
lz4
|
|
71
74
|
requests>=2.22.0
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
|
|
1
3
|
import numpy as np
|
|
4
|
+
import pytest
|
|
2
5
|
from datasets import load_dataset
|
|
3
6
|
from datasets.features.image import image_to_bytes
|
|
4
7
|
from PIL import Image
|
|
@@ -12,6 +15,18 @@ from datachain.lib.hf import (
|
|
|
12
15
|
)
|
|
13
16
|
|
|
14
17
|
|
|
18
|
+
def require_torchcodec(test_case):
|
|
19
|
+
"""
|
|
20
|
+
Decorator marking a test that requires torchcodec (not available on Windows).
|
|
21
|
+
These tests are skipped when torchcodec isn't installed.
|
|
22
|
+
"""
|
|
23
|
+
if not importlib.util.find_spec("torchcodec"):
|
|
24
|
+
test_case = pytest.mark.skip(
|
|
25
|
+
"test requires torchcoded, not available on Windows yet"
|
|
26
|
+
)(test_case)
|
|
27
|
+
return test_case
|
|
28
|
+
|
|
29
|
+
|
|
15
30
|
def test_hf_image(tmp_path):
|
|
16
31
|
train_dir = tmp_path / "train"
|
|
17
32
|
train_dir.mkdir()
|
|
@@ -28,6 +43,7 @@ def test_hf_image(tmp_path):
|
|
|
28
43
|
assert row.image.img == image_to_bytes(img)
|
|
29
44
|
|
|
30
45
|
|
|
46
|
+
@require_torchcodec
|
|
31
47
|
def test_hf_audio(tmp_path):
|
|
32
48
|
# See https://stackoverflow.com/questions/66191480/how-to-convert-a-numpy-array-to-a-mp3-file
|
|
33
49
|
samplerate = 44100
|
|
@@ -45,6 +61,5 @@ def test_hf_audio(tmp_path):
|
|
|
45
61
|
gen = HFGenerator(ds, dict_to_data_model("", schema))
|
|
46
62
|
gen.setup()
|
|
47
63
|
row = next(iter(gen.process("train")))
|
|
48
|
-
assert row.audio.path == str(train_dir / "example.wav")
|
|
49
64
|
assert np.allclose(row.audio.array, data / amplitude, atol=1e-4)
|
|
50
65
|
assert row.audio.sampling_rate == samplerate
|
|
@@ -1760,6 +1760,22 @@ def test_read_parquet(tmp_dir, test_session):
|
|
|
1760
1760
|
assert df_equal(df1, df)
|
|
1761
1761
|
|
|
1762
1762
|
|
|
1763
|
+
def test_read_parquet_exported_with_source(test_session, tmp_dir):
|
|
1764
|
+
path = tmp_dir / "df.parquet"
|
|
1765
|
+
path2 = tmp_dir / "df2.parquet"
|
|
1766
|
+
df = pd.DataFrame(DF_DATA)
|
|
1767
|
+
|
|
1768
|
+
df.to_parquet(path)
|
|
1769
|
+
dc.read_parquet(path, source=True).to_parquet(path2)
|
|
1770
|
+
df1 = (
|
|
1771
|
+
dc.read_parquet(path2, source=True)
|
|
1772
|
+
.select("first_name", "age", "city")
|
|
1773
|
+
.to_pandas()
|
|
1774
|
+
)
|
|
1775
|
+
|
|
1776
|
+
assert df_equal(df1, df)
|
|
1777
|
+
|
|
1778
|
+
|
|
1763
1779
|
@skip_if_not_sqlite
|
|
1764
1780
|
def test_read_parquet_in_memory(tmp_dir):
|
|
1765
1781
|
df = pd.DataFrame(DF_DATA)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|