datachain 0.30.2__tar.gz → 0.30.4__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.30.2 → datachain-0.30.4}/.github/workflows/benchmarks.yml +1 -1
- {datachain-0.30.2 → datachain-0.30.4}/.github/workflows/release.yml +1 -1
- {datachain-0.30.2 → datachain-0.30.4}/.github/workflows/tests-studio.yml +2 -2
- {datachain-0.30.2 → datachain-0.30.4}/.github/workflows/tests.yml +19 -3
- {datachain-0.30.2 → datachain-0.30.4}/.github/workflows/update-template.yaml +1 -1
- {datachain-0.30.2 → datachain-0.30.4}/.pre-commit-config.yaml +1 -1
- {datachain-0.30.2 → datachain-0.30.4}/PKG-INFO +2 -2
- datachain-0.30.4/docs/references/func.md +38 -0
- datachain-0.30.4/docs/references/functions/aggregate.md +5 -0
- datachain-0.30.4/docs/references/functions/array.md +5 -0
- datachain-0.30.4/docs/references/functions/conditional.md +5 -0
- datachain-0.30.4/docs/references/functions/numeric.md +5 -0
- datachain-0.30.4/docs/references/functions/path.md +5 -0
- datachain-0.30.4/docs/references/functions/random.md +5 -0
- datachain-0.30.4/docs/references/functions/string.md +22 -0
- datachain-0.30.4/docs/references/functions/window.md +5 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/multimodal/audio-to-text.py +4 -1
- {datachain-0.30.2 → datachain-0.30.4}/mkdocs.yml +11 -2
- {datachain-0.30.2 → datachain-0.30.4}/pyproject.toml +1 -1
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/__init__.py +2 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/catalog/__init__.py +2 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/catalog/catalog.py +100 -31
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/catalog/loader.py +4 -2
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/__init__.py +1 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/datasets.py +19 -12
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/metastore.py +34 -30
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/sqlite.py +0 -4
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/delta.py +23 -12
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/string.py +8 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/__init__.py +2 -1
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/database.py +50 -6
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/datachain.py +48 -20
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/datasets.py +12 -7
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/utils.py +5 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/namespaces.py +3 -1
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/projects.py +3 -1
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/signal_schema.py +28 -17
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/listing.py +5 -9
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/ultralytics/bbox.py +14 -12
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/ultralytics/pose.py +14 -12
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/ultralytics/segment.py +14 -12
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/dataset.py +42 -28
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/schema.py +4 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/utils.py +7 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain.egg-info/PKG-INFO +2 -2
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain.egg-info/SOURCES.txt +9 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain.egg-info/requires.txt +1 -1
- {datachain-0.30.2 → datachain-0.30.4}/tests/conftest.py +4 -32
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_catalog.py +2 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_data_storage.py +2 -2
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_datachain.py +0 -70
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_dataset_query.py +19 -6
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_datasets.py +11 -19
- datachain-0.30.4/tests/func/test_mutate.py +284 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_pull.py +15 -4
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_read_dataset_remote.py +10 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_to_database.py +62 -5
- {datachain-0.30.2 → datachain-0.30.4}/tests/test_cli_studio.py +1 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_datachain.py +12 -15
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_namespace.py +2 -2
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_project.py +1 -1
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_signal_schema.py +4 -2
- {datachain-0.30.2 → datachain-0.30.4}/tests/utils.py +2 -14
- datachain-0.30.2/docs/references/func.md +0 -5
- {datachain-0.30.2 → datachain-0.30.4}/.cruft.json +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/.gitattributes +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/.github/codecov.yaml +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/.github/dependabot.yml +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/.gitignore +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/LICENSE +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/README.rst +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/assets/datachain.svg +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/commands/auth/login.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/commands/auth/logout.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/commands/auth/team.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/commands/auth/token.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/commands/index.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/commands/job/cancel.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/commands/job/clusters.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/commands/job/logs.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/commands/job/ls.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/commands/job/run.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/contributing.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/examples.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/guide/db_migrations.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/guide/delta.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/guide/env.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/guide/index.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/guide/namespaces.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/guide/processing.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/guide/remotes.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/guide/retry.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/index.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/overrides/main.html +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/quick-start.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/file.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/index.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/pose.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/segment.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/datachain.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/index.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/toolkit.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/torch.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/references/udf.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/docs/tutorials.md +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/multimodal/wds.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/noxfile.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/setup.cfg +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/__main__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/asyn.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cache.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/azure.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/gcs.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/hf.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/local.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/s3.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/config.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/dataset.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/error.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/fs/reference.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/fs/utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/array.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/base.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/conditional.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/func.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/numeric.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/path.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/random.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/window.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/job.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/audio.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/clip.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/file.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/hf.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/image.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/listing.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/settings.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/tar.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/text.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/udf.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/video.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/bbox.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/pose.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/segment.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/namespace.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/node.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/progress.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/project.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/py.typed +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/batch.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/metrics.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/params.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/queue.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/session.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/udf.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/remote/studio.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/script_meta.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/semver.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/types.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/studio.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/telemetry.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/data.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/examples/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/examples/test_examples.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/examples/wds_data.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/data/lena.jpg +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_array.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_path.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_random.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_string.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/model/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_audio.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_batching.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_client.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_delta.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_file.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_hf.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_image.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_listing.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_ls.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_metastore.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_metrics.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_pytorch.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_query.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_read_database.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_retry.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_session.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_toolkit.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_video.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_warehouse.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/scripts/feature_class.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/test_atomicity.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/test_cli_e2e.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/test_import_time.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/test_query_e2e.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/test_telemetry.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/model/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_asyn.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_cache.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_catalog.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_client.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_config.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_dataset.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_func.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_listing.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_metastore.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_query.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_query_params.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_semver.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_serializer.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_session.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_utils.py +0 -0
- {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_warehouse.py +0 -0
|
@@ -62,7 +62,7 @@ jobs:
|
|
|
62
62
|
echo "Studio branch: $STUDIO_BRANCH"
|
|
63
63
|
|
|
64
64
|
- name: Check out Studio
|
|
65
|
-
uses: actions/checkout@
|
|
65
|
+
uses: actions/checkout@v5
|
|
66
66
|
with:
|
|
67
67
|
fetch-depth: 0
|
|
68
68
|
repository: iterative/studio
|
|
@@ -70,7 +70,7 @@ jobs:
|
|
|
70
70
|
token: ${{ secrets.ITERATIVE_STUDIO_READ_ACCESS_TOKEN }}
|
|
71
71
|
|
|
72
72
|
- name: Check out repository
|
|
73
|
-
uses: actions/checkout@
|
|
73
|
+
uses: actions/checkout@v5
|
|
74
74
|
with:
|
|
75
75
|
path: './backend/datachain'
|
|
76
76
|
fetch-depth: 0
|
|
@@ -18,7 +18,7 @@ jobs:
|
|
|
18
18
|
runs-on: ubuntu-latest
|
|
19
19
|
steps:
|
|
20
20
|
- name: Check out the repository
|
|
21
|
-
uses: actions/checkout@
|
|
21
|
+
uses: actions/checkout@v5
|
|
22
22
|
with:
|
|
23
23
|
fetch-depth: 0
|
|
24
24
|
ref: ${{ github.event.pull_request.head.sha || github.ref }}
|
|
@@ -73,7 +73,7 @@ jobs:
|
|
|
73
73
|
|
|
74
74
|
steps:
|
|
75
75
|
- name: Check out the repository
|
|
76
|
-
uses: actions/checkout@
|
|
76
|
+
uses: actions/checkout@v5
|
|
77
77
|
with:
|
|
78
78
|
fetch-depth: 0
|
|
79
79
|
ref: ${{ github.event.pull_request.head.sha || github.ref }}
|
|
@@ -175,7 +175,7 @@ jobs:
|
|
|
175
175
|
- {os: ubuntu-latest-4-cores, pyv: "3.13", group: multimodal}
|
|
176
176
|
|
|
177
177
|
steps:
|
|
178
|
-
- uses: actions/checkout@
|
|
178
|
+
- uses: actions/checkout@v5
|
|
179
179
|
with:
|
|
180
180
|
ref: ${{ github.event.pull_request.head.sha || github.ref }}
|
|
181
181
|
|
|
@@ -194,6 +194,22 @@ jobs:
|
|
|
194
194
|
- name: Install nox
|
|
195
195
|
run: uv pip install nox --system
|
|
196
196
|
|
|
197
|
+
- name: Install FFmpeg on Windows
|
|
198
|
+
if: runner.os == 'Windows'
|
|
199
|
+
run: choco install ffmpeg
|
|
200
|
+
|
|
201
|
+
- name: Install FFmpeg on macOS
|
|
202
|
+
if: runner.os == 'macOS'
|
|
203
|
+
run: |
|
|
204
|
+
brew install ffmpeg
|
|
205
|
+
echo 'DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib' >> "$GITHUB_ENV"
|
|
206
|
+
|
|
207
|
+
- name: Install FFmpeg on Ubuntu
|
|
208
|
+
if: runner.os == 'Linux'
|
|
209
|
+
run: |
|
|
210
|
+
sudo apt update
|
|
211
|
+
sudo apt install -y ffmpeg
|
|
212
|
+
|
|
197
213
|
- name: Set hf token
|
|
198
214
|
if: matrix.group == 'llm_and_nlp'
|
|
199
215
|
run: echo 'HF_TOKEN=${{ secrets.HF_TOKEN }}' >> "$GITHUB_ENV"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.30.2
|
|
3
|
+
Version: 0.30.4
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -45,7 +45,7 @@ Requires-Dist: datamodel-code-generator>=0.25
|
|
|
45
45
|
Requires-Dist: Pillow<12,>=10.0.0
|
|
46
46
|
Requires-Dist: msgpack<2,>=1.0.4
|
|
47
47
|
Requires-Dist: psutil
|
|
48
|
-
Requires-Dist: huggingface_hub
|
|
48
|
+
Requires-Dist: huggingface_hub
|
|
49
49
|
Requires-Dist: iterative-telemetry>=0.0.10
|
|
50
50
|
Requires-Dist: platformdirs
|
|
51
51
|
Requires-Dist: dvc-studio-client<1,>=0.21
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Functions
|
|
2
|
+
|
|
3
|
+
Use built-in functions for data manipulation and analysis to operate on the underlying database storing the chain data. These functions are useful for operations like [`DataChain.filter`](datachain.md#datachain.lib.dc.DataChain.filter) and [`DataChain.mutate`](datachain.md#datachain.lib.dc.DataChain.mutate).
|
|
4
|
+
|
|
5
|
+
Functions are organized by category and accessed through their respective modules. For example, string functions are accessed via `func.string.length()`, array functions via `func.array.contains()`, etc.
|
|
6
|
+
|
|
7
|
+
!!! note "Global Function Access"
|
|
8
|
+
Only a subset of functions are available directly from `datachain.func` (e.g., `func.length`). Most functions should be accessed through their specific module namespace (e.g., `func.string.length`) to avoid naming conflicts.
|
|
9
|
+
|
|
10
|
+
## Function Categories
|
|
11
|
+
|
|
12
|
+
DataChain provides several categories of functions for different types of operations:
|
|
13
|
+
|
|
14
|
+
- **[Aggregate Functions](functions/aggregate.md)** - Functions for aggregating data like `sum`, `count`, `avg`, etc.
|
|
15
|
+
- **[Array Functions](functions/array.md)** - Functions for working with arrays and lists
|
|
16
|
+
- **[Conditional Functions](functions/conditional.md)** - Functions for conditional logic like `ifelse`, `case`, etc.
|
|
17
|
+
- **[Numeric Functions](functions/numeric.md)** - Functions for numeric operations and computations
|
|
18
|
+
- **[Path Functions](functions/path.md)** - Functions for working with file paths
|
|
19
|
+
- **[Random Functions](functions/random.md)** - Functions for generating random values
|
|
20
|
+
- **[String Functions](functions/string.md)** - Functions for string manipulation and processing
|
|
21
|
+
- **[Window Functions](functions/window.md)** - Functions for window operations
|
|
22
|
+
|
|
23
|
+
## Usage
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from datachain.func import aggregate, array, conditional, numeric, path, random, string, window
|
|
27
|
+
|
|
28
|
+
# Access functions through their module namespaces
|
|
29
|
+
dc.mutate(
|
|
30
|
+
text_length=string.length("text_column"),
|
|
31
|
+
contains_item=array.contains("array_column", "value"),
|
|
32
|
+
file_extension=path.file_ext("file_path")
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# Some commonly used functions are also available directly
|
|
36
|
+
from datachain.func import sum, count, length, ifelse
|
|
37
|
+
dc.mutate(total=sum("amount"))
|
|
38
|
+
```
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# String Functions
|
|
2
|
+
|
|
3
|
+
Functions for string manipulation, text processing, and string analysis.
|
|
4
|
+
|
|
5
|
+
## Usage
|
|
6
|
+
|
|
7
|
+
String functions are available under the `func.string` namespace to avoid name collisions with other functions:
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
from datachain.func import string
|
|
11
|
+
|
|
12
|
+
# Use string functions with the string namespace
|
|
13
|
+
dc.mutate(
|
|
14
|
+
str_len=string.length("text_column"),
|
|
15
|
+
parts=string.split("text_column", ","),
|
|
16
|
+
cleaned=string.replace("text_column", "old", "new"),
|
|
17
|
+
regex_cleaned=string.regexp_replace("text_column", r"\d+", "X"),
|
|
18
|
+
distance=string.byte_hamming_distance("col1", "col2")
|
|
19
|
+
)
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
::: datachain.func.string
|
|
@@ -35,7 +35,10 @@ def process(fragment: AudioFragment, pipeline: Pipeline) -> str:
|
|
|
35
35
|
audio_array = audio_array.mean(axis=1)
|
|
36
36
|
|
|
37
37
|
# Pass the numpy array with exact sampling rate from fragment
|
|
38
|
-
result = pipeline(
|
|
38
|
+
result = pipeline(
|
|
39
|
+
{"raw": audio_array, "sampling_rate": sample_rate},
|
|
40
|
+
generate_kwargs={"language": "en"},
|
|
41
|
+
)
|
|
39
42
|
return str(result["text"])
|
|
40
43
|
|
|
41
44
|
|
|
@@ -82,7 +82,16 @@ nav:
|
|
|
82
82
|
- Segment: references/data-types/segment.md
|
|
83
83
|
- UDF: references/udf.md
|
|
84
84
|
- Torch: references/torch.md
|
|
85
|
-
- Functions:
|
|
85
|
+
- Functions:
|
|
86
|
+
- Overview: references/func.md
|
|
87
|
+
- Aggregate: references/functions/aggregate.md
|
|
88
|
+
- Array: references/functions/array.md
|
|
89
|
+
- Conditional: references/functions/conditional.md
|
|
90
|
+
- Numeric: references/functions/numeric.md
|
|
91
|
+
- Path: references/functions/path.md
|
|
92
|
+
- Random: references/functions/random.md
|
|
93
|
+
- String: references/functions/string.md
|
|
94
|
+
- Window: references/functions/window.md
|
|
86
95
|
- Toolkit: references/toolkit.md
|
|
87
96
|
- 📖 CLI Reference:
|
|
88
97
|
- Overview: commands/index.md
|
|
@@ -177,7 +186,7 @@ plugins:
|
|
|
177
186
|
- https://numpy.org/doc/stable/objects.inv
|
|
178
187
|
- https://pandas.pydata.org/docs/objects.inv
|
|
179
188
|
- https://arrow.apache.org/docs/objects.inv
|
|
180
|
-
|
|
189
|
+
- https://docs.sqlalchemy.org/objects.inv # SSL certificate issue
|
|
181
190
|
- https://docs.pydantic.dev/latest/objects.inv
|
|
182
191
|
|
|
183
192
|
watch:
|
|
@@ -6,6 +6,7 @@ from datachain.lib.dc import (
|
|
|
6
6
|
Sys,
|
|
7
7
|
datasets,
|
|
8
8
|
delete_dataset,
|
|
9
|
+
is_studio,
|
|
9
10
|
listings,
|
|
10
11
|
move_dataset,
|
|
11
12
|
read_csv,
|
|
@@ -74,6 +75,7 @@ __all__ = [
|
|
|
74
75
|
"datasets",
|
|
75
76
|
"delete_dataset",
|
|
76
77
|
"is_chain_type",
|
|
78
|
+
"is_studio",
|
|
77
79
|
"listings",
|
|
78
80
|
"metrics",
|
|
79
81
|
"move_dataset",
|
|
@@ -3,6 +3,7 @@ from .catalog import (
|
|
|
3
3
|
QUERY_SCRIPT_CANCELED_EXIT_CODE,
|
|
4
4
|
QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE,
|
|
5
5
|
Catalog,
|
|
6
|
+
is_namespace_local,
|
|
6
7
|
)
|
|
7
8
|
from .loader import get_catalog
|
|
8
9
|
|
|
@@ -12,4 +13,5 @@ __all__ = [
|
|
|
12
13
|
"QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE",
|
|
13
14
|
"Catalog",
|
|
14
15
|
"get_catalog",
|
|
16
|
+
"is_namespace_local",
|
|
15
17
|
]
|
|
@@ -113,6 +113,11 @@ else:
|
|
|
113
113
|
SIGINT = signal.SIGINT
|
|
114
114
|
|
|
115
115
|
|
|
116
|
+
def is_namespace_local(namespace_name) -> bool:
|
|
117
|
+
"""Checks if namespace is from local environment, i.e. is `local`"""
|
|
118
|
+
return namespace_name == "local"
|
|
119
|
+
|
|
120
|
+
|
|
116
121
|
def shutdown_process(
|
|
117
122
|
proc: subprocess.Popen,
|
|
118
123
|
interrupt_timeout: Optional[int] = None,
|
|
@@ -680,8 +685,9 @@ class Catalog:
|
|
|
680
685
|
ds_namespace, ds_project, ds_name = parse_dataset_name(ds_name)
|
|
681
686
|
assert ds_namespace
|
|
682
687
|
assert ds_project
|
|
683
|
-
|
|
684
|
-
|
|
688
|
+
dataset = self.get_dataset(
|
|
689
|
+
ds_name, namespace_name=ds_namespace, project_name=ds_project
|
|
690
|
+
)
|
|
685
691
|
if not ds_version:
|
|
686
692
|
ds_version = dataset.latest_version
|
|
687
693
|
dataset_sources = self.warehouse.get_dataset_sources(
|
|
@@ -807,7 +813,11 @@ class Catalog:
|
|
|
807
813
|
)
|
|
808
814
|
default_version = DEFAULT_DATASET_VERSION
|
|
809
815
|
try:
|
|
810
|
-
dataset = self.get_dataset(
|
|
816
|
+
dataset = self.get_dataset(
|
|
817
|
+
name,
|
|
818
|
+
namespace_name=project.namespace.name if project else None,
|
|
819
|
+
project_name=project.name if project else None,
|
|
820
|
+
)
|
|
811
821
|
default_version = dataset.next_version_patch
|
|
812
822
|
if update_version == "major":
|
|
813
823
|
default_version = dataset.next_version_major
|
|
@@ -1016,7 +1026,11 @@ class Catalog:
|
|
|
1016
1026
|
dc.save(name)
|
|
1017
1027
|
except Exception as e: # noqa: BLE001
|
|
1018
1028
|
try:
|
|
1019
|
-
ds = self.get_dataset(
|
|
1029
|
+
ds = self.get_dataset(
|
|
1030
|
+
name,
|
|
1031
|
+
namespace_name=project.namespace.name,
|
|
1032
|
+
project_name=project.name,
|
|
1033
|
+
)
|
|
1020
1034
|
self.metastore.update_dataset_status(
|
|
1021
1035
|
ds,
|
|
1022
1036
|
DatasetStatus.FAILED,
|
|
@@ -1033,7 +1047,11 @@ class Catalog:
|
|
|
1033
1047
|
except DatasetNotFoundError:
|
|
1034
1048
|
raise e from None
|
|
1035
1049
|
|
|
1036
|
-
ds = self.get_dataset(
|
|
1050
|
+
ds = self.get_dataset(
|
|
1051
|
+
name,
|
|
1052
|
+
namespace_name=project.namespace.name,
|
|
1053
|
+
project_name=project.name,
|
|
1054
|
+
)
|
|
1037
1055
|
|
|
1038
1056
|
self.update_dataset_version_with_warehouse_info(
|
|
1039
1057
|
ds,
|
|
@@ -1041,7 +1059,11 @@ class Catalog:
|
|
|
1041
1059
|
sources="\n".join(sources),
|
|
1042
1060
|
)
|
|
1043
1061
|
|
|
1044
|
-
return self.get_dataset(
|
|
1062
|
+
return self.get_dataset(
|
|
1063
|
+
name,
|
|
1064
|
+
namespace_name=project.namespace.name,
|
|
1065
|
+
project_name=project.name,
|
|
1066
|
+
)
|
|
1045
1067
|
|
|
1046
1068
|
def get_full_dataset_name(
|
|
1047
1069
|
self,
|
|
@@ -1077,22 +1099,23 @@ class Catalog:
|
|
|
1077
1099
|
return namespace_name, project_name, name
|
|
1078
1100
|
|
|
1079
1101
|
def get_dataset(
|
|
1080
|
-
self,
|
|
1102
|
+
self,
|
|
1103
|
+
name: str,
|
|
1104
|
+
namespace_name: Optional[str] = None,
|
|
1105
|
+
project_name: Optional[str] = None,
|
|
1081
1106
|
) -> DatasetRecord:
|
|
1082
1107
|
from datachain.lib.listing import is_listing_dataset
|
|
1083
1108
|
|
|
1084
|
-
|
|
1109
|
+
namespace_name = namespace_name or self.metastore.default_namespace_name
|
|
1110
|
+
project_name = project_name or self.metastore.default_project_name
|
|
1085
1111
|
|
|
1086
1112
|
if is_listing_dataset(name):
|
|
1087
|
-
|
|
1113
|
+
namespace_name = self.metastore.system_namespace_name
|
|
1114
|
+
project_name = self.metastore.listing_project_name
|
|
1088
1115
|
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
raise DatasetNotFoundError(
|
|
1093
|
-
f"Dataset {name} not found in namespace {project.namespace.name}"
|
|
1094
|
-
f" and project {project.name}"
|
|
1095
|
-
) from None
|
|
1116
|
+
return self.metastore.get_dataset(
|
|
1117
|
+
name, namespace_name=namespace_name, project_name=project_name
|
|
1118
|
+
)
|
|
1096
1119
|
|
|
1097
1120
|
def get_dataset_with_remote_fallback(
|
|
1098
1121
|
self,
|
|
@@ -1103,6 +1126,8 @@ class Catalog:
|
|
|
1103
1126
|
pull_dataset: bool = False,
|
|
1104
1127
|
update: bool = False,
|
|
1105
1128
|
) -> DatasetRecord:
|
|
1129
|
+
from datachain.lib.dc.utils import is_studio
|
|
1130
|
+
|
|
1106
1131
|
# Intentionally ignore update flag if version is provided. Here only exact
|
|
1107
1132
|
# version can be provided and update then doesn't make sense.
|
|
1108
1133
|
# It corresponds to a query like this for example:
|
|
@@ -1111,16 +1136,24 @@ class Catalog:
|
|
|
1111
1136
|
if version:
|
|
1112
1137
|
update = False
|
|
1113
1138
|
|
|
1114
|
-
|
|
1139
|
+
# we don't do Studio fallback if script is already running in Studio, or if we try
|
|
1140
|
+
# to fetch dataset with local namespace as that one cannot
|
|
1141
|
+
# exist in Studio in the first place
|
|
1142
|
+
no_fallback = is_studio() or is_namespace_local(namespace_name)
|
|
1143
|
+
|
|
1144
|
+
if no_fallback or not update:
|
|
1115
1145
|
try:
|
|
1116
|
-
|
|
1117
|
-
|
|
1146
|
+
ds = self.get_dataset(
|
|
1147
|
+
name,
|
|
1148
|
+
namespace_name=namespace_name,
|
|
1149
|
+
project_name=project_name,
|
|
1150
|
+
)
|
|
1118
1151
|
if not version or ds.has_version(version):
|
|
1119
1152
|
return ds
|
|
1120
1153
|
except (NamespaceNotFoundError, ProjectNotFoundError, DatasetNotFoundError):
|
|
1121
1154
|
pass
|
|
1122
1155
|
|
|
1123
|
-
if
|
|
1156
|
+
if no_fallback:
|
|
1124
1157
|
raise DatasetNotFoundError(
|
|
1125
1158
|
f"Dataset {name}"
|
|
1126
1159
|
+ (f" version {version} " if version else " ")
|
|
@@ -1139,7 +1172,9 @@ class Catalog:
|
|
|
1139
1172
|
local_ds_version=version,
|
|
1140
1173
|
)
|
|
1141
1174
|
return self.get_dataset(
|
|
1142
|
-
name,
|
|
1175
|
+
name,
|
|
1176
|
+
namespace_name=namespace_name,
|
|
1177
|
+
project_name=project_name,
|
|
1143
1178
|
)
|
|
1144
1179
|
|
|
1145
1180
|
return self.get_remote_dataset(namespace_name, project_name, name)
|
|
@@ -1148,7 +1183,11 @@ class Catalog:
|
|
|
1148
1183
|
"""Returns dataset that contains version with specific uuid"""
|
|
1149
1184
|
for dataset in self.ls_datasets():
|
|
1150
1185
|
if dataset.has_version_with_uuid(uuid):
|
|
1151
|
-
return self.get_dataset(
|
|
1186
|
+
return self.get_dataset(
|
|
1187
|
+
dataset.name,
|
|
1188
|
+
namespace_name=dataset.project.namespace.name,
|
|
1189
|
+
project_name=dataset.project.name,
|
|
1190
|
+
)
|
|
1152
1191
|
raise DatasetNotFoundError(f"Dataset with version uuid {uuid} not found.")
|
|
1153
1192
|
|
|
1154
1193
|
def get_remote_dataset(
|
|
@@ -1171,9 +1210,18 @@ class Catalog:
|
|
|
1171
1210
|
return DatasetRecord.from_dict(dataset_info)
|
|
1172
1211
|
|
|
1173
1212
|
def get_dataset_dependencies(
|
|
1174
|
-
self,
|
|
1213
|
+
self,
|
|
1214
|
+
name: str,
|
|
1215
|
+
version: str,
|
|
1216
|
+
namespace_name: Optional[str] = None,
|
|
1217
|
+
project_name: Optional[str] = None,
|
|
1218
|
+
indirect=False,
|
|
1175
1219
|
) -> list[Optional[DatasetDependency]]:
|
|
1176
|
-
dataset = self.get_dataset(
|
|
1220
|
+
dataset = self.get_dataset(
|
|
1221
|
+
name,
|
|
1222
|
+
namespace_name=namespace_name,
|
|
1223
|
+
project_name=project_name,
|
|
1224
|
+
)
|
|
1177
1225
|
|
|
1178
1226
|
direct_dependencies = self.metastore.get_direct_dataset_dependencies(
|
|
1179
1227
|
dataset, version
|
|
@@ -1187,10 +1235,13 @@ class Catalog:
|
|
|
1187
1235
|
# dependency has been removed
|
|
1188
1236
|
continue
|
|
1189
1237
|
if d.is_dataset:
|
|
1190
|
-
project = self.metastore.get_project(d.project, d.namespace)
|
|
1191
1238
|
# only datasets can have dependencies
|
|
1192
1239
|
d.dependencies = self.get_dataset_dependencies(
|
|
1193
|
-
d.name,
|
|
1240
|
+
d.name,
|
|
1241
|
+
d.version,
|
|
1242
|
+
namespace_name=d.namespace,
|
|
1243
|
+
project_name=d.project,
|
|
1244
|
+
indirect=indirect,
|
|
1194
1245
|
)
|
|
1195
1246
|
|
|
1196
1247
|
return direct_dependencies
|
|
@@ -1340,7 +1391,11 @@ class Catalog:
|
|
|
1340
1391
|
project: Optional[Project] = None,
|
|
1341
1392
|
client_config=None,
|
|
1342
1393
|
) -> list[str]:
|
|
1343
|
-
dataset = self.get_dataset(
|
|
1394
|
+
dataset = self.get_dataset(
|
|
1395
|
+
name,
|
|
1396
|
+
namespace_name=project.namespace.name if project else None,
|
|
1397
|
+
project_name=project.name if project else None,
|
|
1398
|
+
)
|
|
1344
1399
|
|
|
1345
1400
|
return self.warehouse.export_dataset_table(
|
|
1346
1401
|
bucket_uri, dataset, version, client_config
|
|
@@ -1349,7 +1404,11 @@ class Catalog:
|
|
|
1349
1404
|
def dataset_table_export_file_names(
|
|
1350
1405
|
self, name: str, version: str, project: Optional[Project] = None
|
|
1351
1406
|
) -> list[str]:
|
|
1352
|
-
dataset = self.get_dataset(
|
|
1407
|
+
dataset = self.get_dataset(
|
|
1408
|
+
name,
|
|
1409
|
+
namespace_name=project.namespace.name if project else None,
|
|
1410
|
+
project_name=project.name if project else None,
|
|
1411
|
+
)
|
|
1353
1412
|
return self.warehouse.dataset_table_export_file_names(dataset, version)
|
|
1354
1413
|
|
|
1355
1414
|
def remove_dataset(
|
|
@@ -1359,7 +1418,11 @@ class Catalog:
|
|
|
1359
1418
|
version: Optional[str] = None,
|
|
1360
1419
|
force: Optional[bool] = False,
|
|
1361
1420
|
):
|
|
1362
|
-
dataset = self.get_dataset(
|
|
1421
|
+
dataset = self.get_dataset(
|
|
1422
|
+
name,
|
|
1423
|
+
namespace_name=project.namespace.name if project else None,
|
|
1424
|
+
project_name=project.name if project else None,
|
|
1425
|
+
)
|
|
1363
1426
|
if not version and not force:
|
|
1364
1427
|
raise ValueError(f"Missing dataset version from input for dataset {name}")
|
|
1365
1428
|
if version and not dataset.has_version(version):
|
|
@@ -1395,7 +1458,11 @@ class Catalog:
|
|
|
1395
1458
|
if attrs is not None:
|
|
1396
1459
|
update_data["attrs"] = attrs # type: ignore[assignment]
|
|
1397
1460
|
|
|
1398
|
-
dataset = self.get_dataset(
|
|
1461
|
+
dataset = self.get_dataset(
|
|
1462
|
+
name,
|
|
1463
|
+
namespace_name=project.namespace.name if project else None,
|
|
1464
|
+
project_name=project.name if project else None,
|
|
1465
|
+
)
|
|
1399
1466
|
return self.update_dataset(dataset, **update_data)
|
|
1400
1467
|
|
|
1401
1468
|
def ls(
|
|
@@ -1549,7 +1616,9 @@ class Catalog:
|
|
|
1549
1616
|
)
|
|
1550
1617
|
|
|
1551
1618
|
try:
|
|
1552
|
-
local_dataset = self.get_dataset(
|
|
1619
|
+
local_dataset = self.get_dataset(
|
|
1620
|
+
local_ds_name, namespace_name=namespace.name, project_name=project.name
|
|
1621
|
+
)
|
|
1553
1622
|
if local_dataset and local_dataset.has_version(local_ds_version):
|
|
1554
1623
|
raise DataChainError(
|
|
1555
1624
|
f"Local dataset {local_ds_uri} already exists with different uuid,"
|
|
@@ -127,7 +127,8 @@ def get_udf_distributor_class() -> Optional[type["AbstractUDFDistributor"]]:
|
|
|
127
127
|
|
|
128
128
|
|
|
129
129
|
def get_catalog(
|
|
130
|
-
client_config: Optional[dict[str, Any]] = None,
|
|
130
|
+
client_config: Optional[dict[str, Any]] = None,
|
|
131
|
+
in_memory: bool = False,
|
|
131
132
|
) -> "Catalog":
|
|
132
133
|
"""
|
|
133
134
|
Function that creates Catalog instance with appropriate metastore
|
|
@@ -142,8 +143,9 @@ def get_catalog(
|
|
|
142
143
|
"""
|
|
143
144
|
from datachain.catalog import Catalog
|
|
144
145
|
|
|
146
|
+
metastore = get_metastore(in_memory=in_memory)
|
|
145
147
|
return Catalog(
|
|
146
|
-
metastore=
|
|
148
|
+
metastore=metastore,
|
|
147
149
|
warehouse=get_warehouse(in_memory=in_memory),
|
|
148
150
|
client_config=client_config,
|
|
149
151
|
in_memory=in_memory,
|