datachain 0.20.1__tar.gz → 0.20.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.20.1 → datachain-0.20.3}/.pre-commit-config.yaml +1 -1
- {datachain-0.20.1 → datachain-0.20.3}/PKG-INFO +1 -1
- {datachain-0.20.1 → datachain-0.20.3}/examples/get_started/json-csv-reader.py +1 -1
- {datachain-0.20.1 → datachain-0.20.3}/examples/incremental_processing/delta.py +1 -1
- {datachain-0.20.1 → datachain-0.20.3}/examples/llm_and_nlp/hf-dataset-llm-eval.py +15 -5
- {datachain-0.20.1 → datachain-0.20.3}/pyproject.toml +2 -1
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/__init__.py +2 -3
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cache.py +2 -2
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/catalog/catalog.py +3 -3
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/ls.py +2 -2
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/fsspec.py +5 -3
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/hf.py +10 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/local.py +4 -4
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/metastore.py +19 -6
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/sqlite.py +2 -2
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/dataset.py +4 -3
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/delta.py +2 -2
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/func.py +1 -1
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/arrow.py +3 -3
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dataset_info.py +4 -4
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/datachain.py +174 -86
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/datasets.py +25 -37
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/storage.py +24 -38
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/file.py +77 -23
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/meta_formats.py +1 -1
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/namespaces.py +16 -18
- datachain-0.20.3/src/datachain/lib/projects.py +86 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/pytorch.py +1 -1
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/tar.py +1 -2
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/udf_signature.py +1 -1
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/webdataset.py +30 -20
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/namespace.py +3 -3
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/project.py +5 -5
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.20.1 → datachain-0.20.3}/tests/conftest.py +6 -11
- {datachain-0.20.1 → datachain-0.20.3}/tests/examples/test_wds_e2e.py +5 -5
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_aggregate.py +7 -9
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_array.py +20 -21
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_conditional.py +6 -7
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_numeric.py +4 -5
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_path.py +6 -8
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_random.py +3 -6
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_string.py +6 -7
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_datachain.py +31 -36
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_delta.py +15 -29
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_file.py +33 -7
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_listing.py +1 -1
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_pull.py +3 -4
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_retry.py +6 -8
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_toolkit.py +2 -2
- {datachain-0.20.1 → datachain-0.20.3}/tests/test_atomicity.py +1 -2
- {datachain-0.20.1 → datachain-0.20.3}/tests/test_import_time.py +2 -2
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_datachain.py +170 -119
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_datachain_bootstrap.py +3 -3
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_datachain_merge.py +11 -11
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_diff.py +43 -45
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_feature_utils.py +2 -2
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_file.py +50 -8
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_listing_info.py +3 -5
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_namespace.py +18 -16
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_project.py +42 -57
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_schema.py +1 -4
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_dataset.py +18 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_func.py +149 -125
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_session.py +1 -2
- {datachain-0.20.1 → datachain-0.20.3}/tests/utils.py +1 -1
- datachain-0.20.1/src/datachain/lib/projects.py +0 -86
- {datachain-0.20.1 → datachain-0.20.3}/.cruft.json +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/.gitattributes +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/.github/codecov.yaml +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/.github/dependabot.yml +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/.github/workflows/release.yml +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/.github/workflows/tests.yml +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/.gitignore +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/LICENSE +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/README.rst +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/assets/datachain.svg +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/commands/auth/login.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/commands/auth/logout.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/commands/auth/team.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/commands/auth/token.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/commands/index.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/commands/job/cancel.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/commands/job/clusters.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/commands/job/logs.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/commands/job/ls.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/commands/job/run.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/contributing.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/examples.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/guide/delta.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/guide/env.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/guide/index.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/guide/processing.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/guide/remotes.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/guide/retry.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/index.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/overrides/main.html +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/quick-start.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/file.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/index.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/pose.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/segment.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/datachain.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/func.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/index.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/toolkit.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/torch.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/references/udf.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/docs/tutorials.md +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/multimodal/wds.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/mkdocs.yml +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/noxfile.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/setup.cfg +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/__main__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/asyn.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/azure.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/gcs.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/s3.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/config.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/error.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/fs/reference.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/fs/utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/array.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/base.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/conditional.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/numeric.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/path.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/random.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/string.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/window.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/job.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/clip.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/hf.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/image.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/listing.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/settings.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/text.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/udf.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/video.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/listing.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/bbox.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/pose.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/segment.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/node.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/progress.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/py.typed +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/batch.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/dataset.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/metrics.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/params.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/queue.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/schema.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/session.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/udf.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/remote/studio.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/script_meta.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/semver.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/types.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/studio.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/telemetry.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain/utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/data.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/examples/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/examples/test_examples.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/examples/wds_data.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/data/lena.jpg +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/model/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_batching.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_catalog.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_client.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_data_storage.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_datasets.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_hf.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_image.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_ls.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_metastore.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_metrics.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_pytorch.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_query.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_read_database.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_session.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_video.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_warehouse.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/scripts/feature_class.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/test_cli_e2e.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/test_cli_studio.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/test_query_e2e.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/test_telemetry.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/model/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_asyn.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_cache.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_catalog.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_client.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_config.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_listing.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_metastore.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_query.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_query_params.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_semver.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_serializer.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_utils.py +0 -0
- {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_warehouse.py +0 -0
|
@@ -48,7 +48,7 @@ def main():
|
|
|
48
48
|
|
|
49
49
|
# Print JSON schema in Pydantic format from main COCO annotation
|
|
50
50
|
chain = dc.read_storage(uri, anon="True").filter(dc.C("file.path").glob("*.json"))
|
|
51
|
-
file =
|
|
51
|
+
file = chain.limit(1).to_values("file")[0]
|
|
52
52
|
print(gen_datamodel_code(file, jmespath="@", model_name="Coco"))
|
|
53
53
|
|
|
54
54
|
# Static JSON schema test parsing 3/7 objects
|
|
@@ -47,7 +47,7 @@ def process_files_with_delta():
|
|
|
47
47
|
print("\nDataset versions:")
|
|
48
48
|
test_dataset = dc.datasets().filter(C("name") == "test_files")
|
|
49
49
|
|
|
50
|
-
for version in test_dataset.
|
|
50
|
+
for version in test_dataset.to_iter("version"):
|
|
51
51
|
print(f"- Version: {version}")
|
|
52
52
|
|
|
53
53
|
# Show the last 3 records to demonstrate the incremental processing
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
1
3
|
from huggingface_hub import InferenceClient
|
|
2
4
|
from requests import HTTPError
|
|
3
5
|
|
|
@@ -23,6 +25,7 @@ def eval_dialog(
|
|
|
23
25
|
) -> DialogEval:
|
|
24
26
|
try:
|
|
25
27
|
completion = client.chat_completion(
|
|
28
|
+
model="meta-llama/Llama-3.3-70B-Instruct",
|
|
26
29
|
messages=[
|
|
27
30
|
{
|
|
28
31
|
"role": "user",
|
|
@@ -31,9 +34,10 @@ def eval_dialog(
|
|
|
31
34
|
],
|
|
32
35
|
response_format={"type": "json", "value": DialogEval.model_json_schema()},
|
|
33
36
|
)
|
|
34
|
-
except HTTPError:
|
|
37
|
+
except HTTPError as e:
|
|
35
38
|
return DialogEval(
|
|
36
|
-
result="Error",
|
|
39
|
+
result="Error",
|
|
40
|
+
reason=f"Error while interacting with the Hugging Face API. {e}",
|
|
37
41
|
)
|
|
38
42
|
|
|
39
43
|
message = completion.choices[0].message
|
|
@@ -48,9 +52,15 @@ def eval_dialog(
|
|
|
48
52
|
# Save to HF as Parquet. Dataset can be previewed here:
|
|
49
53
|
# https://huggingface.co/datasets/dvcorg/test-datachain-llm-eval/viewer
|
|
50
54
|
(
|
|
51
|
-
dc.read_csv(
|
|
52
|
-
|
|
53
|
-
|
|
55
|
+
dc.read_csv(
|
|
56
|
+
"hf://datasets/infinite-dataset-hub/MobilePlanAssistant/data.csv", source=False
|
|
57
|
+
)
|
|
58
|
+
.settings(parallel=True)
|
|
59
|
+
.setup(
|
|
60
|
+
client=lambda: InferenceClient(
|
|
61
|
+
provider="hf-inference", api_key=os.environ["HF_TOKEN"]
|
|
62
|
+
)
|
|
63
|
+
)
|
|
54
64
|
.map(response=eval_dialog)
|
|
55
65
|
.to_parquet("hf://datasets/dvcorg/test-datachain-llm-eval/data.parquet")
|
|
56
66
|
)
|
|
@@ -221,7 +221,8 @@ ignore = [
|
|
|
221
221
|
"PERF203", # perflint - try-except-in-loop, irrelevant for Python>=3.11
|
|
222
222
|
"PERF401",
|
|
223
223
|
"D100", # undocumented-public-module
|
|
224
|
-
"D205" # one-blank-line-after-class
|
|
224
|
+
"D205", # one-blank-line-after-class
|
|
225
|
+
"PLC0415" # import-outside-top-level
|
|
225
226
|
]
|
|
226
227
|
select = [
|
|
227
228
|
"B", # flake8-bugbear
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
from datachain.lib import namespaces, projects
|
|
2
1
|
from datachain.lib.data_model import DataModel, DataType, is_chain_type
|
|
3
2
|
from datachain.lib.dc import (
|
|
4
3
|
C,
|
|
@@ -33,6 +32,7 @@ from datachain.lib.file import (
|
|
|
33
32
|
VideoFrame,
|
|
34
33
|
)
|
|
35
34
|
from datachain.lib.model_store import ModelStore
|
|
35
|
+
from datachain.lib.projects import create as create_project
|
|
36
36
|
from datachain.lib.udf import Aggregator, Generator, Mapper
|
|
37
37
|
from datachain.lib.utils import AbstractUDF, DataChainError
|
|
38
38
|
from datachain.query import metrics, param
|
|
@@ -63,14 +63,13 @@ __all__ = [
|
|
|
63
63
|
"VideoFile",
|
|
64
64
|
"VideoFragment",
|
|
65
65
|
"VideoFrame",
|
|
66
|
+
"create_project",
|
|
66
67
|
"datasets",
|
|
67
68
|
"delete_dataset",
|
|
68
69
|
"is_chain_type",
|
|
69
70
|
"listings",
|
|
70
71
|
"metrics",
|
|
71
|
-
"namespaces",
|
|
72
72
|
"param",
|
|
73
|
-
"projects",
|
|
74
73
|
"read_csv",
|
|
75
74
|
"read_database",
|
|
76
75
|
"read_dataset",
|
|
@@ -39,7 +39,7 @@ def temporary_cache(
|
|
|
39
39
|
cache.destroy()
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
class Cache:
|
|
42
|
+
class Cache: # noqa: PLW1641
|
|
43
43
|
def __init__(self, cache_dir: str, tmp_dir: str):
|
|
44
44
|
self.odb = LocalHashFileDB(
|
|
45
45
|
LocalFileSystem(),
|
|
@@ -76,9 +76,9 @@ class Cache:
|
|
|
76
76
|
async def download(
|
|
77
77
|
self, file: "File", client: "Client", callback: Optional[Callback] = None
|
|
78
78
|
) -> None:
|
|
79
|
-
from_path = f"{file.source}/{file.path}"
|
|
80
79
|
from dvc_objects.fs.utils import tmp_fname
|
|
81
80
|
|
|
81
|
+
from_path = file.get_uri()
|
|
82
82
|
odb_fs = self.odb.fs
|
|
83
83
|
tmp_info = odb_fs.join(self.odb.tmp_dir, tmp_fname()) # type: ignore[arg-type]
|
|
84
84
|
size = file.size
|
|
@@ -1491,13 +1491,13 @@ class Catalog:
|
|
|
1491
1491
|
|
|
1492
1492
|
namespace = self.metastore.create_namespace(
|
|
1493
1493
|
remote_ds.project.namespace.name,
|
|
1494
|
-
description=remote_ds.project.namespace.
|
|
1494
|
+
description=remote_ds.project.namespace.descr,
|
|
1495
1495
|
uuid=remote_ds.project.namespace.uuid,
|
|
1496
1496
|
)
|
|
1497
1497
|
project = self.metastore.create_project(
|
|
1498
|
-
remote_ds.project.name,
|
|
1499
1498
|
namespace.name,
|
|
1500
|
-
|
|
1499
|
+
remote_ds.project.name,
|
|
1500
|
+
description=remote_ds.project.descr,
|
|
1501
1501
|
uuid=remote_ds.project.uuid,
|
|
1502
1502
|
)
|
|
1503
1503
|
|
|
@@ -63,8 +63,8 @@ def ls_local(
|
|
|
63
63
|
print(format_ls_entry(entry))
|
|
64
64
|
else:
|
|
65
65
|
# Collect results in a list here to prevent interference from `tqdm` and `print`
|
|
66
|
-
listing =
|
|
67
|
-
for ls in listing:
|
|
66
|
+
listing = listings().to_list("listing")
|
|
67
|
+
for (ls,) in listing:
|
|
68
68
|
print(format_ls_entry(f"{ls.uri}@v{ls.version}")) # type: ignore[union-attr]
|
|
69
69
|
|
|
70
70
|
|
|
@@ -207,13 +207,14 @@ class Client(ABC):
|
|
|
207
207
|
)
|
|
208
208
|
|
|
209
209
|
async def get_current_etag(self, file: "File") -> str:
|
|
210
|
+
file_path = file.get_path_normalized()
|
|
210
211
|
kwargs = {}
|
|
211
212
|
if self._is_version_aware():
|
|
212
213
|
kwargs["version_id"] = file.version
|
|
213
214
|
info = await self.fs._info(
|
|
214
|
-
self.get_full_path(
|
|
215
|
+
self.get_full_path(file_path, file.version), **kwargs
|
|
215
216
|
)
|
|
216
|
-
return self.info_to_file(info,
|
|
217
|
+
return self.info_to_file(info, file_path).etag
|
|
217
218
|
|
|
218
219
|
def get_file_info(self, path: str, version_id: Optional[str] = None) -> "File":
|
|
219
220
|
info = self.fs.info(self.get_full_path(path, version_id), version_id=version_id)
|
|
@@ -386,7 +387,8 @@ class Client(ABC):
|
|
|
386
387
|
return open(cache_path, mode="rb")
|
|
387
388
|
assert not file.location
|
|
388
389
|
return FileWrapper(
|
|
389
|
-
self.fs.open(self.get_full_path(file.
|
|
390
|
+
self.fs.open(self.get_full_path(file.get_path_normalized(), file.version)),
|
|
391
|
+
cb,
|
|
390
392
|
) # type: ignore[return-value]
|
|
391
393
|
|
|
392
394
|
def upload(self, data: bytes, path: str) -> "File":
|
|
@@ -21,6 +21,9 @@ def _wrap_class(sync_fs_class):
|
|
|
21
21
|
asynchronous to False by default. This is similar to other Async FS
|
|
22
22
|
we initialize. E.g. it means we don't break things in Jupyter where code
|
|
23
23
|
run in async.
|
|
24
|
+
|
|
25
|
+
This also fixes write operations by ensuring they are properly forwarded
|
|
26
|
+
to the underlying filesystem without async buffering issues.
|
|
24
27
|
"""
|
|
25
28
|
from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
|
|
26
29
|
|
|
@@ -29,6 +32,13 @@ def _wrap_class(sync_fs_class):
|
|
|
29
32
|
sync_fs = sync_fs_class(*args, **kwargs)
|
|
30
33
|
super().__init__(sync_fs, asynchronous=False)
|
|
31
34
|
|
|
35
|
+
def open(self, path, mode="rb", **kwargs):
|
|
36
|
+
# Override open to ensure write operations work correctly.
|
|
37
|
+
# It seems to be a bug in the fsspec wrapper. It avoids
|
|
38
|
+
# wrapping open() explicitly but also doesn't redirect it to
|
|
39
|
+
# sync filesystem.
|
|
40
|
+
return self.sync_fs.open(path, mode, **kwargs)
|
|
41
|
+
|
|
32
42
|
GeneratedAsyncFileSystemWrapper.__name__ = f"Async{sync_fs_class.__name__}Wrapper"
|
|
33
43
|
return GeneratedAsyncFileSystemWrapper
|
|
34
44
|
|
|
@@ -99,7 +99,7 @@ class FileClient(Client):
|
|
|
99
99
|
)
|
|
100
100
|
|
|
101
101
|
async def get_current_etag(self, file: "File") -> str:
|
|
102
|
-
info = self.fs.info(self.get_full_path(file.
|
|
102
|
+
info = self.fs.info(self.get_full_path(file.get_path_normalized()))
|
|
103
103
|
return self.info_to_file(info, "").etag
|
|
104
104
|
|
|
105
105
|
async def get_size(self, path: str, version_id: Optional[str] = None) -> int:
|
|
@@ -138,8 +138,8 @@ class FileClient(Client):
|
|
|
138
138
|
if not self.use_symlinks:
|
|
139
139
|
super().fetch_nodes(nodes, shared_progress_bar)
|
|
140
140
|
|
|
141
|
-
def do_instantiate_object(self,
|
|
141
|
+
def do_instantiate_object(self, file: File, dst: str) -> None:
|
|
142
142
|
if self.use_symlinks:
|
|
143
|
-
os.symlink(Path(self.name,
|
|
143
|
+
os.symlink(Path(self.name, file.path), dst)
|
|
144
144
|
else:
|
|
145
|
-
super().do_instantiate_object(
|
|
145
|
+
super().do_instantiate_object(file, dst)
|
|
@@ -185,8 +185,8 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
185
185
|
@abstractmethod
|
|
186
186
|
def create_project(
|
|
187
187
|
self,
|
|
188
|
-
name: str,
|
|
189
188
|
namespace_name: str,
|
|
189
|
+
name: str,
|
|
190
190
|
description: Optional[str] = None,
|
|
191
191
|
uuid: Optional[str] = None,
|
|
192
192
|
ignore_if_exists: bool = True,
|
|
@@ -195,8 +195,13 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
195
195
|
"""Creates new project in specific namespace"""
|
|
196
196
|
|
|
197
197
|
@abstractmethod
|
|
198
|
-
def get_project(
|
|
199
|
-
|
|
198
|
+
def get_project(
|
|
199
|
+
self, name: str, namespace_name: str, create: bool = False, conn=None
|
|
200
|
+
) -> Project:
|
|
201
|
+
"""
|
|
202
|
+
Gets a single project inside some namespace by name.
|
|
203
|
+
It also creates project if not found and create flag is set to True.
|
|
204
|
+
"""
|
|
200
205
|
|
|
201
206
|
@abstractmethod
|
|
202
207
|
def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
|
|
@@ -763,14 +768,18 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
763
768
|
|
|
764
769
|
def create_project(
|
|
765
770
|
self,
|
|
766
|
-
name: str,
|
|
767
771
|
namespace_name: str,
|
|
772
|
+
name: str,
|
|
768
773
|
description: Optional[str] = None,
|
|
769
774
|
uuid: Optional[str] = None,
|
|
770
775
|
ignore_if_exists: bool = True,
|
|
771
776
|
**kwargs,
|
|
772
777
|
) -> Project:
|
|
773
|
-
|
|
778
|
+
try:
|
|
779
|
+
namespace = self.get_namespace(namespace_name)
|
|
780
|
+
except NamespaceNotFoundError:
|
|
781
|
+
namespace = self.create_namespace(namespace_name)
|
|
782
|
+
|
|
774
783
|
query = self._projects_insert().values(
|
|
775
784
|
namespace_id=namespace.id,
|
|
776
785
|
uuid=uuid or str(uuid4()),
|
|
@@ -788,7 +797,9 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
788
797
|
|
|
789
798
|
return self.get_project(name, namespace.name)
|
|
790
799
|
|
|
791
|
-
def get_project(
|
|
800
|
+
def get_project(
|
|
801
|
+
self, name: str, namespace_name: str, create: bool = False, conn=None
|
|
802
|
+
) -> Project:
|
|
792
803
|
"""Gets a single project inside some namespace by name"""
|
|
793
804
|
n = self._namespaces
|
|
794
805
|
p = self._projects
|
|
@@ -803,6 +814,8 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
803
814
|
|
|
804
815
|
rows = list(self.db.execute(query, conn=conn))
|
|
805
816
|
if not rows:
|
|
817
|
+
if create:
|
|
818
|
+
return self.create_project(namespace_name, name)
|
|
806
819
|
raise ProjectNotFoundError(
|
|
807
820
|
f"Project {name} in namespace {namespace_name} not found."
|
|
808
821
|
)
|
|
@@ -469,10 +469,10 @@ class SQLiteMetastore(AbstractDBMetastore):
|
|
|
469
469
|
Studio dataset.
|
|
470
470
|
"""
|
|
471
471
|
system_namespace = self.create_namespace(Namespace.system(), "System namespace")
|
|
472
|
-
self.create_project(Project.listing(),
|
|
472
|
+
self.create_project(system_namespace.name, Project.listing(), "Listing project")
|
|
473
473
|
|
|
474
474
|
local_namespace = self.create_namespace(Namespace.default(), "Local namespace")
|
|
475
|
-
self.create_project(Project.default(),
|
|
475
|
+
self.create_project(local_namespace.name, Project.default(), "Local project")
|
|
476
476
|
|
|
477
477
|
def _check_schema_version(self) -> None:
|
|
478
478
|
"""
|
|
@@ -83,10 +83,11 @@ def parse_dataset_name(name: str) -> tuple[Optional[str], Optional[str], str]:
|
|
|
83
83
|
if not name:
|
|
84
84
|
raise ValueError("Name must be defined to parse it")
|
|
85
85
|
split = name.split(".")
|
|
86
|
-
|
|
87
|
-
|
|
86
|
+
name = split[-1]
|
|
87
|
+
project_name = split[-2] if len(split) > 1 else None
|
|
88
|
+
namespace_name = split[-3] if len(split) > 2 else None
|
|
88
89
|
|
|
89
|
-
return
|
|
90
|
+
return namespace_name, project_name, name
|
|
90
91
|
|
|
91
92
|
|
|
92
93
|
class DatasetDependencyType:
|
|
@@ -62,7 +62,7 @@ def _get_delta_chain(
|
|
|
62
62
|
)
|
|
63
63
|
|
|
64
64
|
# Calculate diff between source versions
|
|
65
|
-
return source_dc_latest.
|
|
65
|
+
return source_dc_latest.diff(source_dc, on=on, compare=compare, deleted=False)
|
|
66
66
|
|
|
67
67
|
|
|
68
68
|
def _get_retry_chain(
|
|
@@ -237,7 +237,7 @@ def delta_retry_update(
|
|
|
237
237
|
return None, None, False
|
|
238
238
|
|
|
239
239
|
latest_dataset = datachain.read_dataset(name, version=latest_version)
|
|
240
|
-
compared_chain = latest_dataset.
|
|
240
|
+
compared_chain = latest_dataset.diff(
|
|
241
241
|
processing_chain,
|
|
242
242
|
on=right_on or on,
|
|
243
243
|
added=True,
|
|
@@ -76,7 +76,7 @@ class ArrowGenerator(Generator):
|
|
|
76
76
|
fs_path = file.path
|
|
77
77
|
fs = ReferenceFileSystem({fs_path: [cache_path]})
|
|
78
78
|
else:
|
|
79
|
-
fs, fs_path = file.get_fs(), file.
|
|
79
|
+
fs, fs_path = file.get_fs(), file.get_fs_path()
|
|
80
80
|
|
|
81
81
|
kwargs = self.kwargs
|
|
82
82
|
if format := kwargs.get("format"):
|
|
@@ -160,8 +160,8 @@ def infer_schema(chain: "DataChain", **kwargs) -> pa.Schema:
|
|
|
160
160
|
kwargs["format"] = fix_pyarrow_format(format, parse_options)
|
|
161
161
|
|
|
162
162
|
schemas = []
|
|
163
|
-
for file in chain.
|
|
164
|
-
ds = dataset(file.
|
|
163
|
+
for (file,) in chain.to_iter("file"):
|
|
164
|
+
ds = dataset(file.get_fs_path(), filesystem=file.get_fs(), **kwargs) # type: ignore[union-attr]
|
|
165
165
|
schemas.append(ds.schema)
|
|
166
166
|
if not schemas:
|
|
167
167
|
raise ValueError(
|
|
@@ -22,8 +22,8 @@ if TYPE_CHECKING:
|
|
|
22
22
|
|
|
23
23
|
class DatasetInfo(DataModel):
|
|
24
24
|
name: str
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
namespace: str
|
|
26
|
+
project: str
|
|
27
27
|
uuid: str = Field(default=str(uuid4()))
|
|
28
28
|
version: str = Field(default=DEFAULT_DATASET_VERSION)
|
|
29
29
|
status: int = Field(default=DatasetStatus.CREATED)
|
|
@@ -93,8 +93,8 @@ class DatasetInfo(DataModel):
|
|
|
93
93
|
return cls(
|
|
94
94
|
uuid=version.uuid,
|
|
95
95
|
name=dataset.name,
|
|
96
|
-
|
|
97
|
-
|
|
96
|
+
namespace=dataset.project.namespace.name,
|
|
97
|
+
project=dataset.project.name,
|
|
98
98
|
version=version.version,
|
|
99
99
|
status=version.status,
|
|
100
100
|
created_at=version.created_at,
|