datachain 0.32.2__tar.gz → 0.33.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.32.2 → datachain-0.33.0}/.pre-commit-config.yaml +1 -1
- {datachain-0.32.2 → datachain-0.33.0}/PKG-INFO +2 -1
- {datachain-0.32.2 → datachain-0.33.0}/pyproject.toml +1 -0
- datachain-0.33.0/src/datachain/checkpoint.py +44 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/client/fsspec.py +6 -1
- datachain-0.33.0/src/datachain/client/http.py +157 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/data_storage/metastore.py +153 -24
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/data_storage/schema.py +1 -1
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/data_storage/sqlite.py +8 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/error.py +4 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/datachain.py +13 -1
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/file.py +14 -6
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain.egg-info/PKG-INFO +2 -1
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain.egg-info/SOURCES.txt +3 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain.egg-info/requires.txt +1 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_datachain.py +12 -0
- datachain-0.33.0/tests/unit/test_client_http.py +186 -0
- {datachain-0.32.2 → datachain-0.33.0}/.cruft.json +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/.gitattributes +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/.github/codecov.yaml +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/.github/dependabot.yml +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/.github/workflows/release.yml +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/.github/workflows/tests.yml +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/.gitignore +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/LICENSE +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/README.rst +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/api_hooks.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/assets/datachain.svg +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/assets/webhook_list.png +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/commands/auth/login.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/commands/auth/logout.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/commands/auth/team.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/commands/auth/token.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/commands/index.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/commands/job/cancel.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/commands/job/clusters.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/commands/job/logs.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/commands/job/ls.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/commands/job/run.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/contributing.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/examples.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/guide/db_migrations.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/guide/delta.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/guide/env.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/guide/index.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/guide/namespaces.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/guide/processing.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/guide/remotes.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/guide/retry.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/index.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/overrides/main.html +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/quick-start.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/data-types/file.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/data-types/index.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/data-types/pose.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/data-types/segment.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/datachain.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/func.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/functions/array.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/functions/conditional.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/functions/numeric.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/functions/path.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/functions/random.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/functions/string.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/functions/window.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/index.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/toolkit.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/torch.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/references/udf.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/studio/webhooks.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/templates/main.dot +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/templates/operation.dot +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/templates/responses.def +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/docs/tutorials.md +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/multimodal/wds.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/mkdocs.yml +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/noxfile.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/setup.cfg +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/__main__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/asyn.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cache.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/cli/utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/client/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/client/azure.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/client/gcs.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/client/hf.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/client/local.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/client/s3.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/config.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/dataset.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/delta.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/fs/reference.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/fs/utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/func/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/func/array.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/func/base.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/func/conditional.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/func/func.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/func/numeric.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/func/path.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/func/random.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/func/string.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/func/window.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/job.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/audio.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/clip.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/hf.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/image.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/listing.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/projects.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/settings.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/tar.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/text.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/udf.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/video.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/listing.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/model/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/model/bbox.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/model/pose.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/model/segment.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/model/utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/namespace.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/node.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/progress.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/project.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/py.typed +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/query/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/query/batch.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/query/dataset.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/query/metrics.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/query/params.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/query/queue.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/query/schema.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/query/session.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/query/udf.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/query/utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/remote/studio.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/script_meta.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/semver.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/types.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/sql/utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/studio.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/telemetry.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain/utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/conftest.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/data.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/examples/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/examples/test_examples.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/examples/wds_data.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/data/lena.jpg +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/functions/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/functions/test_array.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/functions/test_path.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/functions/test_random.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/functions/test_string.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/model/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_audio.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_batching.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_catalog.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_client.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_data_storage.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_datasets.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_delta.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_file.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_hf.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_image.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_listing.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_ls.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_metastore.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_metrics.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_mutate.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_pull.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_pytorch.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_query.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_read_database.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_retry.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_session.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_to_database.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_toolkit.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_video.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/func/test_warehouse.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/scripts/feature_class.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/test_atomicity.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/test_cli_e2e.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/test_cli_studio.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/test_import_time.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/test_query_e2e.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/test_telemetry.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/model/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_asyn.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_cache.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_catalog.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_client.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_config.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_dataset.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_func.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_listing.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_metastore.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_query.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_query_params.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_semver.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_serializer.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_session.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_utils.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.32.2 → datachain-0.33.0}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.32.2
|
|
3
|
+
Version: 0.33.0
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -86,6 +86,7 @@ Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
|
86
86
|
Provides-Extra: tests
|
|
87
87
|
Requires-Dist: datachain[audio,hf,postgres,remote,torch,vector,video]; extra == "tests"
|
|
88
88
|
Requires-Dist: pytest<9,>=8; extra == "tests"
|
|
89
|
+
Requires-Dist: pytest-asyncio; extra == "tests"
|
|
89
90
|
Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
|
|
90
91
|
Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
|
|
91
92
|
Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Union
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
|
|
8
|
+
class Checkpoint:
|
|
9
|
+
"""
|
|
10
|
+
Represents a checkpoint within a job run.
|
|
11
|
+
|
|
12
|
+
A checkpoint marks a successfully completed stage of execution. In the event
|
|
13
|
+
of a failure, the job can resume from the most recent checkpoint rather than
|
|
14
|
+
starting over from the beginning.
|
|
15
|
+
|
|
16
|
+
Checkpoints can also be created in a "partial" mode, which indicates that the
|
|
17
|
+
work at this stage was only partially completed. For example, if a failure
|
|
18
|
+
occurs halfway through running a UDF, already computed results can still be
|
|
19
|
+
saved, allowing the job to resume from that partially completed state on
|
|
20
|
+
restart.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
id: str
|
|
24
|
+
job_id: str
|
|
25
|
+
hash: str
|
|
26
|
+
partial: bool
|
|
27
|
+
created_at: datetime
|
|
28
|
+
|
|
29
|
+
@classmethod
|
|
30
|
+
def parse(
|
|
31
|
+
cls,
|
|
32
|
+
id: Union[str, uuid.UUID],
|
|
33
|
+
job_id: str,
|
|
34
|
+
_hash: str,
|
|
35
|
+
partial: bool,
|
|
36
|
+
created_at: datetime,
|
|
37
|
+
) -> "Checkpoint":
|
|
38
|
+
return cls(
|
|
39
|
+
str(id),
|
|
40
|
+
job_id,
|
|
41
|
+
_hash,
|
|
42
|
+
bool(partial),
|
|
43
|
+
created_at,
|
|
44
|
+
)
|
|
@@ -93,10 +93,11 @@ class Client(ABC):
|
|
|
93
93
|
self.uri = self.get_uri(self.name)
|
|
94
94
|
|
|
95
95
|
@staticmethod
|
|
96
|
-
def get_implementation(url: Union[str, os.PathLike[str]]) -> type["Client"]:
|
|
96
|
+
def get_implementation(url: Union[str, os.PathLike[str]]) -> type["Client"]: # noqa: PLR0911
|
|
97
97
|
from .azure import AzureClient
|
|
98
98
|
from .gcs import GCSClient
|
|
99
99
|
from .hf import HfClient
|
|
100
|
+
from .http import HTTPClient, HTTPSClient
|
|
100
101
|
from .local import FileClient
|
|
101
102
|
from .s3 import ClientS3
|
|
102
103
|
|
|
@@ -114,6 +115,10 @@ class Client(ABC):
|
|
|
114
115
|
return FileClient
|
|
115
116
|
if protocol == HfClient.protocol:
|
|
116
117
|
return HfClient
|
|
118
|
+
if protocol == HTTPClient.protocol:
|
|
119
|
+
return HTTPClient
|
|
120
|
+
if protocol == HTTPSClient.protocol:
|
|
121
|
+
return HTTPSClient
|
|
117
122
|
|
|
118
123
|
raise NotImplementedError(f"Unsupported protocol: {protocol}")
|
|
119
124
|
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Optional, cast
|
|
3
|
+
from urllib.parse import urlparse
|
|
4
|
+
|
|
5
|
+
from fsspec.implementations.http import HTTPFileSystem
|
|
6
|
+
|
|
7
|
+
from datachain.dataset import StorageURI
|
|
8
|
+
from datachain.lib.file import File
|
|
9
|
+
|
|
10
|
+
from .fsspec import Client
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from datachain.cache import Cache
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class HTTPClient(Client):
|
|
17
|
+
FS_CLASS = HTTPFileSystem
|
|
18
|
+
PREFIX: ClassVar[str] = "http://"
|
|
19
|
+
protocol: ClassVar[str] = "http"
|
|
20
|
+
|
|
21
|
+
@classmethod
|
|
22
|
+
def create_fs(cls, **kwargs) -> HTTPFileSystem:
|
|
23
|
+
# Configure HTTPFileSystem options
|
|
24
|
+
kwargs.setdefault("simple_links", True)
|
|
25
|
+
kwargs.setdefault("same_scheme", True)
|
|
26
|
+
kwargs.setdefault("cache_type", "bytes")
|
|
27
|
+
|
|
28
|
+
kwargs.pop("version_aware", None)
|
|
29
|
+
|
|
30
|
+
fs = cls.FS_CLASS(**kwargs)
|
|
31
|
+
fs.invalidate_cache()
|
|
32
|
+
return cast("HTTPFileSystem", fs)
|
|
33
|
+
|
|
34
|
+
@classmethod
|
|
35
|
+
def from_name(
|
|
36
|
+
cls,
|
|
37
|
+
name: str,
|
|
38
|
+
cache: "Cache",
|
|
39
|
+
kwargs: dict[str, Any],
|
|
40
|
+
) -> "HTTPClient":
|
|
41
|
+
parsed = urlparse(name)
|
|
42
|
+
|
|
43
|
+
if parsed.scheme:
|
|
44
|
+
name = parsed.netloc + parsed.path
|
|
45
|
+
|
|
46
|
+
return cls(name, kwargs, cache)
|
|
47
|
+
|
|
48
|
+
@classmethod
|
|
49
|
+
def split_url(cls, url: str) -> tuple[str, str]:
|
|
50
|
+
"""Split HTTP/HTTPS URL into domain (bucket equivalent) and path."""
|
|
51
|
+
parsed = urlparse(url)
|
|
52
|
+
domain = parsed.netloc
|
|
53
|
+
path = parsed.path.lstrip("/")
|
|
54
|
+
|
|
55
|
+
if parsed.query:
|
|
56
|
+
path += f"?{parsed.query}"
|
|
57
|
+
if parsed.fragment:
|
|
58
|
+
path += f"#{parsed.fragment}"
|
|
59
|
+
|
|
60
|
+
return domain, path
|
|
61
|
+
|
|
62
|
+
@classmethod
|
|
63
|
+
def get_uri(cls, name: str) -> "StorageURI":
|
|
64
|
+
if not name.startswith(("http://", "https://")):
|
|
65
|
+
return StorageURI(f"{cls.PREFIX}{name}")
|
|
66
|
+
return StorageURI(name)
|
|
67
|
+
|
|
68
|
+
@classmethod
|
|
69
|
+
def is_root_url(cls, url: str) -> bool:
|
|
70
|
+
parsed = urlparse(url)
|
|
71
|
+
return parsed.path in ("", "/") and not parsed.query and not parsed.fragment
|
|
72
|
+
|
|
73
|
+
def get_full_path(self, rel_path: str, version_id: Optional[str] = None) -> str:
|
|
74
|
+
if self.name.startswith(("http://", "https://")):
|
|
75
|
+
base_url = self.name
|
|
76
|
+
else:
|
|
77
|
+
if rel_path and "/" in rel_path:
|
|
78
|
+
first_part = rel_path.split("/")[0]
|
|
79
|
+
if "." in first_part and not first_part.startswith("."):
|
|
80
|
+
return f"{self.protocol}://{rel_path}"
|
|
81
|
+
|
|
82
|
+
base_url = f"{self.protocol}://{self.name}"
|
|
83
|
+
|
|
84
|
+
if rel_path:
|
|
85
|
+
if not base_url.endswith("/") and not rel_path.startswith("/"):
|
|
86
|
+
base_url += "/"
|
|
87
|
+
full_url = base_url + rel_path
|
|
88
|
+
else:
|
|
89
|
+
full_url = base_url
|
|
90
|
+
|
|
91
|
+
return full_url
|
|
92
|
+
|
|
93
|
+
def url(self, path: str, expires: int = 3600, **kwargs) -> str:
|
|
94
|
+
"""
|
|
95
|
+
Generate URL for the given path.
|
|
96
|
+
Note: HTTP URLs don't support signed/expiring URLs.
|
|
97
|
+
"""
|
|
98
|
+
return self.get_full_path(path, kwargs.pop("version_id", None))
|
|
99
|
+
|
|
100
|
+
def info_to_file(self, v: dict[str, Any], path: str) -> File:
|
|
101
|
+
etag = v.get("ETag", "").strip('"')
|
|
102
|
+
last_modified = v.get("last_modified")
|
|
103
|
+
if last_modified:
|
|
104
|
+
if isinstance(last_modified, str):
|
|
105
|
+
try:
|
|
106
|
+
from email.utils import parsedate_to_datetime
|
|
107
|
+
|
|
108
|
+
last_modified = parsedate_to_datetime(last_modified)
|
|
109
|
+
except (ValueError, TypeError):
|
|
110
|
+
last_modified = datetime.now(timezone.utc)
|
|
111
|
+
elif isinstance(last_modified, (int, float)):
|
|
112
|
+
last_modified = datetime.fromtimestamp(last_modified, timezone.utc)
|
|
113
|
+
else:
|
|
114
|
+
last_modified = datetime.now(timezone.utc)
|
|
115
|
+
|
|
116
|
+
return File(
|
|
117
|
+
source=self.uri,
|
|
118
|
+
path=path,
|
|
119
|
+
size=v.get("size", 0),
|
|
120
|
+
etag=etag,
|
|
121
|
+
version="",
|
|
122
|
+
is_latest=True,
|
|
123
|
+
last_modified=last_modified,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
def upload(self, data: bytes, path: str) -> "File":
|
|
127
|
+
raise NotImplementedError(
|
|
128
|
+
"HTTP/HTTPS client is read-only. Upload operations are not supported."
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
def get_file_info(self, path: str, version_id: Optional[str] = None) -> "File":
|
|
132
|
+
info = self.fs.info(self.get_full_path(path))
|
|
133
|
+
return self.info_to_file(info, path)
|
|
134
|
+
|
|
135
|
+
def open_object(self, file: "File", use_cache: bool = True, cb=None):
|
|
136
|
+
from datachain.client.fileslice import FileWrapper
|
|
137
|
+
|
|
138
|
+
if use_cache and (cache_path := self.cache.get_path(file)):
|
|
139
|
+
return open(cache_path, mode="rb")
|
|
140
|
+
|
|
141
|
+
assert not file.location
|
|
142
|
+
return FileWrapper(
|
|
143
|
+
self.fs.open(self.get_full_path(file.get_path_normalized())),
|
|
144
|
+
cb or (lambda x: None),
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
async def get_file(self, lpath, rpath, callback, version_id: Optional[str] = None):
|
|
148
|
+
return await self.fs._get_file(lpath, rpath, callback=callback)
|
|
149
|
+
|
|
150
|
+
async def _fetch_dir(self, prefix: str, pbar, result_queue) -> set[str]:
|
|
151
|
+
full_url = self.get_full_path(prefix)
|
|
152
|
+
raise NotImplementedError(f"Cannot download file from {full_url}")
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class HTTPSClient(HTTPClient):
|
|
156
|
+
protocol = "https"
|
|
157
|
+
PREFIX = "https://"
|
|
@@ -13,6 +13,7 @@ from uuid import uuid4
|
|
|
13
13
|
from sqlalchemy import (
|
|
14
14
|
JSON,
|
|
15
15
|
BigInteger,
|
|
16
|
+
Boolean,
|
|
16
17
|
Column,
|
|
17
18
|
DateTime,
|
|
18
19
|
ForeignKey,
|
|
@@ -24,6 +25,7 @@ from sqlalchemy import (
|
|
|
24
25
|
)
|
|
25
26
|
from sqlalchemy.sql import func as f
|
|
26
27
|
|
|
28
|
+
from datachain.checkpoint import Checkpoint
|
|
27
29
|
from datachain.data_storage import JobQueryType, JobStatus
|
|
28
30
|
from datachain.data_storage.serializer import Serializable
|
|
29
31
|
from datachain.dataset import (
|
|
@@ -36,6 +38,7 @@ from datachain.dataset import (
|
|
|
36
38
|
StorageURI,
|
|
37
39
|
)
|
|
38
40
|
from datachain.error import (
|
|
41
|
+
CheckpointNotFoundError,
|
|
39
42
|
DatasetNotFoundError,
|
|
40
43
|
DatasetVersionNotFoundError,
|
|
41
44
|
NamespaceDeleteNotAllowedError,
|
|
@@ -75,6 +78,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
75
78
|
dataset_list_version_class: type[DatasetListVersion] = DatasetListVersion
|
|
76
79
|
dependency_class: type[DatasetDependency] = DatasetDependency
|
|
77
80
|
job_class: type[Job] = Job
|
|
81
|
+
checkpoint_class: type[Checkpoint] = Checkpoint
|
|
78
82
|
|
|
79
83
|
def __init__(
|
|
80
84
|
self,
|
|
@@ -431,6 +435,35 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
431
435
|
def get_job_status(self, job_id: str) -> Optional[JobStatus]:
|
|
432
436
|
"""Returns the status of the given job."""
|
|
433
437
|
|
|
438
|
+
#
|
|
439
|
+
# Checkpoints
|
|
440
|
+
#
|
|
441
|
+
|
|
442
|
+
@abstractmethod
|
|
443
|
+
def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
|
|
444
|
+
"""Returns all checkpoints related to some job"""
|
|
445
|
+
|
|
446
|
+
@abstractmethod
|
|
447
|
+
def get_checkpoint_by_id(self, checkpoint_id: str, conn=None) -> Checkpoint:
|
|
448
|
+
"""Gets single checkpoint by id"""
|
|
449
|
+
|
|
450
|
+
def find_checkpoint(
|
|
451
|
+
self, job_id: str, _hash: str, partial: bool = False, conn=None
|
|
452
|
+
) -> Optional[Checkpoint]:
|
|
453
|
+
"""
|
|
454
|
+
Tries to find checkpoint for a job with specific hash and optionally partial
|
|
455
|
+
"""
|
|
456
|
+
|
|
457
|
+
@abstractmethod
|
|
458
|
+
def create_checkpoint(
|
|
459
|
+
self,
|
|
460
|
+
job_id: str,
|
|
461
|
+
_hash: str,
|
|
462
|
+
partial: bool = False,
|
|
463
|
+
conn: Optional[Any] = None,
|
|
464
|
+
) -> Checkpoint:
|
|
465
|
+
"""Creates new checkpoint"""
|
|
466
|
+
|
|
434
467
|
|
|
435
468
|
class AbstractDBMetastore(AbstractMetastore):
|
|
436
469
|
"""
|
|
@@ -446,6 +479,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
446
479
|
DATASET_VERSION_TABLE = "datasets_versions"
|
|
447
480
|
DATASET_DEPENDENCY_TABLE = "datasets_dependencies"
|
|
448
481
|
JOBS_TABLE = "jobs"
|
|
482
|
+
CHECKPOINTS_TABLE = "checkpoints"
|
|
449
483
|
|
|
450
484
|
db: "DatabaseEngine"
|
|
451
485
|
|
|
@@ -689,9 +723,6 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
689
723
|
return self._projects.select()
|
|
690
724
|
return select(*columns)
|
|
691
725
|
|
|
692
|
-
def _projects_update(self) -> "Update":
|
|
693
|
-
return self._projects.update()
|
|
694
|
-
|
|
695
726
|
def _projects_delete(self) -> "Delete":
|
|
696
727
|
return self._projects.delete()
|
|
697
728
|
|
|
@@ -839,6 +870,16 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
839
870
|
|
|
840
871
|
return self.get_project(name, namespace.name)
|
|
841
872
|
|
|
873
|
+
def _projects_base_query(self) -> "Select":
|
|
874
|
+
n = self._namespaces
|
|
875
|
+
p = self._projects
|
|
876
|
+
|
|
877
|
+
query = self._projects_select(
|
|
878
|
+
*(getattr(n.c, f) for f in self._namespaces_fields),
|
|
879
|
+
*(getattr(p.c, f) for f in self._projects_fields),
|
|
880
|
+
)
|
|
881
|
+
return query.select_from(n.join(p, n.c.id == p.c.namespace_id))
|
|
882
|
+
|
|
842
883
|
def get_project(
|
|
843
884
|
self, name: str, namespace_name: str, create: bool = False, conn=None
|
|
844
885
|
) -> Project:
|
|
@@ -854,11 +895,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
854
895
|
create = True
|
|
855
896
|
validate = False
|
|
856
897
|
|
|
857
|
-
query = self._projects_select(
|
|
858
|
-
*(getattr(n.c, f) for f in self._namespaces_fields),
|
|
859
|
-
*(getattr(p.c, f) for f in self._projects_fields),
|
|
860
|
-
)
|
|
861
|
-
query = query.select_from(n.join(p, n.c.id == p.c.namespace_id)).where(
|
|
898
|
+
query = self._projects_base_query().where(
|
|
862
899
|
p.c.name == name, n.c.name == namespace_name
|
|
863
900
|
)
|
|
864
901
|
|
|
@@ -873,16 +910,9 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
873
910
|
|
|
874
911
|
def get_project_by_id(self, project_id: int, conn=None) -> Project:
|
|
875
912
|
"""Gets a single project by id"""
|
|
876
|
-
n = self._namespaces
|
|
877
913
|
p = self._projects
|
|
878
914
|
|
|
879
|
-
query = self._projects_select(
|
|
880
|
-
*(getattr(n.c, f) for f in self._namespaces_fields),
|
|
881
|
-
*(getattr(p.c, f) for f in self._projects_fields),
|
|
882
|
-
)
|
|
883
|
-
query = query.select_from(n.join(p, n.c.id == p.c.namespace_id)).where(
|
|
884
|
-
p.c.id == project_id
|
|
885
|
-
)
|
|
915
|
+
query = self._projects_base_query().where(p.c.id == project_id)
|
|
886
916
|
|
|
887
917
|
rows = list(self.db.execute(query, conn=conn))
|
|
888
918
|
if not rows:
|
|
@@ -891,7 +921,8 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
891
921
|
|
|
892
922
|
def count_projects(self, namespace_id: Optional[int] = None) -> int:
|
|
893
923
|
p = self._projects
|
|
894
|
-
|
|
924
|
+
|
|
925
|
+
query = self._projects_base_query()
|
|
895
926
|
if namespace_id:
|
|
896
927
|
query = query.where(p.c.namespace_id == namespace_id)
|
|
897
928
|
|
|
@@ -917,17 +948,12 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
917
948
|
"""
|
|
918
949
|
Gets a list of projects inside some namespace, or in all namespaces
|
|
919
950
|
"""
|
|
920
|
-
n = self._namespaces
|
|
921
951
|
p = self._projects
|
|
922
952
|
|
|
923
|
-
query = self._projects_select(
|
|
924
|
-
*(getattr(n.c, f) for f in self._namespaces_fields),
|
|
925
|
-
*(getattr(p.c, f) for f in self._projects_fields),
|
|
926
|
-
)
|
|
927
|
-
query = query.select_from(n.join(p, n.c.id == p.c.namespace_id))
|
|
953
|
+
query = self._projects_base_query()
|
|
928
954
|
|
|
929
955
|
if namespace_id:
|
|
930
|
-
query = query.where(n.c.id == namespace_id)
|
|
956
|
+
query = query.where(p.c.namespace_id == namespace_id)
|
|
931
957
|
|
|
932
958
|
rows = list(self.db.execute(query, conn=conn))
|
|
933
959
|
|
|
@@ -1671,3 +1697,106 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1671
1697
|
if not results:
|
|
1672
1698
|
return None
|
|
1673
1699
|
return results[0][0]
|
|
1700
|
+
|
|
1701
|
+
#
|
|
1702
|
+
# Checkpoints
|
|
1703
|
+
#
|
|
1704
|
+
|
|
1705
|
+
@staticmethod
|
|
1706
|
+
def _checkpoints_columns() -> "list[SchemaItem]":
|
|
1707
|
+
return [
|
|
1708
|
+
Column(
|
|
1709
|
+
"id",
|
|
1710
|
+
Text,
|
|
1711
|
+
default=uuid4,
|
|
1712
|
+
primary_key=True,
|
|
1713
|
+
nullable=False,
|
|
1714
|
+
),
|
|
1715
|
+
Column("job_id", Text, nullable=True),
|
|
1716
|
+
Column("hash", Text, nullable=False),
|
|
1717
|
+
Column("partial", Boolean, default=False),
|
|
1718
|
+
Column("created_at", DateTime(timezone=True), nullable=False),
|
|
1719
|
+
UniqueConstraint("job_id", "hash"),
|
|
1720
|
+
]
|
|
1721
|
+
|
|
1722
|
+
@cached_property
|
|
1723
|
+
def _checkpoints_fields(self) -> list[str]:
|
|
1724
|
+
return [c.name for c in self._checkpoints_columns() if c.name] # type: ignore[attr-defined]
|
|
1725
|
+
|
|
1726
|
+
@cached_property
|
|
1727
|
+
def _checkpoints(self) -> "Table":
|
|
1728
|
+
return Table(
|
|
1729
|
+
self.CHECKPOINTS_TABLE,
|
|
1730
|
+
self.db.metadata,
|
|
1731
|
+
*self._checkpoints_columns(),
|
|
1732
|
+
)
|
|
1733
|
+
|
|
1734
|
+
@abstractmethod
|
|
1735
|
+
def _checkpoints_insert(self) -> "Insert": ...
|
|
1736
|
+
|
|
1737
|
+
def _checkpoints_select(self, *columns) -> "Select":
|
|
1738
|
+
if not columns:
|
|
1739
|
+
return self._checkpoints.select()
|
|
1740
|
+
return select(*columns)
|
|
1741
|
+
|
|
1742
|
+
def _checkpoints_delete(self) -> "Delete":
|
|
1743
|
+
return self._checkpoints.delete()
|
|
1744
|
+
|
|
1745
|
+
def _checkpoints_query(self):
|
|
1746
|
+
return self._checkpoints_select(
|
|
1747
|
+
*[getattr(self._checkpoints.c, f) for f in self._checkpoints_fields]
|
|
1748
|
+
)
|
|
1749
|
+
|
|
1750
|
+
def create_checkpoint(
|
|
1751
|
+
self,
|
|
1752
|
+
job_id: str,
|
|
1753
|
+
_hash: str,
|
|
1754
|
+
partial: bool = False,
|
|
1755
|
+
conn: Optional[Any] = None,
|
|
1756
|
+
) -> Checkpoint:
|
|
1757
|
+
"""
|
|
1758
|
+
Creates a new job query step.
|
|
1759
|
+
"""
|
|
1760
|
+
checkpoint_id = str(uuid4())
|
|
1761
|
+
self.db.execute(
|
|
1762
|
+
self._checkpoints_insert().values(
|
|
1763
|
+
id=checkpoint_id,
|
|
1764
|
+
job_id=job_id,
|
|
1765
|
+
hash=_hash,
|
|
1766
|
+
partial=partial,
|
|
1767
|
+
created_at=datetime.now(timezone.utc),
|
|
1768
|
+
),
|
|
1769
|
+
conn=conn,
|
|
1770
|
+
)
|
|
1771
|
+
return self.get_checkpoint_by_id(checkpoint_id)
|
|
1772
|
+
|
|
1773
|
+
def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
|
|
1774
|
+
"""List checkpoints by job id."""
|
|
1775
|
+
query = self._checkpoints_query().where(self._checkpoints.c.job_id == job_id)
|
|
1776
|
+
rows = list(self.db.execute(query, conn=conn))
|
|
1777
|
+
|
|
1778
|
+
yield from [self.checkpoint_class.parse(*r) for r in rows]
|
|
1779
|
+
|
|
1780
|
+
def get_checkpoint_by_id(self, checkpoint_id: str, conn=None) -> Checkpoint:
|
|
1781
|
+
"""Returns the checkpoint with the given ID."""
|
|
1782
|
+
ch = self._checkpoints
|
|
1783
|
+
query = self._checkpoints_select(ch).where(ch.c.id == checkpoint_id)
|
|
1784
|
+
rows = list(self.db.execute(query, conn=conn))
|
|
1785
|
+
if not rows:
|
|
1786
|
+
raise CheckpointNotFoundError(f"Checkpoint {checkpoint_id} not found")
|
|
1787
|
+
return self.checkpoint_class.parse(*rows[0])
|
|
1788
|
+
|
|
1789
|
+
def find_checkpoint(
|
|
1790
|
+
self, job_id: str, _hash: str, partial: bool = False, conn=None
|
|
1791
|
+
) -> Optional[Checkpoint]:
|
|
1792
|
+
"""
|
|
1793
|
+
Tries to find checkpoint for a job with specific hash and optionally partial
|
|
1794
|
+
"""
|
|
1795
|
+
ch = self._checkpoints
|
|
1796
|
+
query = self._checkpoints_select(ch).where(
|
|
1797
|
+
ch.c.job_id == job_id, ch.c.hash == _hash, ch.c.partial == partial
|
|
1798
|
+
)
|
|
1799
|
+
rows = list(self.db.execute(query, conn=conn))
|
|
1800
|
+
if not rows:
|
|
1801
|
+
return None
|
|
1802
|
+
return self.checkpoint_class.parse(*rows[0])
|
|
@@ -51,7 +51,7 @@ def dedup_columns(columns: Iterable[sa.Column]) -> list[sa.Column]:
|
|
|
51
51
|
"""
|
|
52
52
|
c_set: dict[str, sa.Column] = {}
|
|
53
53
|
for c in columns:
|
|
54
|
-
if (ec := c_set.get(c.name, None)) is not None:
|
|
54
|
+
if (ec := c_set.get(c.name)) is not None:
|
|
55
55
|
if str(ec.type) != str(c.type):
|
|
56
56
|
raise ValueError(
|
|
57
57
|
f"conflicting types for column {c.name}:{c.type!s} and {ec.type!s}"
|
|
@@ -459,6 +459,8 @@ class SQLiteMetastore(AbstractDBMetastore):
|
|
|
459
459
|
self.default_table_names.append(self._datasets_dependencies.name)
|
|
460
460
|
self.db.create_table(self._jobs, if_not_exists=True)
|
|
461
461
|
self.default_table_names.append(self._jobs.name)
|
|
462
|
+
self.db.create_table(self._checkpoints, if_not_exists=True)
|
|
463
|
+
self.default_table_names.append(self._checkpoints.name)
|
|
462
464
|
|
|
463
465
|
def _init_namespaces_projects(self) -> None:
|
|
464
466
|
"""
|
|
@@ -543,6 +545,12 @@ class SQLiteMetastore(AbstractDBMetastore):
|
|
|
543
545
|
def _jobs_insert(self) -> "Insert":
|
|
544
546
|
return sqlite.insert(self._jobs)
|
|
545
547
|
|
|
548
|
+
#
|
|
549
|
+
# Checkpoints
|
|
550
|
+
#
|
|
551
|
+
def _checkpoints_insert(self) -> "Insert":
|
|
552
|
+
return sqlite.insert(self._checkpoints)
|
|
553
|
+
|
|
546
554
|
#
|
|
547
555
|
# Namespaces
|
|
548
556
|
#
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import copy
|
|
2
|
+
import hashlib
|
|
2
3
|
import os
|
|
3
4
|
import os.path
|
|
4
5
|
import sys
|
|
@@ -18,6 +19,7 @@ from typing import (
|
|
|
18
19
|
cast,
|
|
19
20
|
overload,
|
|
20
21
|
)
|
|
22
|
+
from uuid import uuid4
|
|
21
23
|
|
|
22
24
|
import sqlalchemy
|
|
23
25
|
import ujson as json
|
|
@@ -665,7 +667,7 @@ class DataChain:
|
|
|
665
667
|
name, namespace=namespace_name, project=project_name, **kwargs
|
|
666
668
|
)
|
|
667
669
|
|
|
668
|
-
|
|
670
|
+
result = self._evolve(
|
|
669
671
|
query=self._query.save(
|
|
670
672
|
name=name,
|
|
671
673
|
version=version,
|
|
@@ -678,6 +680,16 @@ class DataChain:
|
|
|
678
680
|
)
|
|
679
681
|
)
|
|
680
682
|
|
|
683
|
+
if job_id := os.getenv("DATACHAIN_JOB_ID"):
|
|
684
|
+
catalog.metastore.create_checkpoint(
|
|
685
|
+
job_id, # type: ignore[arg-type]
|
|
686
|
+
_hash=hashlib.sha256( # TODO this will be replaced with self.hash()
|
|
687
|
+
str(uuid4()).encode()
|
|
688
|
+
).hexdigest(),
|
|
689
|
+
)
|
|
690
|
+
|
|
691
|
+
return result
|
|
692
|
+
|
|
681
693
|
def apply(self, func, *args, **kwargs):
|
|
682
694
|
"""Apply any function to the chain.
|
|
683
695
|
|
|
@@ -332,7 +332,10 @@ class File(DataModel):
|
|
|
332
332
|
|
|
333
333
|
@classmethod
|
|
334
334
|
def upload(
|
|
335
|
-
cls, data: bytes, path: str, catalog: Optional["Catalog"] = None
|
|
335
|
+
cls,
|
|
336
|
+
data: bytes,
|
|
337
|
+
path: Union[str, os.PathLike[str]],
|
|
338
|
+
catalog: Optional["Catalog"] = None,
|
|
336
339
|
) -> "Self":
|
|
337
340
|
if catalog is None:
|
|
338
341
|
from datachain.catalog.loader import get_catalog
|
|
@@ -340,8 +343,10 @@ class File(DataModel):
|
|
|
340
343
|
catalog = get_catalog()
|
|
341
344
|
from datachain.client.fsspec import Client
|
|
342
345
|
|
|
343
|
-
|
|
344
|
-
|
|
346
|
+
path_str = stringify_path(path)
|
|
347
|
+
|
|
348
|
+
client_cls = Client.get_implementation(path_str)
|
|
349
|
+
source, rel_path = client_cls.split_url(path_str)
|
|
345
350
|
|
|
346
351
|
client = catalog.get_client(client_cls.get_uri(source))
|
|
347
352
|
file = client.upload(data, rel_path)
|
|
@@ -351,7 +356,9 @@ class File(DataModel):
|
|
|
351
356
|
return file
|
|
352
357
|
|
|
353
358
|
@classmethod
|
|
354
|
-
def at(cls, uri: str, session: Optional["Session"] = None) -> "Self":
|
|
359
|
+
def at(
|
|
360
|
+
cls, uri: Union[str, os.PathLike[str]], session: Optional["Session"] = None
|
|
361
|
+
) -> "Self":
|
|
355
362
|
"""Construct a File from a full URI in one call.
|
|
356
363
|
|
|
357
364
|
Example:
|
|
@@ -364,9 +371,10 @@ class File(DataModel):
|
|
|
364
371
|
if session is None:
|
|
365
372
|
session = Session.get()
|
|
366
373
|
catalog = session.catalog
|
|
374
|
+
uri_str = stringify_path(uri)
|
|
367
375
|
|
|
368
|
-
client_cls = Client.get_implementation(uri)
|
|
369
|
-
source, rel_path = client_cls.split_url(uri)
|
|
376
|
+
client_cls = Client.get_implementation(uri_str)
|
|
377
|
+
source, rel_path = client_cls.split_url(uri_str)
|
|
370
378
|
file = cls(source=client_cls.get_uri(source), path=rel_path)
|
|
371
379
|
file._set_stream(catalog)
|
|
372
380
|
return file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.32.2
|
|
3
|
+
Version: 0.33.0
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -86,6 +86,7 @@ Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
|
86
86
|
Provides-Extra: tests
|
|
87
87
|
Requires-Dist: datachain[audio,hf,postgres,remote,torch,vector,video]; extra == "tests"
|
|
88
88
|
Requires-Dist: pytest<9,>=8; extra == "tests"
|
|
89
|
+
Requires-Dist: pytest-asyncio; extra == "tests"
|
|
89
90
|
Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
|
|
90
91
|
Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
|
|
91
92
|
Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
|