datachain 0.32.3__tar.gz → 0.33.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.32.3 → datachain-0.33.0}/.pre-commit-config.yaml +1 -1
- {datachain-0.32.3 → datachain-0.33.0}/PKG-INFO +2 -1
- {datachain-0.32.3 → datachain-0.33.0}/pyproject.toml +1 -0
- datachain-0.33.0/src/datachain/checkpoint.py +44 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/client/fsspec.py +6 -1
- datachain-0.33.0/src/datachain/client/http.py +157 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/data_storage/metastore.py +137 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/data_storage/schema.py +1 -1
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/data_storage/sqlite.py +8 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/error.py +4 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/datachain.py +13 -1
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain.egg-info/PKG-INFO +2 -1
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain.egg-info/SOURCES.txt +3 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain.egg-info/requires.txt +1 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_datachain.py +12 -0
- datachain-0.33.0/tests/unit/test_client_http.py +186 -0
- {datachain-0.32.3 → datachain-0.33.0}/.cruft.json +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/.gitattributes +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/.github/codecov.yaml +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/.github/dependabot.yml +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/.github/workflows/release.yml +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/.github/workflows/tests.yml +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/.gitignore +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/LICENSE +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/README.rst +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/api_hooks.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/assets/datachain.svg +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/assets/webhook_list.png +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/commands/auth/login.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/commands/auth/logout.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/commands/auth/team.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/commands/auth/token.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/commands/index.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/commands/job/cancel.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/commands/job/clusters.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/commands/job/logs.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/commands/job/ls.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/commands/job/run.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/contributing.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/examples.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/guide/db_migrations.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/guide/delta.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/guide/env.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/guide/index.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/guide/namespaces.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/guide/processing.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/guide/remotes.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/guide/retry.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/index.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/overrides/main.html +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/quick-start.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/data-types/file.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/data-types/index.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/data-types/pose.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/data-types/segment.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/datachain.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/func.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/functions/array.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/functions/conditional.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/functions/numeric.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/functions/path.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/functions/random.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/functions/string.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/functions/window.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/index.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/toolkit.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/torch.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/references/udf.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/studio/webhooks.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/templates/main.dot +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/templates/operation.dot +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/templates/responses.def +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/docs/tutorials.md +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/multimodal/wds.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/mkdocs.yml +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/noxfile.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/setup.cfg +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/__main__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/asyn.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cache.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/cli/utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/client/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/client/azure.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/client/gcs.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/client/hf.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/client/local.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/client/s3.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/config.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/dataset.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/delta.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/fs/reference.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/fs/utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/func/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/func/array.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/func/base.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/func/conditional.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/func/func.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/func/numeric.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/func/path.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/func/random.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/func/string.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/func/window.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/job.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/audio.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/clip.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/file.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/hf.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/image.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/listing.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/projects.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/settings.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/tar.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/text.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/udf.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/video.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/listing.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/model/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/model/bbox.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/model/pose.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/model/segment.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/model/utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/namespace.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/node.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/progress.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/project.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/py.typed +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/query/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/query/batch.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/query/dataset.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/query/metrics.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/query/params.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/query/queue.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/query/schema.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/query/session.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/query/udf.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/query/utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/remote/studio.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/script_meta.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/semver.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/types.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/sql/utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/studio.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/telemetry.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain/utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/conftest.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/data.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/examples/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/examples/test_examples.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/examples/wds_data.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/data/lena.jpg +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/functions/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/functions/test_array.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/functions/test_path.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/functions/test_random.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/functions/test_string.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/model/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_audio.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_batching.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_catalog.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_client.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_data_storage.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_datasets.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_delta.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_file.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_hf.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_image.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_listing.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_ls.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_metastore.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_metrics.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_mutate.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_pull.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_pytorch.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_query.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_read_database.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_retry.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_session.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_to_database.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_toolkit.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_video.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/func/test_warehouse.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/scripts/feature_class.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/test_atomicity.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/test_cli_e2e.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/test_cli_studio.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/test_import_time.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/test_query_e2e.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/test_telemetry.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/model/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_asyn.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_cache.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_catalog.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_client.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_config.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_dataset.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_func.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_listing.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_metastore.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_query.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_query_params.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_semver.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_serializer.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_session.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_utils.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.32.3 → datachain-0.33.0}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.32.3
|
|
3
|
+
Version: 0.33.0
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -86,6 +86,7 @@ Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
|
86
86
|
Provides-Extra: tests
|
|
87
87
|
Requires-Dist: datachain[audio,hf,postgres,remote,torch,vector,video]; extra == "tests"
|
|
88
88
|
Requires-Dist: pytest<9,>=8; extra == "tests"
|
|
89
|
+
Requires-Dist: pytest-asyncio; extra == "tests"
|
|
89
90
|
Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
|
|
90
91
|
Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
|
|
91
92
|
Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Union
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class Checkpoint:
    """
    A marker for a successfully completed stage of a job run.

    When a job fails it can be resumed from its most recent checkpoint instead
    of being re-run from the start.

    A checkpoint may also be "partial", meaning the stage it refers to was only
    partly finished. For example, if a UDF fails halfway through, the results
    computed so far can still be saved and the job can pick up from that
    partially completed state when it is restarted.
    """

    # Identifier of the checkpoint itself (always stored as a string).
    id: str
    # Identifier of the job the checkpoint belongs to.
    job_id: str
    # Hash value recorded for the checkpoint.
    hash: str
    # True when the stage was only partially completed.
    partial: bool
    # Creation timestamp of the checkpoint.
    created_at: datetime

    @classmethod
    def parse(
        cls,
        id: Union[str, uuid.UUID],
        job_id: str,
        _hash: str,
        partial: bool,
        created_at: datetime,
    ) -> "Checkpoint":
        """
        Build a checkpoint from raw values, given in field order.

        The ``id`` is converted with ``str()`` (so a ``uuid.UUID`` is accepted)
        and ``partial`` is converted with ``bool()``; the remaining values are
        stored unchanged.
        """
        return cls(
            id=str(id),
            job_id=job_id,
            hash=_hash,
            partial=bool(partial),
            created_at=created_at,
        )
|
|
@@ -93,10 +93,11 @@ class Client(ABC):
|
|
|
93
93
|
self.uri = self.get_uri(self.name)
|
|
94
94
|
|
|
95
95
|
@staticmethod
|
|
96
|
-
def get_implementation(url: Union[str, os.PathLike[str]]) -> type["Client"]:
|
|
96
|
+
def get_implementation(url: Union[str, os.PathLike[str]]) -> type["Client"]: # noqa: PLR0911
|
|
97
97
|
from .azure import AzureClient
|
|
98
98
|
from .gcs import GCSClient
|
|
99
99
|
from .hf import HfClient
|
|
100
|
+
from .http import HTTPClient, HTTPSClient
|
|
100
101
|
from .local import FileClient
|
|
101
102
|
from .s3 import ClientS3
|
|
102
103
|
|
|
@@ -114,6 +115,10 @@ class Client(ABC):
|
|
|
114
115
|
return FileClient
|
|
115
116
|
if protocol == HfClient.protocol:
|
|
116
117
|
return HfClient
|
|
118
|
+
if protocol == HTTPClient.protocol:
|
|
119
|
+
return HTTPClient
|
|
120
|
+
if protocol == HTTPSClient.protocol:
|
|
121
|
+
return HTTPSClient
|
|
117
122
|
|
|
118
123
|
raise NotImplementedError(f"Unsupported protocol: {protocol}")
|
|
119
124
|
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Optional, cast
|
|
3
|
+
from urllib.parse import urlparse
|
|
4
|
+
|
|
5
|
+
from fsspec.implementations.http import HTTPFileSystem
|
|
6
|
+
|
|
7
|
+
from datachain.dataset import StorageURI
|
|
8
|
+
from datachain.lib.file import File
|
|
9
|
+
|
|
10
|
+
from .fsspec import Client
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from datachain.cache import Cache
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class HTTPClient(Client):
    """
    Client for ``http://`` sources built on fsspec's ``HTTPFileSystem``.

    The client is read-only: ``upload`` and ``_fetch_dir`` raise
    ``NotImplementedError``. The host part of the URL is used as the client
    name (the "bucket" equivalent) and the rest of the URL as the path.
    """

    FS_CLASS = HTTPFileSystem
    PREFIX: ClassVar[str] = "http://"
    protocol: ClassVar[str] = "http"

    @classmethod
    def create_fs(cls, **kwargs) -> HTTPFileSystem:
        """Create the filesystem object, filling in default options."""
        # Defaults are only applied when the caller did not set the option.
        for option, default in (
            ("simple_links", True),
            ("same_scheme", True),
            ("cache_type", "bytes"),
        ):
            kwargs.setdefault(option, default)

        # Dropped before constructing the filesystem; presumably
        # HTTPFileSystem does not accept this option - TODO confirm.
        kwargs.pop("version_aware", None)

        filesystem = cls.FS_CLASS(**kwargs)
        filesystem.invalidate_cache()
        return cast("HTTPFileSystem", filesystem)

    @classmethod
    def from_name(
        cls,
        name: str,
        cache: "Cache",
        kwargs: dict[str, Any],
    ) -> "HTTPClient":
        """
        Create a client from a name that may or may not carry a URL scheme.

        When a scheme is present it is removed and the name becomes the
        network location followed by the path.
        """
        parts = urlparse(name)
        bare_name = f"{parts.netloc}{parts.path}" if parts.scheme else name
        return cls(bare_name, kwargs, cache)

    @classmethod
    def split_url(cls, url: str) -> tuple[str, str]:
        """Split HTTP/HTTPS URL into domain (bucket equivalent) and path."""
        parts = urlparse(url)

        # The path loses its leading slashes; query and fragment, when
        # present, are appended back so they stay part of the returned path.
        pieces = [parts.path.lstrip("/")]
        if parts.query:
            pieces.append(f"?{parts.query}")
        if parts.fragment:
            pieces.append(f"#{parts.fragment}")

        return parts.netloc, "".join(pieces)

    @classmethod
    def get_uri(cls, name: str) -> "StorageURI":
        """Return ``name`` as a storage URI, adding ``PREFIX`` if it has no scheme."""
        if name.startswith(("http://", "https://")):
            return StorageURI(name)
        return StorageURI(f"{cls.PREFIX}{name}")

    @classmethod
    def is_root_url(cls, url: str) -> bool:
        """Tell whether ``url`` has an empty or ``/`` path and no query or fragment."""
        parts = urlparse(url)
        return parts.path in ("", "/") and not (parts.query or parts.fragment)

    def get_full_path(self, rel_path: str, version_id: Optional[str] = None) -> str:
        """
        Build the full URL for ``rel_path``.

        ``version_id`` is not used by this method.
        """
        if self.name.startswith(("http://", "https://")):
            base = self.name
        else:
            if rel_path and "/" in rel_path:
                leading = rel_path.split("/")[0]
                # A first segment containing a dot (but not starting with one)
                # is treated as a host name: the path is then used as the
                # complete location and the client name is ignored.
                if "." in leading and not leading.startswith("."):
                    return f"{self.protocol}://{rel_path}"

            base = f"{self.protocol}://{self.name}"

        if not rel_path:
            return base

        # Insert a separator only when neither side already supplies one.
        if not (base.endswith("/") or rel_path.startswith("/")):
            base += "/"
        return base + rel_path

    def url(self, path: str, expires: int = 3600, **kwargs) -> str:
        """
        Generate URL for the given path.
        Note: HTTP URLs don't support signed/expiring URLs.
        """
        version_id = kwargs.pop("version_id", None)
        return self.get_full_path(path, version_id)

    def info_to_file(self, v: dict[str, Any], path: str) -> File:
        """
        Convert a filesystem info dictionary into a ``File``.

        The ``ETag`` value is stripped of surrounding double quotes. The
        ``last_modified`` value is parsed when it is a string, converted from
        a timestamp when it is a number, and replaced by the current UTC time
        when it is missing or cannot be parsed.
        """
        etag = v.get("ETag", "").strip('"')

        modified = v.get("last_modified")
        if not modified:
            modified = datetime.now(timezone.utc)
        elif isinstance(modified, str):
            from email.utils import parsedate_to_datetime

            try:
                modified = parsedate_to_datetime(modified)
            except (ValueError, TypeError):
                modified = datetime.now(timezone.utc)
        elif isinstance(modified, (int, float)):
            modified = datetime.fromtimestamp(modified, timezone.utc)

        return File(
            source=self.uri,
            path=path,
            size=v.get("size", 0),
            etag=etag,
            version="",
            is_latest=True,
            last_modified=modified,
        )

    def upload(self, data: bytes, path: str) -> "File":
        """Always raise: uploading is not supported by this client."""
        raise NotImplementedError(
            "HTTP/HTTPS client is read-only. Upload operations are not supported."
        )

    def get_file_info(self, path: str, version_id: Optional[str] = None) -> "File":
        """Look up ``path`` on the filesystem and return it as a ``File``.

        ``version_id`` is not used by this method.
        """
        full_url = self.get_full_path(path)
        return self.info_to_file(self.fs.info(full_url), path)

    def open_object(self, file: "File", use_cache: bool = True, cb=None):
        """
        Open ``file`` for binary reading.

        A cached copy is opened when ``use_cache`` is set and the cache has a
        path for the file; otherwise the remote object is opened and wrapped
        in a ``FileWrapper`` together with ``cb`` (or a no-op callback).
        """
        from datachain.client.fileslice import FileWrapper

        if use_cache:
            cached = self.cache.get_path(file)
            if cached:
                return open(cached, mode="rb")

        assert not file.location
        remote = self.fs.open(self.get_full_path(file.get_path_normalized()))
        progress = cb or (lambda x: None)
        return FileWrapper(remote, progress)

    async def get_file(self, lpath, rpath, callback, version_id: Optional[str] = None):
        """Delegate to the filesystem's ``_get_file``; ``version_id`` is not forwarded."""
        return await self.fs._get_file(lpath, rpath, callback=callback)

    async def _fetch_dir(self, prefix: str, pbar, result_queue) -> set[str]:
        """Always raise: this client does not implement directory fetching."""
        raise NotImplementedError(
            f"Cannot download file from {self.get_full_path(prefix)}"
        )
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class HTTPSClient(HTTPClient):
    """Variant of ``HTTPClient`` that uses the ``https`` protocol and prefix."""

    PREFIX = "https://"
    protocol = "https"
|
|
@@ -13,6 +13,7 @@ from uuid import uuid4
|
|
|
13
13
|
from sqlalchemy import (
|
|
14
14
|
JSON,
|
|
15
15
|
BigInteger,
|
|
16
|
+
Boolean,
|
|
16
17
|
Column,
|
|
17
18
|
DateTime,
|
|
18
19
|
ForeignKey,
|
|
@@ -24,6 +25,7 @@ from sqlalchemy import (
|
|
|
24
25
|
)
|
|
25
26
|
from sqlalchemy.sql import func as f
|
|
26
27
|
|
|
28
|
+
from datachain.checkpoint import Checkpoint
|
|
27
29
|
from datachain.data_storage import JobQueryType, JobStatus
|
|
28
30
|
from datachain.data_storage.serializer import Serializable
|
|
29
31
|
from datachain.dataset import (
|
|
@@ -36,6 +38,7 @@ from datachain.dataset import (
|
|
|
36
38
|
StorageURI,
|
|
37
39
|
)
|
|
38
40
|
from datachain.error import (
|
|
41
|
+
CheckpointNotFoundError,
|
|
39
42
|
DatasetNotFoundError,
|
|
40
43
|
DatasetVersionNotFoundError,
|
|
41
44
|
NamespaceDeleteNotAllowedError,
|
|
@@ -75,6 +78,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
75
78
|
dataset_list_version_class: type[DatasetListVersion] = DatasetListVersion
|
|
76
79
|
dependency_class: type[DatasetDependency] = DatasetDependency
|
|
77
80
|
job_class: type[Job] = Job
|
|
81
|
+
checkpoint_class: type[Checkpoint] = Checkpoint
|
|
78
82
|
|
|
79
83
|
def __init__(
|
|
80
84
|
self,
|
|
@@ -431,6 +435,35 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
431
435
|
def get_job_status(self, job_id: str) -> Optional[JobStatus]:
|
|
432
436
|
"""Returns the status of the given job."""
|
|
433
437
|
|
|
438
|
+
#
|
|
439
|
+
# Checkpoints
|
|
440
|
+
#
|
|
441
|
+
|
|
442
|
+
@abstractmethod
def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
    """
    Iterate over every checkpoint that belongs to the job ``job_id``.

    ``conn`` is an optional connection object; how it is used is up to the
    concrete metastore.
    """
|
|
445
|
+
|
|
446
|
+
@abstractmethod
def get_checkpoint_by_id(self, checkpoint_id: str, conn=None) -> Checkpoint:
    """
    Fetch the single checkpoint identified by ``checkpoint_id``.

    ``conn`` is an optional connection object; how it is used is up to the
    concrete metastore.
    """
|
|
449
|
+
|
|
450
|
+
@abstractmethod
def find_checkpoint(
    self, job_id: str, _hash: str, partial: bool = False, conn=None
) -> Optional[Checkpoint]:
    """
    Look up the checkpoint of a job by its hash and partial flag.

    Declared abstract like the other checkpoint methods of this interface:
    the body is documentation only, so without the decorator a subclass that
    forgot to implement it would silently report "no checkpoint" for every
    lookup.

    Args:
        job_id: Identifier of the job the checkpoint belongs to.
        _hash: Hash value of the checkpoint to look for.
        partial: Whether to look for a partial checkpoint.
        conn: Optional connection object; how it is used is up to the
            concrete metastore.

    Returns:
        The matching checkpoint, or ``None`` when there is none.
    """
|
|
456
|
+
|
|
457
|
+
@abstractmethod
def create_checkpoint(
    self,
    job_id: str,
    _hash: str,
    partial: bool = False,
    conn: Optional[Any] = None,
) -> Checkpoint:
    """
    Create a new checkpoint for the job ``job_id`` and return it.

    ``_hash`` is the hash value to record and ``partial`` marks the
    checkpoint as partial. ``conn`` is an optional connection object; how it
    is used is up to the concrete metastore.
    """
|
|
466
|
+
|
|
434
467
|
|
|
435
468
|
class AbstractDBMetastore(AbstractMetastore):
|
|
436
469
|
"""
|
|
@@ -446,6 +479,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
446
479
|
DATASET_VERSION_TABLE = "datasets_versions"
|
|
447
480
|
DATASET_DEPENDENCY_TABLE = "datasets_dependencies"
|
|
448
481
|
JOBS_TABLE = "jobs"
|
|
482
|
+
CHECKPOINTS_TABLE = "checkpoints"
|
|
449
483
|
|
|
450
484
|
db: "DatabaseEngine"
|
|
451
485
|
|
|
@@ -1663,3 +1697,106 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1663
1697
|
if not results:
|
|
1664
1698
|
return None
|
|
1665
1699
|
return results[0][0]
|
|
1700
|
+
|
|
1701
|
+
#
|
|
1702
|
+
# Checkpoints
|
|
1703
|
+
#
|
|
1704
|
+
|
|
1705
|
+
@staticmethod
def _checkpoints_columns() -> "list[SchemaItem]":
    """
    Return the schema items that make up the checkpoints table.

    The list holds the five columns (``id``, ``job_id``, ``hash``,
    ``partial``, ``created_at``) followed by a unique constraint over the
    ``job_id`` / ``hash`` pair.
    """
    id_column = Column(
        "id",
        Text,
        default=uuid4,
        primary_key=True,
        nullable=False,
    )
    job_id_column = Column("job_id", Text, nullable=True)
    hash_column = Column("hash", Text, nullable=False)
    partial_column = Column("partial", Boolean, default=False)
    created_at_column = Column(
        "created_at", DateTime(timezone=True), nullable=False
    )
    return [
        id_column,
        job_id_column,
        hash_column,
        partial_column,
        created_at_column,
        UniqueConstraint("job_id", "hash"),
    ]
|
|
1721
|
+
|
|
1722
|
+
@cached_property
def _checkpoints_fields(self) -> list[str]:
    """
    Names of the checkpoints schema items, in schema order.

    Items without a truthy name are left out (presumably the unnamed unique
    constraint - TODO confirm).
    """
    names: list[str] = []
    for item in self._checkpoints_columns():
        if item.name:  # type: ignore[attr-defined]
            names.append(item.name)  # type: ignore[attr-defined]
    return names
|
|
1725
|
+
|
|
1726
|
+
@cached_property
def _checkpoints(self) -> "Table":
    """Table object for checkpoints, registered on the engine's metadata."""
    table_name = self.CHECKPOINTS_TABLE
    metadata = self.db.metadata
    schema_items = self._checkpoints_columns()
    return Table(table_name, metadata, *schema_items)
|
|
1733
|
+
|
|
1734
|
+
@abstractmethod
def _checkpoints_insert(self) -> "Insert":
    """Return an insert statement for the checkpoints table."""
|
|
1736
|
+
|
|
1737
|
+
def _checkpoints_select(self, *columns) -> "Select":
    """
    Return a select statement for checkpoints.

    With explicit ``columns`` the statement selects exactly those; without
    any, it is the checkpoints table's own ``select()``.
    """
    if columns:
        return select(*columns)
    return self._checkpoints.select()
|
|
1741
|
+
|
|
1742
|
+
def _checkpoints_delete(self) -> "Delete":
    """Return a delete statement for the checkpoints table."""
    table = self._checkpoints
    return table.delete()
|
|
1744
|
+
|
|
1745
|
+
def _checkpoints_query(self):
    """Return a select over the table columns named in ``_checkpoints_fields``."""
    selected = [
        getattr(self._checkpoints.c, field_name)
        for field_name in self._checkpoints_fields
    ]
    return self._checkpoints_select(*selected)
|
|
1749
|
+
|
|
1750
|
+
def create_checkpoint(
    self,
    job_id: str,
    _hash: str,
    partial: bool = False,
    conn: Optional[Any] = None,
) -> Checkpoint:
    """
    Create a new checkpoint for a job and return it.

    A fresh UUID string is generated as the checkpoint id and the creation
    time is set to the current UTC time.

    Args:
        job_id: Identifier of the job the checkpoint belongs to.
        _hash: Hash value to record for the checkpoint.
        partial: Whether the checkpoint marks a partially completed stage.
        conn: Optional connection passed to the database engine; it is used
            for both the insert and the read-back.

    Returns:
        The stored checkpoint, as read back from the database.

    Raises:
        CheckpointNotFoundError: If the row cannot be read back after the
            insert.
    """
    checkpoint_id = str(uuid4())
    self.db.execute(
        self._checkpoints_insert().values(
            id=checkpoint_id,
            job_id=job_id,
            hash=_hash,
            partial=partial,
            created_at=datetime.now(timezone.utc),
        ),
        conn=conn,
    )
    # Read back on the same connection that performed the insert, so the
    # lookup runs in the same context as the write when the caller supplied
    # its own connection.
    return self.get_checkpoint_by_id(checkpoint_id, conn=conn)
|
|
1772
|
+
|
|
1773
|
+
def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
    """
    Yield the checkpoints whose ``job_id`` column equals ``job_id``.

    All matching rows are fetched and parsed before the first checkpoint is
    yielded.
    """
    stmt = self._checkpoints_query().where(self._checkpoints.c.job_id == job_id)
    fetched = list(self.db.execute(stmt, conn=conn))

    parsed = [self.checkpoint_class.parse(*row) for row in fetched]
    for checkpoint in parsed:
        yield checkpoint
|
|
1779
|
+
|
|
1780
|
+
def get_checkpoint_by_id(self, checkpoint_id: str, conn=None) -> Checkpoint:
    """
    Return the checkpoint whose id equals ``checkpoint_id``.

    Raises ``CheckpointNotFoundError`` when no row matches.
    """
    table = self._checkpoints
    stmt = self._checkpoints_select(table).where(table.c.id == checkpoint_id)
    matches = list(self.db.execute(stmt, conn=conn))
    if matches:
        return self.checkpoint_class.parse(*matches[0])
    raise CheckpointNotFoundError(f"Checkpoint {checkpoint_id} not found")
|
|
1788
|
+
|
|
1789
|
+
def find_checkpoint(
    self, job_id: str, _hash: str, partial: bool = False, conn=None
) -> Optional[Checkpoint]:
    """
    Return the checkpoint matching a job id, hash and partial flag.

    All three values must match the stored row. The first matching row is
    parsed and returned; ``None`` is returned when nothing matches.
    """
    table = self._checkpoints
    stmt = self._checkpoints_select(table).where(
        table.c.job_id == job_id,
        table.c.hash == _hash,
        table.c.partial == partial,
    )
    matches = list(self.db.execute(stmt, conn=conn))
    if matches:
        return self.checkpoint_class.parse(*matches[0])
    return None
|
|
@@ -51,7 +51,7 @@ def dedup_columns(columns: Iterable[sa.Column]) -> list[sa.Column]:
|
|
|
51
51
|
"""
|
|
52
52
|
c_set: dict[str, sa.Column] = {}
|
|
53
53
|
for c in columns:
|
|
54
|
-
if (ec := c_set.get(c.name
|
|
54
|
+
if (ec := c_set.get(c.name)) is not None:
|
|
55
55
|
if str(ec.type) != str(c.type):
|
|
56
56
|
raise ValueError(
|
|
57
57
|
f"conflicting types for column {c.name}:{c.type!s} and {ec.type!s}"
|
|
@@ -459,6 +459,8 @@ class SQLiteMetastore(AbstractDBMetastore):
|
|
|
459
459
|
self.default_table_names.append(self._datasets_dependencies.name)
|
|
460
460
|
self.db.create_table(self._jobs, if_not_exists=True)
|
|
461
461
|
self.default_table_names.append(self._jobs.name)
|
|
462
|
+
self.db.create_table(self._checkpoints, if_not_exists=True)
|
|
463
|
+
self.default_table_names.append(self._checkpoints.name)
|
|
462
464
|
|
|
463
465
|
def _init_namespaces_projects(self) -> None:
|
|
464
466
|
"""
|
|
@@ -543,6 +545,12 @@ class SQLiteMetastore(AbstractDBMetastore):
|
|
|
543
545
|
def _jobs_insert(self) -> "Insert":
|
|
544
546
|
return sqlite.insert(self._jobs)
|
|
545
547
|
|
|
548
|
+
#
|
|
549
|
+
# Checkpoints
|
|
550
|
+
#
|
|
551
|
+
def _checkpoints_insert(self) -> "Insert":
    """Return a SQLite insert statement for the checkpoints table."""
    table = self._checkpoints
    return sqlite.insert(table)
|
|
553
|
+
|
|
546
554
|
#
|
|
547
555
|
# Namespaces
|
|
548
556
|
#
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import copy
|
|
2
|
+
import hashlib
|
|
2
3
|
import os
|
|
3
4
|
import os.path
|
|
4
5
|
import sys
|
|
@@ -18,6 +19,7 @@ from typing import (
|
|
|
18
19
|
cast,
|
|
19
20
|
overload,
|
|
20
21
|
)
|
|
22
|
+
from uuid import uuid4
|
|
21
23
|
|
|
22
24
|
import sqlalchemy
|
|
23
25
|
import ujson as json
|
|
@@ -665,7 +667,7 @@ class DataChain:
|
|
|
665
667
|
name, namespace=namespace_name, project=project_name, **kwargs
|
|
666
668
|
)
|
|
667
669
|
|
|
668
|
-
|
|
670
|
+
result = self._evolve(
|
|
669
671
|
query=self._query.save(
|
|
670
672
|
name=name,
|
|
671
673
|
version=version,
|
|
@@ -678,6 +680,16 @@ class DataChain:
|
|
|
678
680
|
)
|
|
679
681
|
)
|
|
680
682
|
|
|
683
|
+
if job_id := os.getenv("DATACHAIN_JOB_ID"):
|
|
684
|
+
catalog.metastore.create_checkpoint(
|
|
685
|
+
job_id, # type: ignore[arg-type]
|
|
686
|
+
_hash=hashlib.sha256( # TODO this will be replaced with self.hash()
|
|
687
|
+
str(uuid4()).encode()
|
|
688
|
+
).hexdigest(),
|
|
689
|
+
)
|
|
690
|
+
|
|
691
|
+
return result
|
|
692
|
+
|
|
681
693
|
def apply(self, func, *args, **kwargs):
|
|
682
694
|
"""Apply any function to the chain.
|
|
683
695
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.32.3
|
|
3
|
+
Version: 0.33.0
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -86,6 +86,7 @@ Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
|
86
86
|
Provides-Extra: tests
|
|
87
87
|
Requires-Dist: datachain[audio,hf,postgres,remote,torch,vector,video]; extra == "tests"
|
|
88
88
|
Requires-Dist: pytest<9,>=8; extra == "tests"
|
|
89
|
+
Requires-Dist: pytest-asyncio; extra == "tests"
|
|
89
90
|
Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
|
|
90
91
|
Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
|
|
91
92
|
Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
|
|
@@ -106,6 +106,7 @@ src/datachain/__init__.py
|
|
|
106
106
|
src/datachain/__main__.py
|
|
107
107
|
src/datachain/asyn.py
|
|
108
108
|
src/datachain/cache.py
|
|
109
|
+
src/datachain/checkpoint.py
|
|
109
110
|
src/datachain/config.py
|
|
110
111
|
src/datachain/dataset.py
|
|
111
112
|
src/datachain/delta.py
|
|
@@ -154,6 +155,7 @@ src/datachain/client/fileslice.py
|
|
|
154
155
|
src/datachain/client/fsspec.py
|
|
155
156
|
src/datachain/client/gcs.py
|
|
156
157
|
src/datachain/client/hf.py
|
|
158
|
+
src/datachain/client/http.py
|
|
157
159
|
src/datachain/client/local.py
|
|
158
160
|
src/datachain/client/s3.py
|
|
159
161
|
src/datachain/data_storage/__init__.py
|
|
@@ -362,6 +364,7 @@ tests/unit/test_cli_datasets.py
|
|
|
362
364
|
tests/unit/test_cli_parsing.py
|
|
363
365
|
tests/unit/test_client.py
|
|
364
366
|
tests/unit/test_client_gcs.py
|
|
367
|
+
tests/unit/test_client_http.py
|
|
365
368
|
tests/unit/test_client_s3.py
|
|
366
369
|
tests/unit/test_config.py
|
|
367
370
|
tests/unit/test_data_storage.py
|
|
@@ -1651,6 +1651,18 @@ def test_datachain_save_with_job(test_session, catalog, datachain_job_id):
|
|
|
1651
1651
|
assert result_job_id == datachain_job_id
|
|
1652
1652
|
|
|
1653
1653
|
|
|
1654
|
+
def test_datachain_with_job_and_checkpoint(test_session, catalog, datachain_job_id):
    """Saving a chain under a job id records exactly one full checkpoint."""
    chain = dc.read_values(value=["val1", "val2"], session=test_session)
    chain.save("my-ds")

    recorded = list(catalog.metastore.list_checkpoints(datachain_job_id))
    assert len(recorded) == 1

    checkpoint = recorded[0]
    assert checkpoint.job_id == datachain_job_id
    assert checkpoint.hash
    assert checkpoint.partial is False
    assert checkpoint.created_at
|
|
1664
|
+
|
|
1665
|
+
|
|
1654
1666
|
def test_group_by_signals(cloud_test_catalog):
|
|
1655
1667
|
from datachain import func
|
|
1656
1668
|
|