datachain 0.37.2__tar.gz → 0.37.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.37.2 → datachain-0.37.4}/PKG-INFO +1 -1
- {datachain-0.37.2 → datachain-0.37.4}/docs/guide/checkpoints.md +15 -7
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/data_storage/sqlite.py +0 -56
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/data_storage/warehouse.py +0 -15
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.37.2 → datachain-0.37.4}/.cruft.json +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.gitattributes +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.github/codecov.yaml +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.github/dependabot.yml +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.github/workflows/release.yml +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.github/workflows/tests.yml +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.gitignore +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/.pre-commit-config.yaml +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/LICENSE +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/README.rst +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/api_hooks.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/assets/datachain.svg +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/assets/webhook_list.png +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/commands/auth/login.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/commands/auth/logout.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/commands/auth/team.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/commands/auth/token.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/commands/index.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/commands/job/cancel.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/commands/job/clusters.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/commands/job/logs.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/commands/job/ls.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/commands/job/run.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/contributing.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/examples.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/guide/db_migrations.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/guide/delta.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/guide/env.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/guide/index.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/guide/namespaces.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/guide/processing.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/guide/remotes.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/guide/retry.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/index.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/overrides/main.html +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/quick-start.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/data-types/file.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/data-types/index.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/data-types/pose.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/data-types/segment.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/datachain.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/func.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/functions/array.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/functions/conditional.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/functions/numeric.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/functions/path.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/functions/random.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/functions/string.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/functions/window.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/index.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/toolkit.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/torch.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/references/udf.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/studio/webhooks.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/templates/main.dot +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/templates/operation.dot +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/templates/responses.def +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/docs/tutorials.md +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/multimodal/wds.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/mkdocs.yml +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/noxfile.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/pyproject.toml +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/setup.cfg +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/__main__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/asyn.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cache.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/catalog/dependency.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/checkpoint.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/cli/utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/client/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/client/azure.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/client/gcs.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/client/hf.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/client/http.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/client/local.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/client/s3.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/config.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/dataset.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/delta.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/error.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/fs/reference.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/fs/utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/func/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/func/array.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/func/base.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/func/conditional.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/func/func.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/func/numeric.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/func/path.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/func/random.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/func/string.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/func/window.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/hash_utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/job.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/audio.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/clip.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/file.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/hf.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/image.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/listing.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/projects.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/settings.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/tar.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/text.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/udf.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/video.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/listing.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/model/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/model/bbox.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/model/pose.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/model/segment.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/model/utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/namespace.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/node.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/plugins.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/progress.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/project.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/py.typed +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/query/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/query/batch.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/query/dataset.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/query/metrics.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/query/params.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/query/queue.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/query/schema.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/query/session.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/query/udf.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/remote/studio.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/script_meta.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/semver.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/types.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/sql/utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/studio.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/telemetry.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain/utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/conftest.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/data.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/examples/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/examples/test_examples.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/examples/wds_data.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/data/lena.jpg +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/functions/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/functions/test_array.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/functions/test_path.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/functions/test_random.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/functions/test_string.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/model/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_audio.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_catalog.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_checkpoints.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_client.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_data_storage.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_datachain.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_datasets.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_delta.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_file.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_hf.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_image.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_listing.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_ls.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_metastore.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_metrics.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_mutate.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_pull.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_pytorch.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_query.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_read_database.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_retry.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_session.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_temp_table_tracking.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_to_database.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_toolkit.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_udf.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_union.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_video.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/func/test_warehouse.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/scripts/feature_class.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/test_atomicity.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/test_cli_e2e.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/test_cli_studio.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/test_import_time.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/test_job_management_e2e.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/test_query_e2e.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/test_telemetry.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_checkpoints.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/model/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_asyn.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_batching.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_cache.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_catalog.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_client.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_client_http.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_config.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_datachain_hash.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_dataset.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_func.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_hash_utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_job_management.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_listing.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_metastore.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_query.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_query_params.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_query_steps_hash.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_semver.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_serializer.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_session.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_utils.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.37.2 → datachain-0.37.4}/tests/utils.py +0 -0
|
@@ -2,10 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
Checkpoints allow DataChain to automatically skip re-creating datasets that were successfully saved in previous script runs. When a script fails or is interrupted, you can re-run it and DataChain will resume from where it left off, reusing datasets that were already created.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Checkpoints are available for both local script runs and Studio executions.
|
|
6
6
|
|
|
7
7
|
## How Checkpoints Work
|
|
8
8
|
|
|
9
|
+
### Local Script Runs
|
|
10
|
+
|
|
9
11
|
When you run a Python script locally (e.g., `python my_script.py`), DataChain automatically:
|
|
10
12
|
|
|
11
13
|
1. **Creates a job** for the script execution, using the script's absolute path as the job name
|
|
@@ -16,6 +18,18 @@ When you run a Python script locally (e.g., `python my_script.py`), DataChain au
|
|
|
16
18
|
|
|
17
19
|
This means that if your script creates multiple datasets and fails partway through, the next run will skip recreating the datasets that were already successfully saved.
|
|
18
20
|
|
|
21
|
+
### Studio Runs
|
|
22
|
+
|
|
23
|
+
When running jobs on Studio, the checkpoint workflow is managed through the UI:
|
|
24
|
+
|
|
25
|
+
1. **Job execution** is triggered using the Run button in the Studio interface
|
|
26
|
+
2. **Checkpoint control** is explicit - you choose between:
|
|
27
|
+
- **Run from scratch**: Ignores any existing checkpoints and recreates all datasets
|
|
28
|
+
- **Continue from last checkpoint**: Resumes from the last successful checkpoint, skipping already-completed stages
|
|
29
|
+
3. **Parent-child job linking** is handled automatically by the system - no need for script path matching or job name conventions
|
|
30
|
+
4. **Checkpoint behavior** during execution is the same as local runs: datasets are saved at each `.save()` call and can be reused on retry
|
|
31
|
+
|
|
32
|
+
|
|
19
33
|
## Example
|
|
20
34
|
|
|
21
35
|
Consider this script that processes data in multiple stages:
|
|
@@ -66,7 +80,6 @@ Checkpoints are **not** used when:
|
|
|
66
80
|
- Running code interactively (Python REPL, Jupyter notebooks)
|
|
67
81
|
- Running code as a module (e.g., `python -m mymodule`)
|
|
68
82
|
- The `DATACHAIN_CHECKPOINTS_RESET` environment variable is set (see below)
|
|
69
|
-
- Running on Studio (checkpoints support planned for future releases)
|
|
70
83
|
|
|
71
84
|
## Resetting Checkpoints
|
|
72
85
|
|
|
@@ -176,17 +189,12 @@ for ds in dc.datasets():
|
|
|
176
189
|
|
|
177
190
|
## Limitations
|
|
178
191
|
|
|
179
|
-
- **Local only:** Checkpoints currently work only for local script runs. Studio support is planned.
|
|
180
192
|
- **Script-based:** Code must be run as a script (not interactively or as a module).
|
|
181
193
|
- **Hash-based matching:** Any change to the chain will create a different hash, preventing checkpoint reuse.
|
|
182
194
|
- **Same script path:** The script must be run from the same absolute path for parent job linking to work.
|
|
183
195
|
|
|
184
196
|
## Future Plans
|
|
185
197
|
|
|
186
|
-
### Studio Support
|
|
187
|
-
|
|
188
|
-
Support for checkpoints on Studio is planned for future releases, which will enable checkpoint functionality for collaborative workflows and cloud-based data processing.
|
|
189
|
-
|
|
190
198
|
### UDF-Level Checkpoints
|
|
191
199
|
|
|
192
200
|
Currently, checkpoints are created only when datasets are saved using `.save()`. This means that if a script fails during a long-running UDF operation (like `.map()`, `.gen()`, or `.agg()`), the entire UDF computation must be rerun on the next execution.
|
|
@@ -29,7 +29,6 @@ from sqlalchemy.sql.selectable import Select
|
|
|
29
29
|
from tqdm.auto import tqdm
|
|
30
30
|
|
|
31
31
|
import datachain.sql.sqlite
|
|
32
|
-
from datachain import semver
|
|
33
32
|
from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
|
|
34
33
|
from datachain.data_storage.db_engine import DatabaseEngine
|
|
35
34
|
from datachain.data_storage.schema import DefaultSchema
|
|
@@ -692,61 +691,6 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
692
691
|
for row in self.db.execute(query, cursor=cur)
|
|
693
692
|
]
|
|
694
693
|
|
|
695
|
-
def merge_dataset_rows(
|
|
696
|
-
self,
|
|
697
|
-
src: DatasetRecord,
|
|
698
|
-
dst: DatasetRecord,
|
|
699
|
-
src_version: str,
|
|
700
|
-
dst_version: str,
|
|
701
|
-
) -> None:
|
|
702
|
-
dst_empty = False
|
|
703
|
-
|
|
704
|
-
if not self.db.has_table(self.dataset_table_name(src, src_version)):
|
|
705
|
-
# source table doesn't exist, nothing to do
|
|
706
|
-
return
|
|
707
|
-
|
|
708
|
-
src_dr = self.dataset_rows(src, src_version).table
|
|
709
|
-
|
|
710
|
-
if not self.db.has_table(self.dataset_table_name(dst, dst_version)):
|
|
711
|
-
# destination table doesn't exist, create it
|
|
712
|
-
self.create_dataset_rows_table(
|
|
713
|
-
self.dataset_table_name(dst, dst_version),
|
|
714
|
-
columns=src_dr.columns,
|
|
715
|
-
)
|
|
716
|
-
dst_empty = True
|
|
717
|
-
|
|
718
|
-
dst_dr = self.dataset_rows(dst, dst_version).table
|
|
719
|
-
merge_fields = [c.name for c in src_dr.columns if c.name != "sys__id"]
|
|
720
|
-
select_src = select(*(getattr(src_dr.columns, f) for f in merge_fields))
|
|
721
|
-
|
|
722
|
-
if dst_empty:
|
|
723
|
-
# we don't need union, but just select from source to destination
|
|
724
|
-
insert_query = sqlite.insert(dst_dr).from_select(merge_fields, select_src)
|
|
725
|
-
else:
|
|
726
|
-
dst_version_latest = None
|
|
727
|
-
# find the previous version of the destination dataset
|
|
728
|
-
dst_previous_versions = [
|
|
729
|
-
v.version
|
|
730
|
-
for v in dst.versions # type: ignore [union-attr]
|
|
731
|
-
if semver.compare(v.version, dst_version) == -1
|
|
732
|
-
]
|
|
733
|
-
if dst_previous_versions:
|
|
734
|
-
dst_version_latest = max(dst_previous_versions)
|
|
735
|
-
|
|
736
|
-
dst_dr_latest = self.dataset_rows(dst, dst_version_latest).table
|
|
737
|
-
|
|
738
|
-
select_dst_latest = select(
|
|
739
|
-
*(getattr(dst_dr_latest.c, f) for f in merge_fields)
|
|
740
|
-
)
|
|
741
|
-
union_query = sqlalchemy.union(select_src, select_dst_latest)
|
|
742
|
-
insert_query = (
|
|
743
|
-
sqlite.insert(dst_dr)
|
|
744
|
-
.from_select(merge_fields, union_query)
|
|
745
|
-
.prefix_with("OR IGNORE")
|
|
746
|
-
)
|
|
747
|
-
|
|
748
|
-
self.db.execute(insert_query)
|
|
749
|
-
|
|
750
694
|
def prepare_entries(self, entries: "Iterable[File]") -> Iterable[dict[str, Any]]:
|
|
751
695
|
return (e.model_dump() for e in entries)
|
|
752
696
|
|
|
@@ -371,21 +371,6 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
371
371
|
table = sa.Table(table_name, self.db.metadata)
|
|
372
372
|
self.db.drop_table(table, if_exists=if_exists)
|
|
373
373
|
|
|
374
|
-
@abstractmethod
|
|
375
|
-
def merge_dataset_rows(
|
|
376
|
-
self,
|
|
377
|
-
src: "DatasetRecord",
|
|
378
|
-
dst: "DatasetRecord",
|
|
379
|
-
src_version: str,
|
|
380
|
-
dst_version: str,
|
|
381
|
-
) -> None:
|
|
382
|
-
"""
|
|
383
|
-
Merges source dataset rows and current latest destination dataset rows
|
|
384
|
-
into a new rows table created for new destination dataset version.
|
|
385
|
-
Note that table for new destination version must be created upfront.
|
|
386
|
-
Merge results should not contain duplicates.
|
|
387
|
-
"""
|
|
388
|
-
|
|
389
374
|
def dataset_rows_select(
|
|
390
375
|
self,
|
|
391
376
|
query: sa.Select,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|