datachain 0.33.0__tar.gz → 0.34.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.33.0 → datachain-0.34.0}/.pre-commit-config.yaml +1 -1
- {datachain-0.33.0 → datachain-0.34.0}/PKG-INFO +2 -2
- {datachain-0.33.0 → datachain-0.34.0}/pyproject.toml +1 -1
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/catalog/catalog.py +58 -22
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/job.py +1 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/metastore.py +22 -1
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/diff/__init__.py +7 -13
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/error.py +4 -0
- datachain-0.34.0/src/datachain/hash_utils.py +147 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/job.py +3 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/datachain.py +166 -70
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/signal_schema.py +7 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/udf.py +20 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/dataset.py +107 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/utils.py +6 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain.egg-info/PKG-INFO +2 -2
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain.egg-info/SOURCES.txt +5 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain.egg-info/requires.txt +1 -1
- {datachain-0.33.0 → datachain-0.34.0}/tests/conftest.py +26 -5
- datachain-0.34.0/tests/unit/lib/test_checkpoints.py +200 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_datachain.py +1 -1
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_diff.py +41 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_signal_schema.py +24 -0
- datachain-0.34.0/tests/unit/test_datachain_hash.py +173 -0
- datachain-0.34.0/tests/unit/test_hash_utils.py +109 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_query.py +22 -3
- datachain-0.34.0/tests/unit/test_query_steps_hash.py +505 -0
- {datachain-0.33.0 → datachain-0.34.0}/.cruft.json +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/.gitattributes +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/.github/codecov.yaml +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/.github/dependabot.yml +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/.github/workflows/release.yml +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/.github/workflows/tests.yml +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/.gitignore +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/LICENSE +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/README.rst +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/api_hooks.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/assets/datachain.svg +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/assets/webhook_list.png +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/commands/auth/login.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/commands/auth/logout.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/commands/auth/team.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/commands/auth/token.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/commands/index.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/commands/job/cancel.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/commands/job/clusters.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/commands/job/logs.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/commands/job/ls.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/commands/job/run.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/contributing.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/examples.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/guide/db_migrations.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/guide/delta.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/guide/env.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/guide/index.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/guide/namespaces.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/guide/processing.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/guide/remotes.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/guide/retry.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/index.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/overrides/main.html +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/quick-start.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/file.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/index.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/pose.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/segment.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/datachain.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/func.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/array.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/conditional.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/numeric.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/path.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/random.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/string.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/window.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/index.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/toolkit.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/torch.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/references/udf.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/studio/webhooks.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/templates/main.dot +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/templates/operation.dot +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/templates/responses.def +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/docs/tutorials.md +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/multimodal/wds.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/mkdocs.yml +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/noxfile.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/setup.cfg +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/__main__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/asyn.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cache.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/checkpoint.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/azure.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/gcs.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/hf.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/http.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/local.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/s3.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/config.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/dataset.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/delta.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/fs/reference.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/fs/utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/array.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/base.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/conditional.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/func.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/numeric.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/path.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/random.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/string.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/window.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/audio.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/clip.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/file.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/hf.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/image.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/listing.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/projects.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/settings.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/tar.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/text.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/video.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/listing.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/bbox.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/pose.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/segment.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/namespace.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/node.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/progress.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/project.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/py.typed +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/batch.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/metrics.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/params.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/queue.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/schema.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/session.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/udf.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/remote/studio.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/script_meta.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/semver.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/types.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/studio.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/telemetry.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/data.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/examples/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/examples/test_examples.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/examples/wds_data.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/data/lena.jpg +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_array.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_path.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_random.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_string.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/model/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_audio.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_batching.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_catalog.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_client.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_data_storage.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_datachain.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_datasets.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_delta.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_file.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_hf.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_image.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_listing.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_ls.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_metastore.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_metrics.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_mutate.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_pull.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_pytorch.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_query.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_read_database.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_retry.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_session.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_to_database.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_toolkit.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_video.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_warehouse.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/scripts/feature_class.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/test_atomicity.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/test_cli_e2e.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/test_cli_studio.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/test_import_time.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/test_query_e2e.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/test_telemetry.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/model/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_asyn.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_cache.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_catalog.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_client.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_client_http.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_config.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_dataset.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_func.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_listing.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_metastore.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_query_params.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_semver.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_serializer.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_session.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_utils.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.33.0 → datachain-0.34.0}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.33.0
|
|
3
|
+
Version: 0.34.0
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -103,7 +103,7 @@ Requires-Dist: scipy; extra == "tests"
|
|
|
103
103
|
Requires-Dist: ultralytics; extra == "tests"
|
|
104
104
|
Provides-Extra: dev
|
|
105
105
|
Requires-Dist: datachain[docs,tests]; extra == "dev"
|
|
106
|
-
Requires-Dist: mypy==1.18.
|
|
106
|
+
Requires-Dist: mypy==1.18.2; extra == "dev"
|
|
107
107
|
Requires-Dist: types-python-dateutil; extra == "dev"
|
|
108
108
|
Requires-Dist: types-dateparser; extra == "dev"
|
|
109
109
|
Requires-Dist: types-pytz; extra == "dev"
|
|
@@ -144,19 +144,26 @@ def shutdown_process(
|
|
|
144
144
|
return proc.wait()
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
def
|
|
147
|
+
def process_output(stream: IO[bytes], callback: Callable[[str], None]) -> None:
    """
    Forward the content of a binary stream to ``callback`` line by line.

    Bytes are accumulated until a newline or a carriage return is read; the
    accumulated bytes (terminator included) are then decoded as UTF-8 and
    passed to ``callback``. Undecodable bytes are replaced rather than raising.
    Any unterminated data left when the stream ends is delivered as a final
    line. The stream is always closed before returning, even if ``callback``
    raises.

    Args:
        stream: Binary stream to read from until it is exhausted.
        callback: Called once per line with the decoded text.
    """
    # A mutable buffer avoids re-copying the whole line on every appended byte,
    # which is what ``bytes += bytes`` does.
    buffer = bytearray()

    try:
        while byt := stream.read(1):  # Read one byte at a time
            buffer += byt

            if byt in (b"\n", b"\r"):  # Check for newline or carriage return
                callback(buffer.decode("utf-8", errors="replace"))
                buffer.clear()  # Reuse the buffer for the next line

        if buffer:  # Handle any remaining data in the buffer
            callback(buffer.decode("utf-8", errors="replace"))
    finally:
        # Best-effort close: a failure to close must not mask an exception
        # raised by the loop above.
        try:
            stream.close()  # Ensure output is closed
        except Exception:  # noqa: BLE001, S110
            pass
|
|
160
167
|
|
|
161
168
|
|
|
162
169
|
class DatasetRowsFetcher(NodesThreadPool):
|
|
@@ -1760,13 +1767,13 @@ class Catalog:
|
|
|
1760
1767
|
recursive=recursive,
|
|
1761
1768
|
)
|
|
1762
1769
|
|
|
1770
|
+
@staticmethod
|
|
1763
1771
|
def query(
|
|
1764
|
-
self,
|
|
1765
1772
|
query_script: str,
|
|
1766
1773
|
env: Optional[Mapping[str, str]] = None,
|
|
1767
1774
|
python_executable: str = sys.executable,
|
|
1768
|
-
|
|
1769
|
-
|
|
1775
|
+
stdout_callback: Optional[Callable[[str], None]] = None,
|
|
1776
|
+
stderr_callback: Optional[Callable[[str], None]] = None,
|
|
1770
1777
|
params: Optional[dict[str, str]] = None,
|
|
1771
1778
|
job_id: Optional[str] = None,
|
|
1772
1779
|
interrupt_timeout: Optional[int] = None,
|
|
@@ -1781,13 +1788,18 @@ class Catalog:
|
|
|
1781
1788
|
},
|
|
1782
1789
|
)
|
|
1783
1790
|
popen_kwargs: dict[str, Any] = {}
|
|
1784
|
-
|
|
1785
|
-
|
|
1791
|
+
|
|
1792
|
+
if stdout_callback is not None:
|
|
1793
|
+
popen_kwargs = {"stdout": subprocess.PIPE}
|
|
1794
|
+
if stderr_callback is not None:
|
|
1795
|
+
popen_kwargs["stderr"] = subprocess.PIPE
|
|
1786
1796
|
|
|
1787
1797
|
def raise_termination_signal(sig: int, _: Any) -> NoReturn:
|
|
1788
1798
|
raise TerminationSignal(sig)
|
|
1789
1799
|
|
|
1790
|
-
|
|
1800
|
+
stdout_thread: Optional[Thread] = None
|
|
1801
|
+
stderr_thread: Optional[Thread] = None
|
|
1802
|
+
|
|
1791
1803
|
with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # noqa: S603
|
|
1792
1804
|
logger.info("Starting process %s", proc.pid)
|
|
1793
1805
|
|
|
@@ -1801,10 +1813,20 @@ class Catalog:
|
|
|
1801
1813
|
orig_sigterm_handler = signal.getsignal(signal.SIGTERM)
|
|
1802
1814
|
signal.signal(signal.SIGTERM, raise_termination_signal)
|
|
1803
1815
|
try:
|
|
1804
|
-
if
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1816
|
+
if stdout_callback is not None:
|
|
1817
|
+
stdout_thread = Thread(
|
|
1818
|
+
target=process_output,
|
|
1819
|
+
args=(proc.stdout, stdout_callback),
|
|
1820
|
+
daemon=True,
|
|
1821
|
+
)
|
|
1822
|
+
stdout_thread.start()
|
|
1823
|
+
if stderr_callback is not None:
|
|
1824
|
+
stderr_thread = Thread(
|
|
1825
|
+
target=process_output,
|
|
1826
|
+
args=(proc.stderr, stderr_callback),
|
|
1827
|
+
daemon=True,
|
|
1828
|
+
)
|
|
1829
|
+
stderr_thread.start()
|
|
1808
1830
|
|
|
1809
1831
|
proc.wait()
|
|
1810
1832
|
except TerminationSignal as exc:
|
|
@@ -1822,8 +1844,22 @@ class Catalog:
|
|
|
1822
1844
|
finally:
|
|
1823
1845
|
signal.signal(signal.SIGTERM, orig_sigterm_handler)
|
|
1824
1846
|
signal.signal(signal.SIGINT, orig_sigint_handler)
|
|
1825
|
-
|
|
1826
|
-
|
|
1847
|
+
# wait for the reader thread
|
|
1848
|
+
thread_join_timeout_seconds = 30
|
|
1849
|
+
if stdout_thread is not None:
|
|
1850
|
+
stdout_thread.join(timeout=thread_join_timeout_seconds)
|
|
1851
|
+
if stdout_thread.is_alive():
|
|
1852
|
+
logger.warning(
|
|
1853
|
+
"stdout thread is still alive after %s seconds",
|
|
1854
|
+
thread_join_timeout_seconds,
|
|
1855
|
+
)
|
|
1856
|
+
if stderr_thread is not None:
|
|
1857
|
+
stderr_thread.join(timeout=thread_join_timeout_seconds)
|
|
1858
|
+
if stderr_thread.is_alive():
|
|
1859
|
+
logger.warning(
|
|
1860
|
+
"stderr thread is still alive after %s seconds",
|
|
1861
|
+
thread_join_timeout_seconds,
|
|
1862
|
+
)
|
|
1827
1863
|
|
|
1828
1864
|
logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
|
|
1829
1865
|
if proc.returncode in (
|
|
@@ -21,6 +21,7 @@ from sqlalchemy import (
|
|
|
21
21
|
Table,
|
|
22
22
|
Text,
|
|
23
23
|
UniqueConstraint,
|
|
24
|
+
desc,
|
|
24
25
|
select,
|
|
25
26
|
)
|
|
26
27
|
from sqlalchemy.sql import func as f
|
|
@@ -399,6 +400,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
399
400
|
workers: int = 1,
|
|
400
401
|
python_version: Optional[str] = None,
|
|
401
402
|
params: Optional[dict[str, str]] = None,
|
|
403
|
+
parent_job_id: Optional[str] = None,
|
|
402
404
|
) -> str:
|
|
403
405
|
"""
|
|
404
406
|
Creates a new job.
|
|
@@ -443,6 +445,10 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
443
445
|
def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
|
|
444
446
|
"""Returns all checkpoints related to some job"""
|
|
445
447
|
|
|
448
|
+
@abstractmethod
def get_last_checkpoint(self, job_id: str, conn=None) -> Optional[Checkpoint]:
    """
    Return the most recently created checkpoint of the given job.

    Implementations return None when the job has no checkpoints.
    """
|
|
451
|
+
|
|
446
452
|
@abstractmethod
|
|
447
453
|
def get_checkpoint_by_id(self, checkpoint_id: str, conn=None) -> Checkpoint:
|
|
448
454
|
"""Gets single checkpoint by id"""
|
|
@@ -1548,6 +1554,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1548
1554
|
Column("error_stack", Text, nullable=False, default=""),
|
|
1549
1555
|
Column("params", JSON, nullable=False),
|
|
1550
1556
|
Column("metrics", JSON, nullable=False),
|
|
1557
|
+
Column("parent_job_id", Text, nullable=True),
|
|
1551
1558
|
]
|
|
1552
1559
|
|
|
1553
1560
|
@cached_property
|
|
@@ -1595,6 +1602,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1595
1602
|
workers: int = 1,
|
|
1596
1603
|
python_version: Optional[str] = None,
|
|
1597
1604
|
params: Optional[dict[str, str]] = None,
|
|
1605
|
+
parent_job_id: Optional[str] = None,
|
|
1598
1606
|
conn: Optional[Any] = None,
|
|
1599
1607
|
) -> str:
|
|
1600
1608
|
"""
|
|
@@ -1616,6 +1624,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1616
1624
|
error_stack="",
|
|
1617
1625
|
params=json.dumps(params or {}),
|
|
1618
1626
|
metrics=json.dumps({}),
|
|
1627
|
+
parent_job_id=parent_job_id,
|
|
1619
1628
|
),
|
|
1620
1629
|
conn=conn,
|
|
1621
1630
|
)
|
|
@@ -1770,7 +1779,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1770
1779
|
)
|
|
1771
1780
|
return self.get_checkpoint_by_id(checkpoint_id)
|
|
1772
1781
|
|
|
1773
|
-
def list_checkpoints(self, job_id: str, conn=None) -> Iterator[
|
|
1782
|
+
def list_checkpoints(self, job_id: str, conn=None) -> Iterator[Checkpoint]:
|
|
1774
1783
|
"""List checkpoints by job id."""
|
|
1775
1784
|
query = self._checkpoints_query().where(self._checkpoints.c.job_id == job_id)
|
|
1776
1785
|
rows = list(self.db.execute(query, conn=conn))
|
|
@@ -1800,3 +1809,15 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1800
1809
|
if not rows:
|
|
1801
1810
|
return None
|
|
1802
1811
|
return self.checkpoint_class.parse(*rows[0])
|
|
1812
|
+
|
|
1813
|
+
def get_last_checkpoint(self, job_id: str, conn=None) -> Optional[Checkpoint]:
    """
    Return the newest checkpoint of a job, or None if the job has none.

    "Newest" means the highest ``created_at`` among the checkpoints whose
    ``job_id`` matches; only that single row is requested from the database.
    """
    newest_first = (
        self._checkpoints_query()
        .where(self._checkpoints.c.job_id == job_id)
        .order_by(desc(self._checkpoints.c.created_at))
        .limit(1)
    )
    found = list(self.db.execute(newest_first, conn=conn))
    return self.checkpoint_class.parse(*found[0]) if found else None
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import random
|
|
2
|
-
import string
|
|
3
1
|
from collections.abc import Sequence
|
|
4
2
|
from enum import Enum
|
|
5
3
|
from typing import TYPE_CHECKING, Optional, Union
|
|
@@ -11,16 +9,12 @@ from datachain.query.schema import Column
|
|
|
11
9
|
if TYPE_CHECKING:
|
|
12
10
|
from datachain.lib.dc import DataChain
|
|
13
11
|
|
|
14
|
-
|
|
15
12
|
C = Column
|
|
16
13
|
|
|
17
14
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
random.choice(string.ascii_letters) # noqa: S311
|
|
22
|
-
for _ in range(10)
|
|
23
|
-
)
|
|
15
|
+
STATUS_COL_NAME = "diff_7aeed3aa17ba4d50b8d1c368c76e16a6"
|
|
16
|
+
LEFT_DIFF_COL_NAME = "diff_95f95344064a4b819c8625cd1a5cfc2b"
|
|
17
|
+
RIGHT_DIFF_COL_NAME = "diff_5808838a49b54849aa461d7387376d34"
|
|
24
18
|
|
|
25
19
|
|
|
26
20
|
class CompareStatus(str, Enum):
|
|
@@ -101,9 +95,9 @@ def _compare( # noqa: C901, PLR0912
|
|
|
101
95
|
compare = right_compare = [c for c in cols if c in right_cols and c not in on] # type: ignore[misc]
|
|
102
96
|
|
|
103
97
|
# get diff column names
|
|
104
|
-
diff_col = status_col or
|
|
105
|
-
ldiff_col =
|
|
106
|
-
rdiff_col =
|
|
98
|
+
diff_col = status_col or STATUS_COL_NAME
|
|
99
|
+
ldiff_col = LEFT_DIFF_COL_NAME
|
|
100
|
+
rdiff_col = RIGHT_DIFF_COL_NAME
|
|
107
101
|
|
|
108
102
|
# adding helper diff columns, which will be removed after
|
|
109
103
|
left = left.mutate(**{ldiff_col: 1})
|
|
@@ -227,7 +221,7 @@ def compare_and_split(
|
|
|
227
221
|
)
|
|
228
222
|
```
|
|
229
223
|
"""
|
|
230
|
-
status_col =
|
|
224
|
+
status_col = STATUS_COL_NAME
|
|
231
225
|
|
|
232
226
|
res = _compare(
|
|
233
227
|
left,
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import inspect
|
|
3
|
+
import json
|
|
4
|
+
import textwrap
|
|
5
|
+
from collections.abc import Sequence
|
|
6
|
+
from typing import TypeVar, Union
|
|
7
|
+
|
|
8
|
+
from sqlalchemy.sql.elements import (
|
|
9
|
+
BinaryExpression,
|
|
10
|
+
BindParameter,
|
|
11
|
+
ColumnElement,
|
|
12
|
+
Label,
|
|
13
|
+
Over,
|
|
14
|
+
UnaryExpression,
|
|
15
|
+
)
|
|
16
|
+
from sqlalchemy.sql.functions import Function
|
|
17
|
+
|
|
18
|
+
T = TypeVar("T", bound=ColumnElement)
|
|
19
|
+
ColumnLike = Union[str, T]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def serialize_column_element(expr: Union[str, ColumnElement]) -> dict:  # noqa: PLR0911
    """
    Recursively serialize a SQLAlchemy ColumnElement into a deterministic structure.

    The result is a nested dict whose ``"type"`` key names the kind of node
    (``binary``, ``unary``, ``function``, ``window``, ``label``, ``bind``,
    ``column`` or ``other``). Anything that is not one of the recognized
    expression classes and has no ``name`` attribute (plain strings included)
    is stringified under ``"other"``.

    Args:
        expr: Expression (or plain string) to serialize.

    Returns:
        A dict describing ``expr`` and, recursively, its sub-expressions.
    """

    # Binary operations: col > 5, col1 + col2, etc.
    if isinstance(expr, BinaryExpression):
        op = (
            expr.operator.__name__
            if hasattr(expr.operator, "__name__")
            else str(expr.operator)
        )
        return {
            "type": "binary",
            "op": op,
            "left": serialize_column_element(expr.left),
            "right": serialize_column_element(expr.right),
        }

    # Unary operations: -col, NOT col, etc.
    if isinstance(expr, UnaryExpression):
        op = (
            expr.operator.__name__
            if expr.operator is not None and hasattr(expr.operator, "__name__")
            else str(expr.operator)
        )

        return {
            "type": "unary",
            "op": op,
            "element": serialize_column_element(expr.element),  # type: ignore[arg-type]
        }

    # Function calls: func.lower(col), func.count(col), etc.
    if isinstance(expr, Function):
        return {
            "type": "function",
            "name": expr.name,
            "clauses": [serialize_column_element(c) for c in expr.clauses],
        }

    # Window functions: func.row_number().over(partition_by=..., order_by=...)
    if isinstance(expr, Over):
        return {
            "type": "window",
            "function": serialize_column_element(expr.element),
            "partition_by": _serialize_optional_clauses(
                getattr(expr, "partition_by", None)
            ),
            "order_by": _serialize_optional_clauses(getattr(expr, "order_by", None)),
        }

    # Labeled expressions: col.label("alias")
    if isinstance(expr, Label):
        return {
            "type": "label",
            "name": expr.name,
            "element": serialize_column_element(expr.element),
        }

    # Bound values (constants)
    if isinstance(expr, BindParameter):
        return {"type": "bind", "value": expr.value}

    # Plain columns
    if hasattr(expr, "name"):
        return {"type": "column", "name": expr.name}

    # Fallback: stringify unknown nodes
    return {"type": "other", "repr": str(expr)}


def _serialize_optional_clauses(clauses) -> list:
    """Serialize each clause of a window's partition_by/order_by; [] if absent."""
    # The attribute exists but is None when the window omits that clause, so a
    # getattr default alone does not protect the iteration. The check must be
    # an explicit ``is None``: clause elements do not support truth testing.
    if clauses is None:
        return []
    return [serialize_column_element(clause) for clause in clauses]
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def hash_column_elements(columns: Sequence[ColumnLike]) -> str:
    """
    Compute a deterministic, dialect-agnostic SHA-256 hex digest of columns.

    Every column is serialized with ``serialize_column_element``; the list is
    dumped as JSON with sorted keys and the UTF-8 bytes of that JSON are hashed.
    The order of ``columns`` affects the result, so only ordered iterables
    (like list or tuple) should be passed.
    """
    payload = json.dumps(
        [serialize_column_element(column) for column in columns],
        sort_keys=True,  # stable JSON
    )
    digest = hashlib.sha256(payload.encode("utf-8"))
    return digest.hexdigest()
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def hash_callable(func):
    """
    Calculate a SHA-256 hex digest that identifies a callable.

    Rules:
    - Named functions (def) → use the dedented source code for stable,
      cross-version hashing.
    - Lambdas, and named functions whose source is not available → use a
      fingerprint of the compiled code (deterministic in same Python runtime).
      The fingerprint covers the bytecode and the constants and names it
      refers to, plus keyword-only defaults. Bytecode alone can address
      constants by index, so callables that differ only in a literal would
      otherwise hash the same.

    The function name, positional defaults and annotations are always mixed in
    to distinguish functions with the same code but different metadata.

    Args:
        func: The callable to hash.

    Returns:
        The hex digest as a string.

    Raises:
        TypeError: If ``func`` is not callable.
    """
    if not callable(func):
        raise TypeError("Expected a callable")

    payload = None

    # Lambdas go straight to the code fingerprint
    if func.__name__ != "<lambda>":
        # Try to get exact source of named function
        try:
            lines, _ = inspect.getsourcelines(func)
            payload = textwrap.dedent("".join(lines)).strip()
        except (OSError, TypeError):
            # Fallback: code fingerprint if source not available
            payload = None

    if payload is None:
        payload = b"\x00".join(
            (
                _code_fingerprint(func.__code__),
                repr(func.__kwdefaults__).encode(),
            )
        )

    # Normalize annotations
    annotations = {
        k: getattr(v, "__name__", str(v)) for k, v in func.__annotations__.items()
    }

    # Extras to distinguish functions with same code but different metadata
    extras = {
        "name": func.__name__,
        "defaults": func.__defaults__,
        "annotations": annotations,
    }

    # Compute SHA256
    h = hashlib.sha256()
    h.update(str(payload).encode() if isinstance(payload, str) else payload)
    h.update(str(extras).encode())
    return h.hexdigest()


def _code_fingerprint(code) -> bytes:
    """Return bytes describing a code object's bytecode, names and constants."""
    code_type = type(code)
    parts = [code.co_code, repr(code.co_names).encode()]
    for const in code.co_consts:
        if isinstance(const, code_type):
            # Nested lambdas/comprehensions: recurse instead of using repr(),
            # which embeds a memory address and file location.
            parts.append(_code_fingerprint(const))
        elif isinstance(const, frozenset):
            # Set iteration order is not stable, so sort the item reprs.
            parts.append(repr(sorted(repr(item) for item in const)).encode())
        else:
            parts.append(repr(const).encode())
    return b"\x00".join(parts)
|
|
@@ -22,6 +22,7 @@ class Job:
|
|
|
22
22
|
python_version: Optional[str] = None
|
|
23
23
|
error_message: str = ""
|
|
24
24
|
error_stack: str = ""
|
|
25
|
+
parent_job_id: Optional[str] = None
|
|
25
26
|
|
|
26
27
|
@classmethod
|
|
27
28
|
def parse(
|
|
@@ -39,6 +40,7 @@ class Job:
|
|
|
39
40
|
error_stack: str,
|
|
40
41
|
params: str,
|
|
41
42
|
metrics: str,
|
|
43
|
+
parent_job_id: Optional[str],
|
|
42
44
|
) -> "Job":
|
|
43
45
|
return cls(
|
|
44
46
|
str(id),
|
|
@@ -54,4 +56,5 @@ class Job:
|
|
|
54
56
|
python_version,
|
|
55
57
|
error_message,
|
|
56
58
|
error_stack,
|
|
59
|
+
parent_job_id,
|
|
57
60
|
)
|