datachain 0.16.3__tar.gz → 0.16.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.16.3 → datachain-0.16.5}/.github/workflows/benchmarks.yml +4 -1
- {datachain-0.16.3 → datachain-0.16.5}/.github/workflows/release.yml +1 -1
- {datachain-0.16.3 → datachain-0.16.5}/.github/workflows/tests-studio.yml +2 -1
- {datachain-0.16.3 → datachain-0.16.5}/.github/workflows/tests.yml +12 -3
- {datachain-0.16.3 → datachain-0.16.5}/.pre-commit-config.yaml +1 -1
- {datachain-0.16.3/src/datachain.egg-info → datachain-0.16.5}/PKG-INFO +1 -1
- {datachain-0.16.3 → datachain-0.16.5}/docs/commands/job/run.md +5 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/catalog/catalog.py +5 -1
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/__init__.py +11 -9
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/query.py +1 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/parser/__init__.py +9 -1
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/parser/job.py +6 -1
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/job.py +1 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/metastore.py +82 -71
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/warehouse.py +46 -34
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/arrow.py +23 -1
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/csv.py +1 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/datachain.py +30 -13
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/listing.py +2 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/udf.py +17 -5
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/batch.py +40 -39
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/dataset.py +33 -32
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/dispatch.py +137 -75
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/metrics.py +1 -2
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/queue.py +1 -11
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/udf.py +1 -1
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/utils.py +8 -14
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/remote/studio.py +2 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/studio.py +3 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/utils.py +3 -0
- {datachain-0.16.3 → datachain-0.16.5/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain.egg-info/SOURCES.txt +2 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/conftest.py +35 -8
- datachain-0.16.5/tests/func/test_batching.py +243 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_datachain.py +35 -75
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_metrics.py +11 -2
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_query.py +13 -10
- {datachain-0.16.3 → datachain-0.16.5}/tests/test_cli_studio.py +3 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/test_query_e2e.py +3 -2
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_datachain.py +6 -2
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_datachain_bootstrap.py +3 -1
- datachain-0.16.5/tests/unit/lib/test_udf.py +36 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_dispatch.py +1 -15
- {datachain-0.16.3 → datachain-0.16.5}/.cruft.json +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/.gitattributes +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/.github/codecov.yaml +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/.github/dependabot.yml +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/.gitignore +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/LICENSE +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/README.rst +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/assets/datachain.svg +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/commands/auth/login.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/commands/auth/logout.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/commands/auth/team.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/commands/auth/token.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/commands/index.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/commands/job/cancel.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/commands/job/logs.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/contributing.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/examples.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/index.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/overrides/main.html +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/quick-start.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/file.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/index.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/pose.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/segment.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/datachain.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/func.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/index.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/remotes.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/toolkit.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/torch.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/references/udf.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/docs/tutorials.md +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/multimodal/wds.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/mkdocs.yml +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/noxfile.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/pyproject.toml +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/setup.cfg +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/__main__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/asyn.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cache.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/utils.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/azure.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/gcs.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/hf.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/local.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/s3.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/config.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/dataset.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/error.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/fs/reference.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/fs/utils.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/array.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/base.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/conditional.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/func.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/numeric.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/path.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/random.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/string.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/window.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/job.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/clip.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/file.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/hf.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/image.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/settings.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/tar.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/text.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/utils.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/video.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/listing.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/bbox.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/pose.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/segment.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/utils.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/node.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/progress.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/py.typed +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/params.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/schema.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/session.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/script_meta.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/types.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/utils.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/telemetry.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/data.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/examples/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/examples/test_examples.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/examples/wds_data.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/data/lena.jpg +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/model/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_catalog.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_client.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_data_storage.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_datasets.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_file.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_func.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_hf.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_image.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_listing.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_ls.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_pull.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_pytorch.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_read_database.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_session.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_toolkit.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_video.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_warehouse.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/scripts/feature_class.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/test_atomicity.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/test_cli_e2e.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/test_import_time.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/test_telemetry.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/model/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_asyn.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_cache.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_catalog.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_client.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_config.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_dataset.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_func.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_listing.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_metastore.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_query.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_query_params.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_serializer.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_session.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_utils.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.16.3 → datachain-0.16.5}/tests/utils.py +0 -0
|
@@ -25,11 +25,14 @@ jobs:
|
|
|
25
25
|
python-version: '3.13'
|
|
26
26
|
|
|
27
27
|
- name: Setup uv
|
|
28
|
-
uses: astral-sh/setup-uv@
|
|
28
|
+
uses: astral-sh/setup-uv@v6
|
|
29
29
|
with:
|
|
30
30
|
enable-cache: true
|
|
31
31
|
cache-suffix: benchmarks
|
|
32
32
|
cache-dependency-glob: pyproject.toml
|
|
33
|
+
# revert after this is fixed
|
|
34
|
+
# https://github.com/wntrblm/nox/issues/953
|
|
35
|
+
version: ">=0.6,<0.7"
|
|
33
36
|
|
|
34
37
|
- name: Install nox and dvc
|
|
35
38
|
run: uv pip install dvc[gs] nox --system
|
|
@@ -84,7 +84,7 @@ jobs:
|
|
|
84
84
|
python-version: ${{ matrix.pyv }}
|
|
85
85
|
|
|
86
86
|
- name: Setup uv
|
|
87
|
-
uses: astral-sh/setup-uv@
|
|
87
|
+
uses: astral-sh/setup-uv@v6
|
|
88
88
|
with:
|
|
89
89
|
enable-cache: true
|
|
90
90
|
cache-suffix: studio
|
|
@@ -98,6 +98,7 @@ jobs:
|
|
|
98
98
|
- name: Run tests
|
|
99
99
|
# Generate `.test_durations` file with `pytest --store-durations --durations-path ../.github/.test_durations ...`
|
|
100
100
|
run: >
|
|
101
|
+
PYTHONPATH="$(pwd)/..:${PYTHONPATH}"
|
|
101
102
|
pytest
|
|
102
103
|
--config-file=pyproject.toml -rs
|
|
103
104
|
--splits=6 --group=${{ matrix.group }} --durations-path=../../.github/.test_durations
|
|
@@ -29,11 +29,14 @@ jobs:
|
|
|
29
29
|
python-version: '3.9'
|
|
30
30
|
|
|
31
31
|
- name: Setup uv
|
|
32
|
-
uses: astral-sh/setup-uv@
|
|
32
|
+
uses: astral-sh/setup-uv@v6
|
|
33
33
|
with:
|
|
34
34
|
enable-cache: true
|
|
35
35
|
cache-suffix: lint
|
|
36
36
|
cache-dependency-glob: pyproject.toml
|
|
37
|
+
# revert after this is fixed
|
|
38
|
+
# https://github.com/wntrblm/nox/issues/953
|
|
39
|
+
version: ">=0.6,<0.7"
|
|
37
40
|
|
|
38
41
|
- name: Install nox
|
|
39
42
|
run: uv pip install nox --system
|
|
@@ -87,11 +90,14 @@ jobs:
|
|
|
87
90
|
python-version: ${{ matrix.pyv }}
|
|
88
91
|
|
|
89
92
|
- name: Setup uv
|
|
90
|
-
uses: astral-sh/setup-uv@
|
|
93
|
+
uses: astral-sh/setup-uv@v6
|
|
91
94
|
with:
|
|
92
95
|
enable-cache: true
|
|
93
96
|
cache-suffix: tests-${{ matrix.pyv }}
|
|
94
97
|
cache-dependency-glob: pyproject.toml
|
|
98
|
+
# revert after this is fixed
|
|
99
|
+
# https://github.com/wntrblm/nox/issues/953
|
|
100
|
+
version: ">=0.6,<0.7"
|
|
95
101
|
|
|
96
102
|
- name: Install nox
|
|
97
103
|
run: uv pip install nox --system
|
|
@@ -154,11 +160,14 @@ jobs:
|
|
|
154
160
|
python-version: ${{ matrix.pyv }}
|
|
155
161
|
|
|
156
162
|
- name: Setup uv
|
|
157
|
-
uses: astral-sh/setup-uv@
|
|
163
|
+
uses: astral-sh/setup-uv@v6
|
|
158
164
|
with:
|
|
159
165
|
enable-cache: true
|
|
160
166
|
cache-suffix: examples-${{ matrix.pyv }}
|
|
161
167
|
cache-dependency-glob: pyproject.toml
|
|
168
|
+
# revert after this is fixed
|
|
169
|
+
# https://github.com/wntrblm/nox/issues/953
|
|
170
|
+
version: ">=0.6,<0.7"
|
|
162
171
|
|
|
163
172
|
- name: Install nox
|
|
164
173
|
run: uv pip install nox --system
|
|
@@ -60,6 +60,11 @@ datachain job run --workers 4 --files utils.py config.json query.py
|
|
|
60
60
|
datachain job run --env API_KEY=123 --req pandas numpy query.py
|
|
61
61
|
```
|
|
62
62
|
|
|
63
|
+
6. Run a job with a repository (will be cloned in the job working directory):
|
|
64
|
+
```bash
|
|
65
|
+
datachain job run --repository https://github.com/iterative/datachain query.py
|
|
66
|
+
```
|
|
67
|
+
|
|
63
68
|
## Notes
|
|
64
69
|
|
|
65
70
|
* Closing the logs command (e.g., with Ctrl+C) will only stop displaying the logs but will not cancel the job execution
|
|
@@ -79,6 +79,7 @@ DATASET_INTERNAL_ERROR_MESSAGE = "Internal error on creating dataset"
|
|
|
79
79
|
QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE = 10
|
|
80
80
|
# exit code we use if query script was canceled
|
|
81
81
|
QUERY_SCRIPT_CANCELED_EXIT_CODE = 11
|
|
82
|
+
QUERY_SCRIPT_SIGTERM_EXIT_CODE = -15 # if query script was terminated by SIGTERM
|
|
82
83
|
|
|
83
84
|
# dataset pull
|
|
84
85
|
PULL_DATASET_MAX_THREADS = 5
|
|
@@ -1645,7 +1646,10 @@ class Catalog:
|
|
|
1645
1646
|
thread.join() # wait for the reader thread
|
|
1646
1647
|
|
|
1647
1648
|
logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
|
|
1648
|
-
if proc.returncode
|
|
1649
|
+
if proc.returncode in (
|
|
1650
|
+
QUERY_SCRIPT_CANCELED_EXIT_CODE,
|
|
1651
|
+
QUERY_SCRIPT_SIGTERM_EXIT_CODE,
|
|
1652
|
+
):
|
|
1649
1653
|
raise QueryScriptCancelError(
|
|
1650
1654
|
"Query script was canceled by user",
|
|
1651
1655
|
return_code=proc.returncode,
|
|
@@ -34,8 +34,10 @@ def main(argv: Optional[list[str]] = None) -> int:
|
|
|
34
34
|
datachain_parser = get_parser()
|
|
35
35
|
args = datachain_parser.parse_args(argv)
|
|
36
36
|
|
|
37
|
-
if args.command
|
|
38
|
-
return handle_udf(
|
|
37
|
+
if args.command == "internal-run-udf":
|
|
38
|
+
return handle_udf()
|
|
39
|
+
if args.command == "internal-run-udf-worker":
|
|
40
|
+
return handle_udf_runner(args.fd)
|
|
39
41
|
|
|
40
42
|
if args.command is None:
|
|
41
43
|
datachain_parser.print_help(sys.stderr)
|
|
@@ -303,13 +305,13 @@ def handle_general_exception(exc, args, logging_level):
|
|
|
303
305
|
return error, 1
|
|
304
306
|
|
|
305
307
|
|
|
306
|
-
def handle_udf(
|
|
307
|
-
|
|
308
|
-
from datachain.query.dispatch import udf_entrypoint
|
|
308
|
+
def handle_udf() -> int:
|
|
309
|
+
from datachain.query.dispatch import udf_entrypoint
|
|
309
310
|
|
|
310
|
-
|
|
311
|
+
return udf_entrypoint()
|
|
311
312
|
|
|
312
|
-
if command == "internal-run-udf-worker":
|
|
313
|
-
from datachain.query.dispatch import udf_worker_entrypoint
|
|
314
313
|
|
|
315
|
-
|
|
314
|
+
def handle_udf_runner(fd: Optional[int] = None) -> int:
|
|
315
|
+
from datachain.query.dispatch import udf_worker_entrypoint
|
|
316
|
+
|
|
317
|
+
return udf_worker_entrypoint(fd)
|
|
@@ -549,7 +549,15 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
549
549
|
add_anon_arg(parse_gc)
|
|
550
550
|
|
|
551
551
|
subp.add_parser("internal-run-udf", parents=[parent_parser])
|
|
552
|
-
subp.add_parser("internal-run-udf-worker", parents=[parent_parser])
|
|
552
|
+
run_udf_worker = subp.add_parser("internal-run-udf-worker", parents=[parent_parser])
|
|
553
|
+
run_udf_worker.add_argument(
|
|
554
|
+
"--fd",
|
|
555
|
+
type=int,
|
|
556
|
+
action="store",
|
|
557
|
+
default=None,
|
|
558
|
+
help="File descriptor to write results to",
|
|
559
|
+
)
|
|
560
|
+
|
|
553
561
|
add_completion_parser(subp, [parent_parser])
|
|
554
562
|
return parser
|
|
555
563
|
|
|
@@ -13,7 +13,7 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
|
|
|
13
13
|
)
|
|
14
14
|
jobs_subparser = jobs_parser.add_subparsers(
|
|
15
15
|
dest="cmd",
|
|
16
|
-
help="Use `datachain
|
|
16
|
+
help="Use `datachain job CMD --help` to display command-specific help",
|
|
17
17
|
)
|
|
18
18
|
|
|
19
19
|
studio_run_help = "Run a job in Studio"
|
|
@@ -66,6 +66,11 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
|
|
|
66
66
|
action="store",
|
|
67
67
|
help="Python version for the job (e.g., 3.9, 3.10, 3.11)",
|
|
68
68
|
)
|
|
69
|
+
studio_run_parser.add_argument(
|
|
70
|
+
"--repository",
|
|
71
|
+
action="store",
|
|
72
|
+
help="Repository URL to clone before running the job",
|
|
73
|
+
)
|
|
69
74
|
studio_run_parser.add_argument(
|
|
70
75
|
"--req-file",
|
|
71
76
|
action="store",
|
|
@@ -254,6 +254,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
254
254
|
name: str,
|
|
255
255
|
query: str,
|
|
256
256
|
query_type: JobQueryType = JobQueryType.PYTHON,
|
|
257
|
+
status: JobStatus = JobStatus.CREATED,
|
|
257
258
|
workers: int = 1,
|
|
258
259
|
python_version: Optional[str] = None,
|
|
259
260
|
params: Optional[dict[str, str]] = None,
|
|
@@ -264,33 +265,35 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
264
265
|
"""
|
|
265
266
|
|
|
266
267
|
@abstractmethod
|
|
267
|
-
def
|
|
268
|
+
def get_job(self, job_id: str) -> Optional[Job]:
|
|
269
|
+
"""Returns the job with the given ID."""
|
|
270
|
+
|
|
271
|
+
@abstractmethod
|
|
272
|
+
def update_job(
|
|
268
273
|
self,
|
|
269
274
|
job_id: str,
|
|
270
|
-
status: JobStatus,
|
|
275
|
+
status: Optional[JobStatus] = None,
|
|
276
|
+
exit_code: Optional[int] = None,
|
|
271
277
|
error_message: Optional[str] = None,
|
|
272
278
|
error_stack: Optional[str] = None,
|
|
279
|
+
finished_at: Optional[datetime] = None,
|
|
273
280
|
metrics: Optional[dict[str, Any]] = None,
|
|
274
|
-
) ->
|
|
275
|
-
"""
|
|
281
|
+
) -> Optional["Job"]:
|
|
282
|
+
"""Updates job fields."""
|
|
276
283
|
|
|
277
284
|
@abstractmethod
|
|
278
|
-
def
|
|
279
|
-
"""Returns the status of the given job."""
|
|
280
|
-
|
|
281
|
-
@abstractmethod
|
|
282
|
-
def set_job_and_dataset_status(
|
|
285
|
+
def set_job_status(
|
|
283
286
|
self,
|
|
284
287
|
job_id: str,
|
|
285
|
-
|
|
286
|
-
|
|
288
|
+
status: JobStatus,
|
|
289
|
+
error_message: Optional[str] = None,
|
|
290
|
+
error_stack: Optional[str] = None,
|
|
287
291
|
) -> None:
|
|
288
|
-
"""Set the status of the given job
|
|
292
|
+
"""Set the status of the given job."""
|
|
289
293
|
|
|
290
294
|
@abstractmethod
|
|
291
|
-
def
|
|
292
|
-
"""Returns
|
|
293
|
-
raise NotImplementedError
|
|
295
|
+
def get_job_status(self, job_id: str) -> Optional[JobStatus]:
|
|
296
|
+
"""Returns the status of the given job."""
|
|
294
297
|
|
|
295
298
|
|
|
296
299
|
class AbstractDBMetastore(AbstractMetastore):
|
|
@@ -651,30 +654,31 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
651
654
|
dataset_version = dataset.get_version(version)
|
|
652
655
|
|
|
653
656
|
values = {}
|
|
657
|
+
version_values: dict = {}
|
|
654
658
|
for field, value in kwargs.items():
|
|
655
659
|
if field in self._dataset_version_fields[1:]:
|
|
656
660
|
if field == "schema":
|
|
657
|
-
dataset_version.update(**{field: DatasetRecord.parse_schema(value)})
|
|
658
661
|
values[field] = json.dumps(value) if value else None
|
|
662
|
+
version_values[field] = DatasetRecord.parse_schema(value)
|
|
659
663
|
elif field == "feature_schema":
|
|
660
664
|
values[field] = json.dumps(value) if value else None
|
|
665
|
+
version_values[field] = value
|
|
661
666
|
elif field == "preview" and isinstance(value, list):
|
|
662
667
|
values[field] = json.dumps(value, cls=JSONSerialize)
|
|
668
|
+
version_values[field] = value
|
|
663
669
|
else:
|
|
664
670
|
values[field] = value
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
if not values:
|
|
668
|
-
# Nothing to update
|
|
669
|
-
return dataset_version
|
|
671
|
+
version_values[field] = value
|
|
670
672
|
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
self.
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
673
|
+
if values:
|
|
674
|
+
dv = self._datasets_versions
|
|
675
|
+
self.db.execute(
|
|
676
|
+
self._datasets_versions_update()
|
|
677
|
+
.where(dv.c.dataset_id == dataset.id and dv.c.version == version)
|
|
678
|
+
.values(values),
|
|
679
|
+
conn=conn,
|
|
680
|
+
) # type: ignore [attr-defined]
|
|
681
|
+
dataset_version.update(**version_values)
|
|
678
682
|
|
|
679
683
|
return dataset_version
|
|
680
684
|
|
|
@@ -702,7 +706,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
702
706
|
dataset_fields: list[str],
|
|
703
707
|
dataset_version_fields: list[str],
|
|
704
708
|
isouter: bool = True,
|
|
705
|
-
):
|
|
709
|
+
) -> "Select":
|
|
706
710
|
if not (
|
|
707
711
|
self.db.has_table(self._datasets.name)
|
|
708
712
|
and self.db.has_table(self._datasets_versions.name)
|
|
@@ -719,12 +723,12 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
719
723
|
j = d.join(dv, d.c.id == dv.c.dataset_id, isouter=isouter)
|
|
720
724
|
return query.select_from(j)
|
|
721
725
|
|
|
722
|
-
def _base_dataset_query(self):
|
|
726
|
+
def _base_dataset_query(self) -> "Select":
|
|
723
727
|
return self._get_dataset_query(
|
|
724
728
|
self._dataset_fields, self._dataset_version_fields
|
|
725
729
|
)
|
|
726
730
|
|
|
727
|
-
def _base_list_datasets_query(self):
|
|
731
|
+
def _base_list_datasets_query(self) -> "Select":
|
|
728
732
|
return self._get_dataset_query(
|
|
729
733
|
self._dataset_list_fields, self._dataset_list_version_fields, isouter=False
|
|
730
734
|
)
|
|
@@ -1018,6 +1022,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1018
1022
|
name: str,
|
|
1019
1023
|
query: str,
|
|
1020
1024
|
query_type: JobQueryType = JobQueryType.PYTHON,
|
|
1025
|
+
status: JobStatus = JobStatus.CREATED,
|
|
1021
1026
|
workers: int = 1,
|
|
1022
1027
|
python_version: Optional[str] = None,
|
|
1023
1028
|
params: Optional[dict[str, str]] = None,
|
|
@@ -1032,7 +1037,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1032
1037
|
self._jobs_insert().values(
|
|
1033
1038
|
id=job_id,
|
|
1034
1039
|
name=name,
|
|
1035
|
-
status=
|
|
1040
|
+
status=status,
|
|
1036
1041
|
created_at=datetime.now(timezone.utc),
|
|
1037
1042
|
query=query,
|
|
1038
1043
|
query_type=query_type.value,
|
|
@@ -1047,25 +1052,65 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1047
1052
|
)
|
|
1048
1053
|
return job_id
|
|
1049
1054
|
|
|
1055
|
+
def get_job(self, job_id: str, conn=None) -> Optional[Job]:
|
|
1056
|
+
"""Returns the job with the given ID."""
|
|
1057
|
+
query = self._jobs_select(self._jobs).where(self._jobs.c.id == job_id)
|
|
1058
|
+
results = list(self.db.execute(query, conn=conn))
|
|
1059
|
+
if not results:
|
|
1060
|
+
return None
|
|
1061
|
+
return self._parse_job(results[0])
|
|
1062
|
+
|
|
1063
|
+
def update_job(
|
|
1064
|
+
self,
|
|
1065
|
+
job_id: str,
|
|
1066
|
+
status: Optional[JobStatus] = None,
|
|
1067
|
+
exit_code: Optional[int] = None,
|
|
1068
|
+
error_message: Optional[str] = None,
|
|
1069
|
+
error_stack: Optional[str] = None,
|
|
1070
|
+
finished_at: Optional[datetime] = None,
|
|
1071
|
+
metrics: Optional[dict[str, Any]] = None,
|
|
1072
|
+
conn: Optional[Any] = None,
|
|
1073
|
+
) -> Optional["Job"]:
|
|
1074
|
+
"""Updates job fields."""
|
|
1075
|
+
values: dict = {}
|
|
1076
|
+
if status is not None:
|
|
1077
|
+
values["status"] = status
|
|
1078
|
+
if exit_code is not None:
|
|
1079
|
+
values["exit_code"] = exit_code
|
|
1080
|
+
if error_message is not None:
|
|
1081
|
+
values["error_message"] = error_message
|
|
1082
|
+
if error_stack is not None:
|
|
1083
|
+
values["error_stack"] = error_stack
|
|
1084
|
+
if finished_at is not None:
|
|
1085
|
+
values["finished_at"] = finished_at
|
|
1086
|
+
if metrics:
|
|
1087
|
+
values["metrics"] = json.dumps(metrics)
|
|
1088
|
+
|
|
1089
|
+
if values:
|
|
1090
|
+
j = self._jobs
|
|
1091
|
+
self.db.execute(
|
|
1092
|
+
self._jobs_update().where(j.c.id == job_id).values(**values),
|
|
1093
|
+
conn=conn,
|
|
1094
|
+
) # type: ignore [attr-defined]
|
|
1095
|
+
|
|
1096
|
+
return self.get_job(job_id, conn=conn)
|
|
1097
|
+
|
|
1050
1098
|
def set_job_status(
|
|
1051
1099
|
self,
|
|
1052
1100
|
job_id: str,
|
|
1053
1101
|
status: JobStatus,
|
|
1054
1102
|
error_message: Optional[str] = None,
|
|
1055
1103
|
error_stack: Optional[str] = None,
|
|
1056
|
-
metrics: Optional[dict[str, Any]] = None,
|
|
1057
1104
|
conn: Optional[Any] = None,
|
|
1058
1105
|
) -> None:
|
|
1059
1106
|
"""Set the status of the given job."""
|
|
1060
|
-
values: dict = {"status": status
|
|
1061
|
-
if status
|
|
1107
|
+
values: dict = {"status": status}
|
|
1108
|
+
if status in JobStatus.finished():
|
|
1062
1109
|
values["finished_at"] = datetime.now(timezone.utc)
|
|
1063
1110
|
if error_message:
|
|
1064
1111
|
values["error_message"] = error_message
|
|
1065
1112
|
if error_stack:
|
|
1066
1113
|
values["error_stack"] = error_stack
|
|
1067
|
-
if metrics:
|
|
1068
|
-
values["metrics"] = json.dumps(metrics)
|
|
1069
1114
|
self.db.execute(
|
|
1070
1115
|
self._jobs_update(self._jobs.c.id == job_id).values(**values),
|
|
1071
1116
|
conn=conn,
|
|
@@ -1086,37 +1131,3 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1086
1131
|
if not results:
|
|
1087
1132
|
return None
|
|
1088
1133
|
return results[0][0]
|
|
1089
|
-
|
|
1090
|
-
def set_job_and_dataset_status(
|
|
1091
|
-
self,
|
|
1092
|
-
job_id: str,
|
|
1093
|
-
job_status: JobStatus,
|
|
1094
|
-
dataset_status: DatasetStatus,
|
|
1095
|
-
) -> None:
|
|
1096
|
-
"""Set the status of the given job and dataset."""
|
|
1097
|
-
with self.db.transaction() as conn:
|
|
1098
|
-
self.set_job_status(job_id, status=job_status, conn=conn)
|
|
1099
|
-
dv = self._datasets_versions
|
|
1100
|
-
query = (
|
|
1101
|
-
self._datasets_versions_update()
|
|
1102
|
-
.where(
|
|
1103
|
-
(dv.c.job_id == job_id) & (dv.c.status != DatasetStatus.COMPLETE)
|
|
1104
|
-
)
|
|
1105
|
-
.values(status=dataset_status)
|
|
1106
|
-
)
|
|
1107
|
-
self.db.execute(query, conn=conn) # type: ignore[attr-defined]
|
|
1108
|
-
|
|
1109
|
-
def get_job_dataset_versions(self, job_id: str) -> list[tuple[str, int]]:
|
|
1110
|
-
"""Returns dataset names and versions for the job."""
|
|
1111
|
-
dv = self._datasets_versions
|
|
1112
|
-
ds = self._datasets
|
|
1113
|
-
|
|
1114
|
-
join_condition = dv.c.dataset_id == ds.c.id
|
|
1115
|
-
|
|
1116
|
-
query = (
|
|
1117
|
-
self._datasets_versions_select(ds.c.name, dv.c.version)
|
|
1118
|
-
.select_from(dv.join(ds, join_condition))
|
|
1119
|
-
.where(dv.c.job_id == job_id)
|
|
1120
|
-
)
|
|
1121
|
-
|
|
1122
|
-
return list(self.db.execute(query))
|