datachain 0.26.3__tar.gz → 0.26.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.26.3 → datachain-0.26.4}/PKG-INFO +3 -1
- {datachain-0.26.3 → datachain-0.26.4}/docs/commands/job/run.md +57 -14
- {datachain-0.26.3 → datachain-0.26.4}/pyproject.toml +2 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/parser/job.py +14 -1
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/datachain.py +8 -4
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/storage.py +3 -3
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/file.py +17 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/remote/studio.py +4 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/studio.py +43 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain.egg-info/PKG-INFO +3 -1
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain.egg-info/SOURCES.txt +1 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain.egg-info/requires.txt +2 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_image.py +28 -0
- datachain-0.26.4/tests/func/test_studio_datetime_parsing.py +107 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/test_cli_studio.py +47 -0
- {datachain-0.26.3 → datachain-0.26.4}/.cruft.json +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.gitattributes +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.github/codecov.yaml +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.github/dependabot.yml +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.github/workflows/release.yml +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.github/workflows/tests.yml +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.gitignore +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/.pre-commit-config.yaml +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/LICENSE +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/README.rst +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/assets/datachain.svg +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/commands/auth/login.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/commands/auth/logout.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/commands/auth/team.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/commands/auth/token.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/commands/index.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/commands/job/cancel.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/commands/job/clusters.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/commands/job/logs.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/commands/job/ls.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/contributing.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/examples.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/guide/db_migrations.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/guide/delta.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/guide/env.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/guide/index.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/guide/namespaces.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/guide/processing.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/guide/remotes.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/guide/retry.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/index.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/overrides/main.html +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/quick-start.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/data-types/file.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/data-types/index.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/data-types/pose.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/data-types/segment.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/datachain.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/func.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/index.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/toolkit.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/torch.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/references/udf.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/docs/tutorials.md +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/multimodal/wds.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/mkdocs.yml +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/noxfile.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/setup.cfg +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/__main__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/asyn.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cache.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/cli/utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/client/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/client/azure.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/client/gcs.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/client/hf.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/client/local.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/client/s3.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/config.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/dataset.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/delta.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/error.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/fs/reference.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/fs/utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/func/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/func/array.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/func/base.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/func/conditional.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/func/func.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/func/numeric.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/func/path.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/func/random.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/func/string.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/func/window.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/job.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/audio.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/clip.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/hf.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/image.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/listing.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/projects.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/settings.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/tar.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/text.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/udf.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/video.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/listing.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/model/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/model/bbox.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/model/pose.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/model/segment.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/model/utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/namespace.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/node.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/progress.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/project.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/py.typed +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/query/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/query/batch.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/query/dataset.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/query/metrics.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/query/params.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/query/queue.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/query/schema.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/query/session.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/query/udf.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/query/utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/script_meta.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/semver.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/types.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/sql/utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/telemetry.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain/utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/conftest.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/data.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/examples/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/examples/test_examples.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/examples/wds_data.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/data/lena.jpg +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/functions/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/functions/test_array.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/functions/test_path.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/functions/test_random.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/functions/test_string.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/model/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_audio.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_batching.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_catalog.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_client.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_data_storage.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_datachain.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_datasets.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_delta.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_file.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_hf.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_listing.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_ls.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_metastore.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_metrics.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_pull.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_pytorch.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_query.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_read_database.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_retry.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_session.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_toolkit.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_video.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/func/test_warehouse.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/scripts/feature_class.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/test_atomicity.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/test_cli_e2e.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/test_import_time.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/test_query_e2e.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/test_telemetry.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/model/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_asyn.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_cache.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_catalog.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_client.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_config.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_dataset.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_func.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_listing.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_metastore.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_query.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_query_params.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_semver.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_serializer.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_session.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_utils.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.26.3 → datachain-0.26.4}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.26.3
|
|
3
|
+
Version: 0.26.4
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -26,6 +26,7 @@ Requires-Dist: packaging
|
|
|
26
26
|
Requires-Dist: pyarrow
|
|
27
27
|
Requires-Dist: typing-extensions
|
|
28
28
|
Requires-Dist: python-dateutil>=2
|
|
29
|
+
Requires-Dist: dateparser>=1.0.0
|
|
29
30
|
Requires-Dist: attrs>=21.3.0
|
|
30
31
|
Requires-Dist: fsspec>=2024.2.0
|
|
31
32
|
Requires-Dist: s3fs>=2024.2.0
|
|
@@ -100,6 +101,7 @@ Provides-Extra: dev
|
|
|
100
101
|
Requires-Dist: datachain[docs,tests]; extra == "dev"
|
|
101
102
|
Requires-Dist: mypy==1.17.0; extra == "dev"
|
|
102
103
|
Requires-Dist: types-python-dateutil; extra == "dev"
|
|
104
|
+
Requires-Dist: types-dateparser; extra == "dev"
|
|
103
105
|
Requires-Dist: types-pytz; extra == "dev"
|
|
104
106
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
105
107
|
Requires-Dist: types-requests; extra == "dev"
|
|
@@ -5,15 +5,15 @@ Run a job in Studio.
|
|
|
5
5
|
## Synopsis
|
|
6
6
|
|
|
7
7
|
```usage
|
|
8
|
-
usage: datachain job run [-h] [-v] [-q] [--team TEAM] [--env-file ENV_FILE] [--env ENV [ENV ...]]
|
|
9
|
-
[--
|
|
10
|
-
[--
|
|
8
|
+
usage: datachain job run [-h] [-v] [-q] [--team TEAM] [--env-file ENV_FILE] [--env ENV [ENV ...]] [--cluster CLUSTER] [--workers WORKERS]
|
|
9
|
+
[--files FILES [FILES ...]] [--python-version PYTHON_VERSION] [--repository REPOSITORY] [--req-file REQ_FILE] [--req REQ [REQ ...]]
|
|
10
|
+
[--priority PRIORITY] [--start-time START_TIME] [--cron CRON]
|
|
11
11
|
file
|
|
12
12
|
```
|
|
13
13
|
|
|
14
14
|
## Description
|
|
15
15
|
|
|
16
|
-
This command runs a job in Studio using the specified query file. You can configure various aspects of the job including environment variables, Python version, dependencies, and more.
|
|
16
|
+
This command runs a job in Studio using the specified query file. You can configure various aspects of the job including environment variables, Python version, dependencies, and more. When using --start-time or --cron, the job is scheduled as a task and will not show logs immediately. The job will be executed according to the schedule.
|
|
17
17
|
|
|
18
18
|
## Arguments
|
|
19
19
|
|
|
@@ -28,10 +28,12 @@ This command runs a job in Studio using the specified query file. You can config
|
|
|
28
28
|
* `--workers WORKERS` - Number of workers for the job
|
|
29
29
|
* `--files FILES` - Additional files to include in the job
|
|
30
30
|
* `--python-version PYTHON_VERSION` - Python version for the job (e.g., 3.9, 3.10, 3.11)
|
|
31
|
+
* `--repository REPOSITORY` - Repository URL to clone before running the job
|
|
31
32
|
* `--req-file REQ_FILE` - Python requirements file
|
|
32
33
|
* `--req REQ` - Python package requirements
|
|
33
34
|
* `--priority PRIORITY` - Priority for the job in range 0-5. Lower value is higher priority (default: 5)
|
|
34
|
-
* `--
|
|
35
|
+
* `--start-time START_TIME` - Start time in ISO format or natural language for the cron task.
|
|
36
|
+
* `--cron CRON` - Cron expression for the cron task.
|
|
35
37
|
* `-h`, `--help` - Show the help message and exit.
|
|
36
38
|
* `-v`, `--verbose` - Be verbose.
|
|
37
39
|
* `-q`, `--quiet` - Be quiet.
|
|
@@ -66,17 +68,11 @@ datachain job run --env API_KEY=123 --req pandas numpy query.py
|
|
|
66
68
|
6. Run a job with a repository (will be cloned in the job working directory):
|
|
67
69
|
```bash
|
|
68
70
|
datachain job run --repository https://github.com/iterative/datachain query.py
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
To specify a branch / revision:
|
|
72
71
|
|
|
73
|
-
|
|
72
|
+
# To specify a branch / revision:
|
|
74
73
|
datachain job run --repository https://github.com/iterative/datachain@main query.py
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
Git URLs are also supported:
|
|
78
74
|
|
|
79
|
-
|
|
75
|
+
# Git URLs are also supported:
|
|
80
76
|
datachain job run --repository git@github.com:iterative/datachain.git@main query.py
|
|
81
77
|
```
|
|
82
78
|
|
|
@@ -90,7 +86,43 @@ datachain job run --priority 2 query.py
|
|
|
90
86
|
# Get the cluster id using the following command
|
|
91
87
|
datachain job clusters
|
|
92
88
|
# Use the id of an active cluster from above
|
|
93
|
-
datachain job run --cluster-id 1 query.py
|
|
89
|
+
datachain job run --cluster 1 query.py
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
9. Schedule a job to run once at a specific time
|
|
93
|
+
```bash
|
|
94
|
+
# Run job tomorrow at 3pm
|
|
95
|
+
datachain job run --start-time "tomorrow 3pm" query.py
|
|
96
|
+
|
|
97
|
+
# Run job in 2 hours
|
|
98
|
+
datachain job run --start-time "in 2 hours" query.py
|
|
99
|
+
|
|
100
|
+
# Run job on Monday at 9am
|
|
101
|
+
datachain job run --start-time "monday 9am" query.py
|
|
102
|
+
|
|
103
|
+
# Run job at a specific date and time
|
|
104
|
+
datachain job run --start-time "2024-01-15 14:30:00" query.py
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
10. Schedule a recurring job using cron expression
|
|
108
|
+
```bash
|
|
109
|
+
# Run job daily at midnight
|
|
110
|
+
datachain job run --cron "0 0 * * *" query.py
|
|
111
|
+
|
|
112
|
+
# Run job every Monday at 9am
|
|
113
|
+
datachain job run --cron "0 9 * * 1" query.py
|
|
114
|
+
|
|
115
|
+
# Run job every hour
|
|
116
|
+
datachain job run --cron "0 * * * *" query.py
|
|
117
|
+
|
|
118
|
+
# Run job every month
|
|
119
|
+
datachain job run --cron "@monthly" query.py
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
11. Schedule a recurring job with a start time
|
|
123
|
+
```bash
|
|
124
|
+
# Start the cron job after tomorrow 3pm
|
|
125
|
+
datachain job run --start-time "tomorrow 3pm" --cron "0 0 * * *" query.py
|
|
94
126
|
```
|
|
95
127
|
|
|
96
128
|
## Notes
|
|
@@ -99,3 +131,14 @@ datachain job run --cluster-id 1 query.py
|
|
|
99
131
|
* To cancel a running job, use the `datachain job cancel` command
|
|
100
132
|
* The job will continue running in Studio even after you stop viewing the logs
|
|
101
133
|
* You can get the list of compute clusters using `datachain job clusters` command.
|
|
134
|
+
* When using `--start-time` or `--cron` options, the job is scheduled as a task and will not show logs immediately. The job will be executed according to the schedule.
|
|
135
|
+
* The `--start-time` option supports natural language parsing using the dateparser library, allowing flexible time expressions like "tomorrow 3pm", "in 2 hours", "monday 9am", etc.
|
|
136
|
+
* Cron expressions follow the standard format: minute hour day-of-month month day-of-week (e.g., "0 0 * * *" for daily at midnight) or Vixie cron-style “@” keyword expressions.
|
|
137
|
+
* The following Vixie cron-style keyword expressions are supported:
|
|
138
|
+
* @midnight
|
|
139
|
+
* @hourly
|
|
140
|
+
* @daily
|
|
141
|
+
* @weekly
|
|
142
|
+
* @monthly
|
|
143
|
+
* @yearly
|
|
144
|
+
* @annually
|
|
@@ -30,6 +30,7 @@ dependencies = [
|
|
|
30
30
|
"pyarrow",
|
|
31
31
|
"typing-extensions",
|
|
32
32
|
"python-dateutil>=2",
|
|
33
|
+
"dateparser>=1.0.0",
|
|
33
34
|
"attrs>=21.3.0",
|
|
34
35
|
"fsspec>=2024.2.0",
|
|
35
36
|
"s3fs>=2024.2.0",
|
|
@@ -116,6 +117,7 @@ dev = [
|
|
|
116
117
|
"datachain[docs,tests]",
|
|
117
118
|
"mypy==1.17.0",
|
|
118
119
|
"types-python-dateutil",
|
|
120
|
+
"types-dateparser",
|
|
119
121
|
"types-pytz",
|
|
120
122
|
"types-PyYAML",
|
|
121
123
|
"types-requests",
|
|
@@ -17,7 +17,12 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
|
|
|
17
17
|
)
|
|
18
18
|
|
|
19
19
|
studio_run_help = "Run a job in Studio"
|
|
20
|
-
studio_run_description = "Run a job in Studio."
|
|
20
|
+
studio_run_description = "Run a job in Studio. \n"
|
|
21
|
+
studio_run_description += (
|
|
22
|
+
"When using --start-time or --cron,"
|
|
23
|
+
" the job is scheduled as a task and will not show logs immediately."
|
|
24
|
+
" The job will be executed according to the schedule."
|
|
25
|
+
)
|
|
21
26
|
|
|
22
27
|
studio_run_parser = jobs_subparser.add_parser(
|
|
23
28
|
"run",
|
|
@@ -96,6 +101,14 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
|
|
|
96
101
|
help="Priority for the job in range 0-5. "
|
|
97
102
|
"Lower value is higher priority (default: 5)",
|
|
98
103
|
)
|
|
104
|
+
studio_run_parser.add_argument(
|
|
105
|
+
"--start-time",
|
|
106
|
+
action="store",
|
|
107
|
+
help="Start time in ISO format or natural language for the cron task.",
|
|
108
|
+
)
|
|
109
|
+
studio_run_parser.add_argument(
|
|
110
|
+
"--cron", action="store", help="Cron expression for the cron task."
|
|
111
|
+
)
|
|
99
112
|
|
|
100
113
|
studio_ls_help = "List jobs in Studio"
|
|
101
114
|
studio_ls_description = "List jobs in Studio."
|
|
@@ -2388,7 +2388,7 @@ class DataChain:
|
|
|
2388
2388
|
placement: FileExportPlacement = "fullpath",
|
|
2389
2389
|
link_type: Literal["copy", "symlink"] = "copy",
|
|
2390
2390
|
num_threads: Optional[int] = EXPORT_FILES_MAX_THREADS,
|
|
2391
|
-
anon: bool =
|
|
2391
|
+
anon: Optional[bool] = None,
|
|
2392
2392
|
client_config: Optional[dict] = None,
|
|
2393
2393
|
) -> None:
|
|
2394
2394
|
"""Export files from a specified signal to a directory. Files can be
|
|
@@ -2403,7 +2403,11 @@ class DataChain:
|
|
|
2403
2403
|
Falls back to `'copy'` if symlinking fails.
|
|
2404
2404
|
num_threads : number of threads to use for exporting files.
|
|
2405
2405
|
By default it uses 5 threads.
|
|
2406
|
-
anon: If
|
|
2406
|
+
anon: If True, we will treat cloud bucket as public one. Default behavior
|
|
2407
|
+
depends on the previous session configuration (e.g. happens in the
|
|
2408
|
+
initial `read_storage`) and particular cloud storage client
|
|
2409
|
+
implementation (e.g. S3 falls back to anonymous access if no credentials
|
|
2410
|
+
were found).
|
|
2407
2411
|
client_config: Optional configuration for the destination storage client
|
|
2408
2412
|
|
|
2409
2413
|
Example:
|
|
@@ -2421,8 +2425,8 @@ class DataChain:
|
|
|
2421
2425
|
):
|
|
2422
2426
|
raise ValueError("Files with the same name found")
|
|
2423
2427
|
|
|
2424
|
-
if anon:
|
|
2425
|
-
client_config = (client_config or {}) | {"anon":
|
|
2428
|
+
if anon is not None:
|
|
2429
|
+
client_config = (client_config or {}) | {"anon": anon}
|
|
2426
2430
|
|
|
2427
2431
|
progress_bar = tqdm(
|
|
2428
2432
|
desc=f"Exporting files to {output}: ",
|
|
@@ -33,7 +33,7 @@ def read_storage(
|
|
|
33
33
|
recursive: Optional[bool] = True,
|
|
34
34
|
column: str = "file",
|
|
35
35
|
update: bool = False,
|
|
36
|
-
anon: bool =
|
|
36
|
+
anon: Optional[bool] = None,
|
|
37
37
|
delta: Optional[bool] = False,
|
|
38
38
|
delta_on: Optional[Union[str, Sequence[str]]] = (
|
|
39
39
|
"file.path",
|
|
@@ -124,8 +124,8 @@ def read_storage(
|
|
|
124
124
|
|
|
125
125
|
file_type = get_file_type(type)
|
|
126
126
|
|
|
127
|
-
if anon:
|
|
128
|
-
client_config = (client_config or {}) | {"anon":
|
|
127
|
+
if anon is not None:
|
|
128
|
+
client_config = (client_config or {}) | {"anon": anon}
|
|
129
129
|
session = Session.get(session, client_config=client_config, in_memory=in_memory)
|
|
130
130
|
catalog = session.catalog
|
|
131
131
|
cache = catalog.cache
|
|
@@ -717,6 +717,23 @@ class ImageFile(File):
|
|
|
717
717
|
destination = stringify_path(destination)
|
|
718
718
|
|
|
719
719
|
client: Client = self._catalog.get_client(destination, **(client_config or {}))
|
|
720
|
+
|
|
721
|
+
# If format is not provided, determine it from the file extension
|
|
722
|
+
if format is None:
|
|
723
|
+
from pathlib import PurePosixPath
|
|
724
|
+
|
|
725
|
+
from PIL import Image as PilImage
|
|
726
|
+
|
|
727
|
+
ext = PurePosixPath(destination).suffix.lower()
|
|
728
|
+
format = PilImage.registered_extensions().get(ext)
|
|
729
|
+
|
|
730
|
+
if not format:
|
|
731
|
+
raise FileError(
|
|
732
|
+
f"Can't determine format for destination '{destination}'",
|
|
733
|
+
self.source,
|
|
734
|
+
self.path,
|
|
735
|
+
)
|
|
736
|
+
|
|
720
737
|
with client.fs.open(destination, mode="wb") as f:
|
|
721
738
|
self.read().save(f, format=format)
|
|
722
739
|
|
|
@@ -429,6 +429,8 @@ class StudioClient:
|
|
|
429
429
|
repository: Optional[str] = None,
|
|
430
430
|
priority: Optional[int] = None,
|
|
431
431
|
cluster: Optional[str] = None,
|
|
432
|
+
start_time: Optional[str] = None,
|
|
433
|
+
cron: Optional[str] = None,
|
|
432
434
|
) -> Response[JobData]:
|
|
433
435
|
data = {
|
|
434
436
|
"query": query,
|
|
@@ -442,6 +444,8 @@ class StudioClient:
|
|
|
442
444
|
"repository": repository,
|
|
443
445
|
"priority": priority,
|
|
444
446
|
"compute_cluster_name": cluster,
|
|
447
|
+
"start_after": start_time,
|
|
448
|
+
"cron_expression": cron,
|
|
445
449
|
}
|
|
446
450
|
return self._send_request("datachain/job", data)
|
|
447
451
|
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import os
|
|
3
3
|
import sys
|
|
4
|
+
from datetime import datetime, timezone
|
|
4
5
|
from typing import TYPE_CHECKING, Optional
|
|
5
6
|
|
|
7
|
+
import dateparser
|
|
6
8
|
import tabulate
|
|
7
9
|
|
|
8
10
|
from datachain.config import Config, ConfigLevel
|
|
@@ -42,6 +44,8 @@ def process_jobs_args(args: "Namespace"):
|
|
|
42
44
|
args.req_file,
|
|
43
45
|
args.priority,
|
|
44
46
|
args.cluster,
|
|
47
|
+
args.start_time,
|
|
48
|
+
args.cron,
|
|
45
49
|
)
|
|
46
50
|
|
|
47
51
|
if args.cmd == "cancel":
|
|
@@ -262,6 +266,31 @@ def save_config(hostname, token, level=ConfigLevel.GLOBAL):
|
|
|
262
266
|
return config.config_file()
|
|
263
267
|
|
|
264
268
|
|
|
269
|
+
def parse_start_time(start_time_str: Optional[str]) -> Optional[str]:
    """Convert a user-supplied start time into an ISO 8601 string.

    Args:
        start_time_str: Free-form date/time text (for example
            ``"2024-01-15 14:30:00"`` or ``"tomorrow 3pm"``). ``None`` or an
            empty string means "no start time".

    Returns:
        ``datetime.isoformat()`` of the parsed value, or ``None`` when no
        start time was given.

    Raises:
        DataChainError: If the text cannot be interpreted as a date/time.
    """
    if not start_time_str:
        return None

    supported_formats = (
        "Supported formats include: '2024-01-15 14:30:00', 'tomorrow 3pm', "
        "'monday 9am', '2024-01-15T14:30:00Z', 'in 2 hours', etc."
    )

    # Only the third-party call is guarded. Previously the "could not parse"
    # error below was raised inside the same ``try`` and immediately re-wrapped
    # by the broad handler, which produced a doubled, confusing message.
    try:
        parsed_datetime = dateparser.parse(start_time_str)
    except Exception as e:
        # Broad on purpose: the parser's failure modes are not enumerated here
        # and any of them means the input is unusable.
        raise DataChainError(
            f"Invalid datetime format for start_time: '{start_time_str}'. "
            f"{supported_formats} Error: {e}"
        ) from e

    if parsed_datetime is None:
        raise DataChainError(
            f"Invalid datetime format for start_time: '{start_time_str}'. "
            f"{supported_formats}"
        )

    # NOTE(review): input without an explicit zone presumably yields a naive
    # datetime, so the string carries no UTC offset - confirm how the server
    # interprets such values.
    return parsed_datetime.isoformat()
|
|
292
|
+
|
|
293
|
+
|
|
265
294
|
def show_logs_from_client(client, job_id):
|
|
266
295
|
# Sync usage
|
|
267
296
|
async def _run():
|
|
@@ -310,6 +339,8 @@ def create_job(
|
|
|
310
339
|
req_file: Optional[str] = None,
|
|
311
340
|
priority: Optional[int] = None,
|
|
312
341
|
cluster: Optional[str] = None,
|
|
342
|
+
start_time: Optional[str] = None,
|
|
343
|
+
cron: Optional[str] = None,
|
|
313
344
|
):
|
|
314
345
|
query_type = "PYTHON" if query_file.endswith(".py") else "SHELL"
|
|
315
346
|
with open(query_file) as f:
|
|
@@ -328,6 +359,11 @@ def create_job(
|
|
|
328
359
|
client = StudioClient(team=team_name)
|
|
329
360
|
file_ids = upload_files(client, files) if files else []
|
|
330
361
|
|
|
362
|
+
# Parse start_time if provided
|
|
363
|
+
parsed_start_time = parse_start_time(start_time)
|
|
364
|
+
if cron and parsed_start_time is None:
|
|
365
|
+
parsed_start_time = datetime.now(timezone.utc).isoformat()
|
|
366
|
+
|
|
331
367
|
response = client.create_job(
|
|
332
368
|
query=query,
|
|
333
369
|
query_type=query_type,
|
|
@@ -340,6 +376,8 @@ def create_job(
|
|
|
340
376
|
requirements=requirements,
|
|
341
377
|
priority=priority,
|
|
342
378
|
cluster=cluster,
|
|
379
|
+
start_time=parsed_start_time,
|
|
380
|
+
cron=cron,
|
|
343
381
|
)
|
|
344
382
|
if not response.ok:
|
|
345
383
|
raise DataChainError(response.message)
|
|
@@ -348,6 +386,11 @@ def create_job(
|
|
|
348
386
|
raise DataChainError("Failed to create job")
|
|
349
387
|
|
|
350
388
|
job_id = response.data.get("job", {}).get("id")
|
|
389
|
+
|
|
390
|
+
if parsed_start_time or cron:
|
|
391
|
+
print(f"Job {job_id} is scheduled as a task in Studio.")
|
|
392
|
+
return 0
|
|
393
|
+
|
|
351
394
|
print(f"Job {job_id} created")
|
|
352
395
|
print("Open the job in Studio at", response.data.get("job", {}).get("url"))
|
|
353
396
|
print("=" * 40)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.26.3
|
|
3
|
+
Version: 0.26.4
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -26,6 +26,7 @@ Requires-Dist: packaging
|
|
|
26
26
|
Requires-Dist: pyarrow
|
|
27
27
|
Requires-Dist: typing-extensions
|
|
28
28
|
Requires-Dist: python-dateutil>=2
|
|
29
|
+
Requires-Dist: dateparser>=1.0.0
|
|
29
30
|
Requires-Dist: attrs>=21.3.0
|
|
30
31
|
Requires-Dist: fsspec>=2024.2.0
|
|
31
32
|
Requires-Dist: s3fs>=2024.2.0
|
|
@@ -100,6 +101,7 @@ Provides-Extra: dev
|
|
|
100
101
|
Requires-Dist: datachain[docs,tests]; extra == "dev"
|
|
101
102
|
Requires-Dist: mypy==1.17.0; extra == "dev"
|
|
102
103
|
Requires-Dist: types-python-dateutil; extra == "dev"
|
|
104
|
+
Requires-Dist: types-dateparser; extra == "dev"
|
|
103
105
|
Requires-Dist: types-pytz; extra == "dev"
|
|
104
106
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
105
107
|
Requires-Dist: types-requests; extra == "dev"
|
|
@@ -305,6 +305,7 @@ tests/func/test_read_dataset_remote.py
|
|
|
305
305
|
tests/func/test_read_dataset_version_specifiers.py
|
|
306
306
|
tests/func/test_retry.py
|
|
307
307
|
tests/func/test_session.py
|
|
308
|
+
tests/func/test_studio_datetime_parsing.py
|
|
308
309
|
tests/func/test_toolkit.py
|
|
309
310
|
tests/func/test_video.py
|
|
310
311
|
tests/func/test_warehouse.py
|
|
@@ -7,6 +7,7 @@ packaging
|
|
|
7
7
|
pyarrow
|
|
8
8
|
typing-extensions
|
|
9
9
|
python-dateutil>=2
|
|
10
|
+
dateparser>=1.0.0
|
|
10
11
|
attrs>=21.3.0
|
|
11
12
|
fsspec>=2024.2.0
|
|
12
13
|
s3fs>=2024.2.0
|
|
@@ -43,6 +44,7 @@ soundfile
|
|
|
43
44
|
datachain[docs,tests]
|
|
44
45
|
mypy==1.17.0
|
|
45
46
|
types-python-dateutil
|
|
47
|
+
types-dateparser
|
|
46
48
|
types-pytz
|
|
47
49
|
types-PyYAML
|
|
48
50
|
types-requests
|
|
@@ -42,6 +42,34 @@ def test_image_save(tmp_path, image_file, format):
|
|
|
42
42
|
assert img.size == (256, 256)
|
|
43
43
|
|
|
44
44
|
|
|
45
|
+
def test_image_save_no_extension(tmp_path, image_file):
    """Saving without a format to an extension-less path raises FileError."""
    target = f"{tmp_path}/test"
    image = image_file.as_image_file()
    with pytest.raises(FileError):
        image.save(target)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@pytest.mark.parametrize("format", [None, "JPEG", "PNG"])
def test_image_save_cloud(cloud_test_catalog_upload, image_file, format):
    """Save an ImageFile through the cloud test catalog and read it back.

    With ``format=None`` the saved image is expected to be JPEG, matching the
    ``.jpg`` destination; an explicit format is expected to be used as given.
    """
    catalog_ctx = cloud_test_catalog_upload
    source_image = image_file.as_image_file()
    source_image._set_stream(catalog_ctx.catalog)

    # Write the image to the catalog's storage location.
    destination = f"{catalog_ctx.src_uri}/test_image.jpg"
    source_image.save(
        destination, format=format, client_config=catalog_ctx.client_config
    )

    # Re-open the stored object as a fresh ImageFile and decode it.
    reloaded = ImageFile(path="test_image.jpg", source=catalog_ctx.src_uri)
    reloaded._set_stream(catalog_ctx.catalog)
    decoded = reloaded.read()

    expected_format = "JPEG" if not format else format
    assert decoded.format == expected_format
    assert decoded.size == (256, 256)
|
|
71
|
+
|
|
72
|
+
|
|
45
73
|
def test_get_info(image_file):
|
|
46
74
|
info = image_file.as_image_file().get_info()
|
|
47
75
|
assert info.model_dump() == {"width": 256, "height": 256, "format": "JPEG"}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from datachain.error import DataChainError
|
|
6
|
+
from datachain.studio import parse_start_time
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_parse_start_time_none():
    """A missing start time (``None``) comes back as ``None``."""
    parsed = parse_start_time(None)
    assert parsed is None
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_parse_start_time_empty_string():
    """An empty string is treated the same as no start time."""
    parsed = parse_start_time("")
    assert parsed is None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_parse_start_time_iso_format():
    """ISO 8601 input is accepted with and without a zone designator."""
    # A trailing "Z" is rendered as an explicit +00:00 offset.
    with_zone = parse_start_time("2024-01-15T14:30:00Z")
    assert with_zone == "2024-01-15T14:30:00+00:00"

    # Without a zone, only the date/time prefix is checked.
    without_zone = parse_start_time("2024-01-15T14:30:00")
    assert without_zone.startswith("2024-01-15T14:30:00")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_parse_start_time_standard_format():
    """A space-separated date and time is returned with a ``T`` separator."""
    iso_text = parse_start_time("2024-01-15 14:30:00")
    assert iso_text.startswith("2024-01-15T14:30:00")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_parse_start_time_natural_language():
    """Relative, human-style phrases each produce a string result."""
    phrases = ("tomorrow 3pm", "monday 9am", "in 2 hours", "next week")
    for phrase in phrases:
        parsed = parse_start_time(phrase)
        assert parsed is not None
        assert isinstance(parsed, str)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_parse_start_time_various_formats():
    """Each sample input yields a string that the stdlib reads as ISO 8601."""
    samples = [
        "2024-01-15 14:30:00",
        "2024-01-15T14:30:00Z",
        "2024-01-15T14:30:00+00:00",
        "Jan 15, 2024 2:30 PM",
        "15/01/2024 14:30",
        "2024-01-15",
        "tomorrow",
        "next week",
        "in 2 hours",
        "monday 9am",
        "tomorrow 3pm",
    ]

    for sample in samples:
        iso_text = parse_start_time(sample)
        assert iso_text is not None
        assert isinstance(iso_text, str)

        # Round-trip through the stdlib parser to prove the output is ISO.
        normalized = iso_text.replace("Z", "+00:00")
        try:
            datetime.fromisoformat(normalized)
        except ValueError:
            pytest.fail(f"Failed to parse result '{iso_text}' as ISO format")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_parse_start_time_invalid_format():
    """Unparseable input raises DataChainError that echoes the bad value."""
    bad_inputs = (
        "not a date",
        "invalid datetime string",
        "2024-13-45 25:70:99",  # Invalid date/time values
    )

    for bad_input in bad_inputs:
        with pytest.raises(DataChainError) as raised:
            parse_start_time(bad_input)

        message = str(raised.value)
        assert "Invalid datetime format" in message
        assert bad_input in message
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def test_parse_start_time_timezone_handling():
    """Check how explicit and missing timezones appear in the result.

    The previous version only asserted ``is not None``, which would pass even
    if the zone were dropped or the wall-clock time shifted.
    """
    # An explicit UTC zone must survive as a +00:00 offset.
    with_zone = parse_start_time("2024-01-15 14:30:00 UTC")
    assert with_zone == "2024-01-15T14:30:00+00:00"

    # Without a zone the wall-clock time must be preserved unchanged.
    without_zone = parse_start_time("2024-01-15 14:30:00")
    assert without_zone is not None
    assert without_zone.startswith("2024-01-15T14:30:00")
|
|
@@ -440,9 +440,56 @@ def test_studio_run(capsys, mocker, tmp_dir):
|
|
|
440
440
|
"repository": "https://github.com/iterative/datachain",
|
|
441
441
|
"priority": 5,
|
|
442
442
|
"compute_cluster_name": "default",
|
|
443
|
+
"start_after": None,
|
|
444
|
+
"cron_expression": None,
|
|
443
445
|
}
|
|
444
446
|
|
|
445
447
|
|
|
448
|
+
def test_studio_run_task(capsys, mocker, tmp_dir, studio_token):
    """Run ``job run`` with ``--start-time`` and ``--cron`` and check the request.

    Verifies that the command exits with 0, that the job-creation POST carries
    a start time and the exact cron expression, and that the user is told the
    job was scheduled as a task.
    """
    mocker.patch(
        "datachain.remote.studio.websockets.connect", side_effect=mocked_connect
    )

    with requests_mock.mock() as m:
        m.post(
            f"{STUDIO_URL}/api/datachain/job",
            json={"job": {"id": 1, "url": "https://example.com"}},
        )
        m.get(
            f"{STUDIO_URL}/api/datachain/datasets/dataset_job_versions?job_id=1&team_name=team_name",
            json={
                "dataset_versions": [
                    {"dataset_name": "dataset_name", "version": "1.0.0"}
                ]
            },
        )
        (tmp_dir / "example_query.py").write_text("print(1)")

        assert (
            main(
                [
                    "job",
                    "run",
                    "example_query.py",
                    "--start-time",
                    "tomorrow 3pm",
                    "--cron",
                    "0 0 * * *",
                ]
            )
            == 0
        )

    first_request = m.request_history[0]
    assert first_request.method == "POST"
    assert first_request.url == f"{STUDIO_URL}/api/datachain/job"

    # Each field is checked once; the earlier duplicate assertions added nothing.
    request_json = first_request.json()
    assert request_json["start_after"] is not None
    assert request_json["cron_expression"] == "0 0 * * *"

    # capsys was requested but never used: check the scheduled-task message.
    out = capsys.readouterr().out
    assert "Job 1 is scheduled as a task in Studio." in out
|
|
491
|
+
|
|
492
|
+
|
|
446
493
|
@pytest.mark.parametrize(
|
|
447
494
|
"status,expected_exit_code", [("FAILED", 1), ("CANCELLED", 2), ("COMPLETED", 0)]
|
|
448
495
|
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|