datachain 0.16.1__tar.gz → 0.16.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.16.1 → datachain-0.16.3}/.pre-commit-config.yaml +1 -1
- {datachain-0.16.1/src/datachain.egg-info → datachain-0.16.3}/PKG-INFO +2 -2
- datachain-0.16.3/docs/commands/auth/login.md +51 -0
- datachain-0.16.3/docs/commands/auth/logout.md +37 -0
- datachain-0.16.3/docs/commands/auth/team.md +36 -0
- datachain-0.16.3/docs/commands/auth/token.md +26 -0
- datachain-0.16.3/docs/commands/index.md +33 -0
- datachain-0.16.3/docs/commands/job/cancel.md +43 -0
- datachain-0.16.3/docs/commands/job/logs.md +47 -0
- datachain-0.16.3/docs/commands/job/run.md +67 -0
- {datachain-0.16.1 → datachain-0.16.3}/mkdocs.yml +12 -0
- {datachain-0.16.1 → datachain-0.16.3}/pyproject.toml +1 -1
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/func/array.py +56 -1
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/func/func.py +32 -1
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/database.py +5 -3
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/records.py +7 -2
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/functions/array.py +11 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/sqlite/base.py +22 -0
- {datachain-0.16.1 → datachain-0.16.3/src/datachain.egg-info}/PKG-INFO +2 -2
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain.egg-info/SOURCES.txt +9 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain.egg-info/requires.txt +1 -1
- datachain-0.16.3/tests/func/test_func.py +124 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_datachain.py +0 -4
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/sql/test_array.py +35 -0
- {datachain-0.16.1 → datachain-0.16.3}/.cruft.json +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/.gitattributes +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/.github/codecov.yaml +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/.github/dependabot.yml +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/.github/workflows/release.yml +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/.github/workflows/tests.yml +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/.gitignore +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/LICENSE +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/README.rst +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/assets/datachain.svg +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/contributing.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/examples.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/index.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/overrides/main.html +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/quick-start.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/data-types/file.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/data-types/index.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/data-types/pose.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/data-types/segment.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/datachain.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/func.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/index.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/remotes.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/toolkit.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/torch.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/references/udf.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/docs/tutorials.md +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/multimodal/wds.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/noxfile.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/setup.cfg +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/__main__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/asyn.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cache.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/cli/utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/client/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/client/azure.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/client/gcs.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/client/hf.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/client/local.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/client/s3.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/config.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/dataset.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/error.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/fs/reference.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/fs/utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/func/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/func/base.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/func/conditional.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/func/numeric.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/func/path.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/func/random.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/func/string.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/func/window.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/job.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/clip.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/file.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/hf.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/image.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/listing.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/settings.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/tar.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/text.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/udf.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/video.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/listing.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/model/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/model/bbox.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/model/pose.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/model/segment.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/model/utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/node.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/progress.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/py.typed +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/query/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/query/batch.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/query/dataset.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/query/metrics.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/query/params.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/query/queue.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/query/schema.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/query/session.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/query/udf.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/query/utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/remote/studio.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/script_meta.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/types.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/sql/utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/studio.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/telemetry.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain/utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/conftest.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/data.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/examples/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/examples/test_examples.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/examples/wds_data.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/data/lena.jpg +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/model/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_catalog.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_client.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_data_storage.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_datachain.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_datasets.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_file.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_hf.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_image.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_listing.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_ls.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_metrics.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_pull.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_pytorch.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_query.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_read_database.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_session.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_toolkit.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_video.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/func/test_warehouse.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/scripts/feature_class.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/test_atomicity.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/test_cli_e2e.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/test_cli_studio.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/test_import_time.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/test_query_e2e.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/test_telemetry.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/model/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_asyn.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_cache.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_catalog.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_client.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_config.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_dataset.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_func.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_listing.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_metastore.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_query.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_query_params.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_serializer.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_session.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_utils.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.16.1 → datachain-0.16.3}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.16.1
|
|
3
|
+
Version: 0.16.3
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -23,7 +23,7 @@ Requires-Dist: tqdm
|
|
|
23
23
|
Requires-Dist: numpy<3,>=1
|
|
24
24
|
Requires-Dist: pandas>=2.0.0
|
|
25
25
|
Requires-Dist: packaging
|
|
26
|
-
Requires-Dist: pyarrow
|
|
26
|
+
Requires-Dist: pyarrow<20
|
|
27
27
|
Requires-Dist: typing-extensions
|
|
28
28
|
Requires-Dist: python-dateutil>=2
|
|
29
29
|
Requires-Dist: attrs>=21.3.0
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# auth login
|
|
2
|
+
|
|
3
|
+
Authenticate DataChain with Studio to save a client access token to DataChain configuration.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain auth login [-h] [-v] [-q] [-H HOSTNAME] [-s SCOPES] [-n NAME] [--no-open] [--local]
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
By default, this command authenticates DataChain with Studio using default scopes and assigns a random name as the token name. The authentication token will be used for subsequent Studio operations.
|
|
14
|
+
|
|
15
|
+
## Options
|
|
16
|
+
|
|
17
|
+
* `-H HOSTNAME`, `--hostname HOSTNAME` - The hostname of the Studio instance to authenticate with.
|
|
18
|
+
* `-s SCOPES`, `--scopes SCOPES` - Authentication token scopes. Allowed scopes: `EXPERIMENTS`, `DATASETS`, `MODELS`. Defaults to all available scopes.
|
|
19
|
+
* `-n NAME`, `--name NAME` - The name of the authentication token. It is used to identify the token in your Studio profile. Defaults to a random name.
|
|
20
|
+
* `--no-open` - Use code-based authentication without browser. You will be presented with a user code to enter in the browser. DataChain will also use this if it cannot launch the browser on your behalf.
|
|
21
|
+
* `--local` - Save the token in the local project config instead of the global configuration.
|
|
22
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
23
|
+
* `-v`, `--verbose` - Be verbose.
|
|
24
|
+
* `-q`, `--quiet` - Be quiet.
|
|
25
|
+
|
|
26
|
+
## Examples
|
|
27
|
+
|
|
28
|
+
1. Basic authentication with default settings:
|
|
29
|
+
```bash
|
|
30
|
+
datachain auth login
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
2. Authenticate with specific scopes:
|
|
34
|
+
```bash
|
|
35
|
+
datachain auth login --scopes EXPERIMENTS,DATASETS
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
3. Authenticate with a custom token name:
|
|
39
|
+
```bash
|
|
40
|
+
datachain auth login --name my-token
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
4. Authenticate using code-based flow:
|
|
44
|
+
```bash
|
|
45
|
+
datachain auth login --no-open
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
5. Save token locally for the project:
|
|
49
|
+
```bash
|
|
50
|
+
datachain auth login --local
|
|
51
|
+
```
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# auth logout
|
|
2
|
+
|
|
3
|
+
Remove the Studio authentication token from DataChain configuration.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain auth logout [-h] [-v] [-q] [--local]
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
This command removes the Studio authentication token from the DataChain configuration. By default, it removes the token from the global configuration, but you can also remove it from the local project configuration using the `--local` option.
|
|
14
|
+
|
|
15
|
+
## Options
|
|
16
|
+
|
|
17
|
+
* `--local` - Remove the token from the local project config instead of the global configuration.
|
|
18
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
19
|
+
* `-v`, `--verbose` - Be verbose.
|
|
20
|
+
* `-q`, `--quiet` - Be quiet.
|
|
21
|
+
|
|
22
|
+
## Examples
|
|
23
|
+
|
|
24
|
+
1. Remove token from global configuration:
|
|
25
|
+
```bash
|
|
26
|
+
datachain auth logout
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
2. Remove token from local project configuration:
|
|
30
|
+
```bash
|
|
31
|
+
datachain auth logout --local
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
3. Remove token with verbose output:
|
|
35
|
+
```bash
|
|
36
|
+
datachain auth logout -v
|
|
37
|
+
```
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# auth team
|
|
2
|
+
|
|
3
|
+
Set the default team for Studio operations.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain auth team [-h] [-v] [-q] [--global] team_name
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
This command sets the default team for Studio operations. By default, the team setting is project-specific, but you can use the `--global` option to set it for all projects.
|
|
14
|
+
|
|
15
|
+
## Arguments
|
|
16
|
+
|
|
17
|
+
* `team_name` - Name of the team to set as default
|
|
18
|
+
|
|
19
|
+
## Options
|
|
20
|
+
|
|
21
|
+
* `--global` - Set team globally for all projects
|
|
22
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
23
|
+
* `-v`, `--verbose` - Be verbose.
|
|
24
|
+
* `-q`, `--quiet` - Be quiet.
|
|
25
|
+
|
|
26
|
+
## Examples
|
|
27
|
+
|
|
28
|
+
1. Set default team for current project:
|
|
29
|
+
```bash
|
|
30
|
+
datachain auth team my-team
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
2. Set default team globally for all projects:
|
|
34
|
+
```bash
|
|
35
|
+
datachain auth team --global my-team
|
|
36
|
+
```
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# auth token
|
|
2
|
+
|
|
3
|
+
Display the current authentication token for Studio.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain auth token [-h] [-v] [-q]
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
This command displays the current authentication token that DataChain is using for Studio operations. The token is used for authenticating with Studio and accessing its features.
|
|
14
|
+
|
|
15
|
+
## Options
|
|
16
|
+
|
|
17
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
18
|
+
* `-v`, `--verbose` - Be verbose.
|
|
19
|
+
* `-q`, `--quiet` - Be quiet.
|
|
20
|
+
|
|
21
|
+
## Examples
|
|
22
|
+
|
|
23
|
+
1. Display the current token:
|
|
24
|
+
```bash
|
|
25
|
+
datachain auth token
|
|
26
|
+
```
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
|
|
2
|
+
# Using DataChain Commands
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
DataChain is a command-line tool for wrangling unstructured AI data at scale. Use `datachain -h` to list all available commands.
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
## Typical DataChain Workflow
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
1. **Authentication with Studio**
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
- Use [`datachain auth login`](auth/login.md) to authenticate with Studio
|
|
18
|
+
|
|
19
|
+
- Set your default team with [`datachain auth team`](auth/team.md)
|
|
20
|
+
|
|
21
|
+
- View your token with [`datachain auth token`](auth/token.md)
|
|
22
|
+
|
|
23
|
+
- Log out from Studio with [`datachain auth logout`](auth/logout.md)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
2. **Job Management**
|
|
28
|
+
|
|
29
|
+
- Run jobs in Studio with [`datachain job run`](job/run.md)
|
|
30
|
+
|
|
31
|
+
- Monitor job logs with [`datachain job logs`](job/logs.md)
|
|
32
|
+
|
|
33
|
+
- Cancel running jobs with [`datachain job cancel`](job/cancel.md)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# job cancel
|
|
2
|
+
|
|
3
|
+
Cancel a running job in Studio.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain job cancel [-h] [-v] [-q] [--team TEAM] id
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
This command cancels a running job in Studio. The job ID can be obtained from the Studio UI or from the output when the job was created. This is the recommended way to stop a running job, as simply closing the logs view (e.g., with Ctrl+C) will not cancel the job execution.
|
|
14
|
+
|
|
15
|
+
## Arguments
|
|
16
|
+
|
|
17
|
+
* `id` - Job ID to cancel. This ID is displayed when the job is created and can also be found in the Studio UI.
|
|
18
|
+
|
|
19
|
+
## Options
|
|
20
|
+
|
|
21
|
+
* `--team TEAM` - Team to cancel job for (default: from config)
|
|
22
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
23
|
+
* `-v`, `--verbose` - Be verbose.
|
|
24
|
+
* `-q`, `--quiet` - Be quiet.
|
|
25
|
+
|
|
26
|
+
## Examples
|
|
27
|
+
|
|
28
|
+
1. Cancel a specific job:
|
|
29
|
+
```bash
|
|
30
|
+
datachain job cancel job-123
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
2. Cancel a job in a specific team:
|
|
34
|
+
```bash
|
|
35
|
+
datachain job cancel --team my-team job-123
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
## Notes
|
|
40
|
+
|
|
41
|
+
* The job ID is displayed when the job is created using `datachain job run`
|
|
42
|
+
* You can also find the job ID in the Studio UI
|
|
43
|
+
* This is the proper way to stop a running job, as simply closing the logs view will not cancel the job execution
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# job logs
|
|
2
|
+
|
|
3
|
+
Display logs and current status of jobs in Studio.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain job logs [-h] [-v] [-q] [--team TEAM] id
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
This command displays the logs and current status of a running job in Studio. The command will show real-time logs from the job execution. Note that closing this command (e.g., with Ctrl+C) will only stop displaying the logs but will not cancel the job execution. To cancel a job, use the `job cancel` command.
|
|
14
|
+
|
|
15
|
+
## Arguments
|
|
16
|
+
|
|
17
|
+
* `id` - Job ID to show logs for
|
|
18
|
+
|
|
19
|
+
## Options
|
|
20
|
+
|
|
21
|
+
* `--team TEAM` - Team to check logs for (default: from config)
|
|
22
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
23
|
+
* `-v`, `--verbose` - Be verbose.
|
|
24
|
+
* `-q`, `--quiet` - Be quiet.
|
|
25
|
+
|
|
26
|
+
## Examples
|
|
27
|
+
|
|
28
|
+
1. Display logs for a specific job:
|
|
29
|
+
```bash
|
|
30
|
+
datachain job logs job-123
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
2. Display logs for a job in a specific team:
|
|
34
|
+
```bash
|
|
35
|
+
datachain job logs --team my-team job-123
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
3. Display logs with verbose output:
|
|
39
|
+
```bash
|
|
40
|
+
datachain job logs -v job-123
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Notes
|
|
44
|
+
|
|
45
|
+
* Closing the logs command (e.g., with Ctrl+C) will only stop displaying the logs but will not cancel the job execution
|
|
46
|
+
* To cancel a running job, use the `datachain job cancel` command
|
|
47
|
+
* The job will continue running in Studio even after you stop viewing the logs
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# job run
|
|
2
|
+
|
|
3
|
+
Run a job in Studio.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain job run [-h] [-v] [-q] [--team TEAM] [--env-file ENV_FILE] [--env ENV [ENV ...]]
|
|
9
|
+
[--workers WORKERS] [--files FILES [FILES ...]] [--python-version PYTHON_VERSION]
|
|
10
|
+
[--req-file REQ_FILE] [--req REQ [REQ ...]]
|
|
11
|
+
file
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Description
|
|
15
|
+
|
|
16
|
+
This command runs a job in Studio using the specified query file. You can configure various aspects of the job including environment variables, Python version, dependencies, and more.
|
|
17
|
+
|
|
18
|
+
## Arguments
|
|
19
|
+
|
|
20
|
+
* `file` - Query file to run
|
|
21
|
+
|
|
22
|
+
## Options
|
|
23
|
+
|
|
24
|
+
* `--team TEAM` - Team to run job for (default: from config)
|
|
25
|
+
* `--env-file ENV_FILE` - File with environment variables for the job
|
|
26
|
+
* `--env ENV` - Environment variables in KEY=VALUE format
|
|
27
|
+
* `--workers WORKERS` - Number of workers for the job
|
|
28
|
+
* `--files FILES` - Additional files to include in the job
|
|
29
|
+
* `--python-version PYTHON_VERSION` - Python version for the job (e.g., 3.9, 3.10, 3.11)
|
|
30
|
+
* `--req-file REQ_FILE` - Python requirements file
|
|
31
|
+
* `--req REQ` - Python package requirements
|
|
32
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
33
|
+
* `-v`, `--verbose` - Be verbose.
|
|
34
|
+
* `-q`, `--quiet` - Be quiet.
|
|
35
|
+
|
|
36
|
+
## Examples
|
|
37
|
+
|
|
38
|
+
1. Run a basic job:
|
|
39
|
+
```bash
|
|
40
|
+
datachain job run query.py
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
2. Run a job with specific team and Python version:
|
|
44
|
+
```bash
|
|
45
|
+
datachain job run --team my-team --python-version 3.11 query.py
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
3. Run a job with environment variables and requirements:
|
|
49
|
+
```bash
|
|
50
|
+
datachain job run --env-file .env --req-file requirements.txt query.py
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
4. Run a job with multiple workers and additional files:
|
|
54
|
+
```bash
|
|
55
|
+
datachain job run --workers 4 --files utils.py config.json query.py
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
5. Run a job with inline environment variables and package requirements:
|
|
59
|
+
```bash
|
|
60
|
+
datachain job run --env API_KEY=123 --req pandas numpy query.py
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Notes
|
|
64
|
+
|
|
65
|
+
* Closing the logs command (e.g., with Ctrl+C) will only stop displaying the logs but will not cancel the job execution
|
|
66
|
+
* To cancel a running job, use the `datachain job cancel` command
|
|
67
|
+
* The job will continue running in Studio even after you stop viewing the logs
|
|
@@ -84,6 +84,18 @@ nav:
|
|
|
84
84
|
- Torch: references/torch.md
|
|
85
85
|
- Functions: references/func.md
|
|
86
86
|
- Toolkit: references/toolkit.md
|
|
87
|
+
- 📖 CLI Reference:
|
|
88
|
+
- Overview: commands/index.md
|
|
89
|
+
- Commands:
|
|
90
|
+
- auth:
|
|
91
|
+
- login: commands/auth/login.md
|
|
92
|
+
- logout: commands/auth/logout.md
|
|
93
|
+
- token: commands/auth/token.md
|
|
94
|
+
- team: commands/auth/team.md
|
|
95
|
+
- job:
|
|
96
|
+
- run: commands/job/run.md
|
|
97
|
+
- logs: commands/job/logs.md
|
|
98
|
+
- cancel: commands/job/cancel.md
|
|
87
99
|
- 📡 Interacting with remote storage: references/remotes.md
|
|
88
100
|
- 🤝 Contributing: contributing.md
|
|
89
101
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from collections.abc import Sequence
|
|
2
|
-
from typing import Any, Union
|
|
2
|
+
from typing import Any, Optional, Union
|
|
3
3
|
|
|
4
4
|
from datachain.sql.functions import array
|
|
5
5
|
|
|
@@ -178,6 +178,61 @@ def contains(arr: Union[str, Sequence, Func], elem: Any) -> Func:
|
|
|
178
178
|
return Func("contains", inner=inner, cols=cols, args=args, result_type=int)
|
|
179
179
|
|
|
180
180
|
|
|
181
|
+
def get_element(arg: Union[str, Sequence, Func], index: int) -> Func:
|
|
182
|
+
"""
|
|
183
|
+
Returns the element at the given index from the array.
|
|
184
|
+
If the index is out of bounds, it returns None or the column's default value.
|
|
185
|
+
|
|
186
|
+
Args:
|
|
187
|
+
arg (str | Sequence | Func): Array to get the element from.
|
|
188
|
+
If a string is provided, it is assumed to be the name of the array column.
|
|
189
|
+
If a sequence is provided, it is assumed to be an array of values.
|
|
190
|
+
If a Func is provided, it is assumed to be a function returning an array.
|
|
191
|
+
index (int): Index of the element to get from the array.
|
|
192
|
+
|
|
193
|
+
Returns:
|
|
194
|
+
Func: A Func object that represents the array get_element function.
|
|
195
|
+
|
|
196
|
+
Example:
|
|
197
|
+
```py
|
|
198
|
+
dc.mutate(
|
|
199
|
+
first_el=func.array.get_element("signal.values", 0),
|
|
200
|
+
second_el=func.array.get_element([1, 2, 3, 4, 5], 1),
|
|
201
|
+
)
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Note:
|
|
205
|
+
- Result column will always be the same type as the elements of the array.
|
|
206
|
+
"""
|
|
207
|
+
|
|
208
|
+
def type_from_args(arr, _):
|
|
209
|
+
if isinstance(arr, list):
|
|
210
|
+
try:
|
|
211
|
+
return type(arr[0])
|
|
212
|
+
except IndexError:
|
|
213
|
+
return str # if the array is empty, return str as default type
|
|
214
|
+
return None
|
|
215
|
+
|
|
216
|
+
cols: Optional[Union[str, Sequence, Func]]
|
|
217
|
+
args: Union[str, Sequence, Func, int]
|
|
218
|
+
|
|
219
|
+
if isinstance(arg, (str, Func)):
|
|
220
|
+
cols = [arg]
|
|
221
|
+
args = [index]
|
|
222
|
+
else:
|
|
223
|
+
cols = None
|
|
224
|
+
args = [arg, index]
|
|
225
|
+
|
|
226
|
+
return Func(
|
|
227
|
+
"get_element",
|
|
228
|
+
inner=array.get_element,
|
|
229
|
+
cols=cols,
|
|
230
|
+
args=args,
|
|
231
|
+
from_array=True,
|
|
232
|
+
type_from_args=type_from_args,
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
|
|
181
236
|
def sip_hash_64(arg: Union[str, Sequence]) -> Func:
|
|
182
237
|
"""
|
|
183
238
|
Computes the SipHash-64 hash of the array.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import inspect
|
|
2
2
|
from collections.abc import Sequence
|
|
3
|
-
from typing import TYPE_CHECKING, Any, Callable, Optional, Union
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, Union, get_args, get_origin
|
|
4
4
|
|
|
5
5
|
from sqlalchemy import BindParameter, Case, ColumnElement, Integer, cast, desc
|
|
6
6
|
from sqlalchemy.sql import func as sa_func
|
|
@@ -36,7 +36,9 @@ class Func(Function):
|
|
|
36
36
|
args: Optional[Sequence[Any]] = None,
|
|
37
37
|
kwargs: Optional[dict[str, Any]] = None,
|
|
38
38
|
result_type: Optional["DataType"] = None,
|
|
39
|
+
type_from_args: Optional[Callable[..., "DataType"]] = None,
|
|
39
40
|
is_array: bool = False,
|
|
41
|
+
from_array: bool = False,
|
|
40
42
|
is_window: bool = False,
|
|
41
43
|
window: Optional["Window"] = None,
|
|
42
44
|
label: Optional[str] = None,
|
|
@@ -47,7 +49,9 @@ class Func(Function):
|
|
|
47
49
|
self.args = args or []
|
|
48
50
|
self.kwargs = kwargs or {}
|
|
49
51
|
self.result_type = result_type
|
|
52
|
+
self.type_from_args = type_from_args
|
|
50
53
|
self.is_array = is_array
|
|
54
|
+
self.from_array = from_array
|
|
51
55
|
self.is_window = is_window
|
|
52
56
|
self.window = window
|
|
53
57
|
self.col_label = label
|
|
@@ -66,7 +70,9 @@ class Func(Function):
|
|
|
66
70
|
self.args,
|
|
67
71
|
self.kwargs,
|
|
68
72
|
self.result_type,
|
|
73
|
+
self.type_from_args,
|
|
69
74
|
self.is_array,
|
|
75
|
+
self.from_array,
|
|
70
76
|
self.is_window,
|
|
71
77
|
window,
|
|
72
78
|
self.col_label,
|
|
@@ -101,6 +107,20 @@ class Func(Function):
|
|
|
101
107
|
"Columns must have the same type to infer result type",
|
|
102
108
|
)
|
|
103
109
|
|
|
110
|
+
if self.from_array:
|
|
111
|
+
if get_origin(col_type) is list:
|
|
112
|
+
col_args = get_args(col_type)
|
|
113
|
+
if len(col_args) != 1:
|
|
114
|
+
raise DataChainColumnError(
|
|
115
|
+
str(self),
|
|
116
|
+
"Array column must have a single type argument",
|
|
117
|
+
)
|
|
118
|
+
return col_args[0]
|
|
119
|
+
raise DataChainColumnError(
|
|
120
|
+
str(self),
|
|
121
|
+
"Array column must be of type list",
|
|
122
|
+
)
|
|
123
|
+
|
|
104
124
|
return list[col_type] if self.is_array else col_type # type: ignore[valid-type]
|
|
105
125
|
|
|
106
126
|
def __add__(self, other: Union[ColT, float]) -> "Func":
|
|
@@ -339,7 +359,9 @@ class Func(Function):
|
|
|
339
359
|
self.args,
|
|
340
360
|
self.kwargs,
|
|
341
361
|
self.result_type,
|
|
362
|
+
self.type_from_args,
|
|
342
363
|
self.is_array,
|
|
364
|
+
self.from_array,
|
|
343
365
|
self.is_window,
|
|
344
366
|
self.window,
|
|
345
367
|
label,
|
|
@@ -368,6 +390,15 @@ class Func(Function):
|
|
|
368
390
|
if signals_schema and (col_type := self._db_col_type(signals_schema)):
|
|
369
391
|
return col_type
|
|
370
392
|
|
|
393
|
+
if (
|
|
394
|
+
self.type_from_args
|
|
395
|
+
and (self.cols is None or self.cols == [])
|
|
396
|
+
and self.args is not None
|
|
397
|
+
and len(self.args) > 0
|
|
398
|
+
and (result_type := self.type_from_args(*self.args)) is not None
|
|
399
|
+
):
|
|
400
|
+
return result_type
|
|
401
|
+
|
|
371
402
|
raise DataChainColumnError(
|
|
372
403
|
str(self),
|
|
373
404
|
"Column name is required to infer result type",
|
|
@@ -127,9 +127,11 @@ def read_database(
|
|
|
127
127
|
```
|
|
128
128
|
|
|
129
129
|
Notes:
|
|
130
|
-
This function works with a variety of databases — including,
|
|
131
|
-
SQLite, DuckDB, PostgreSQL, and Snowflake,
|
|
132
|
-
installed.
|
|
130
|
+
- This function works with a variety of databases — including,
|
|
131
|
+
but not limited to, SQLite, DuckDB, PostgreSQL, and Snowflake,
|
|
132
|
+
provided the appropriate driver is installed.
|
|
133
|
+
- This call is blocking, and will execute the query and return once the
|
|
134
|
+
results are saved.
|
|
133
135
|
"""
|
|
134
136
|
from datachain.lib.dc.records import read_records
|
|
135
137
|
|
|
@@ -37,9 +37,13 @@ def read_records(
|
|
|
37
37
|
import datachain as dc
|
|
38
38
|
single_record = dc.read_records(dc.DEFAULT_FILE_RECORD)
|
|
39
39
|
```
|
|
40
|
+
|
|
41
|
+
Notes:
|
|
42
|
+
This call blocks until all records are inserted.
|
|
40
43
|
"""
|
|
41
|
-
from datachain.query.dataset import adjust_outputs, get_col_types
|
|
44
|
+
from datachain.query.dataset import INSERT_BATCH_SIZE, adjust_outputs, get_col_types
|
|
42
45
|
from datachain.sql.types import SQLType
|
|
46
|
+
from datachain.utils import batched
|
|
43
47
|
|
|
44
48
|
from .datasets import read_dataset
|
|
45
49
|
|
|
@@ -89,6 +93,7 @@ def read_records(
|
|
|
89
93
|
{c.name: c.type for c in columns if isinstance(c.type, SQLType)},
|
|
90
94
|
)
|
|
91
95
|
records = (adjust_outputs(warehouse, record, col_types) for record in to_insert)
|
|
92
|
-
|
|
96
|
+
for chunk in batched(records, INSERT_BATCH_SIZE):
|
|
97
|
+
warehouse.insert_rows(table, chunk)
|
|
93
98
|
warehouse.insert_rows_done(table)
|
|
94
99
|
return read_dataset(name=dsr.name, session=session, settings=settings)
|
|
@@ -48,6 +48,16 @@ class contains(GenericFunction): # noqa: N801
|
|
|
48
48
|
inherit_cache = True
|
|
49
49
|
|
|
50
50
|
|
|
51
|
+
class get_element(GenericFunction): # noqa: N801
|
|
52
|
+
"""
|
|
53
|
+
Returns the element at the given index in the array.
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
package = "array"
|
|
57
|
+
name = "get_element"
|
|
58
|
+
inherit_cache = True
|
|
59
|
+
|
|
60
|
+
|
|
51
61
|
class sip_hash_64(GenericFunction): # noqa: N801
|
|
52
62
|
"""
|
|
53
63
|
Computes the SipHash-64 hash of the array.
|
|
@@ -63,4 +73,5 @@ compiler_not_implemented(cosine_distance)
|
|
|
63
73
|
compiler_not_implemented(euclidean_distance)
|
|
64
74
|
compiler_not_implemented(length)
|
|
65
75
|
compiler_not_implemented(contains)
|
|
76
|
+
compiler_not_implemented(get_element)
|
|
66
77
|
compiler_not_implemented(sip_hash_64)
|
|
@@ -88,6 +88,7 @@ def setup():
|
|
|
88
88
|
compiles(sql_path.file_ext, "sqlite")(compile_path_file_ext)
|
|
89
89
|
compiles(array.length, "sqlite")(compile_array_length)
|
|
90
90
|
compiles(array.contains, "sqlite")(compile_array_contains)
|
|
91
|
+
compiles(array.get_element, "sqlite")(compile_array_get_element)
|
|
91
92
|
compiles(string.length, "sqlite")(compile_string_length)
|
|
92
93
|
compiles(string.split, "sqlite")(compile_string_split)
|
|
93
94
|
compiles(string.regexp_replace, "sqlite")(compile_string_regexp_replace)
|
|
@@ -270,6 +271,13 @@ def register_user_defined_sql_functions() -> None:
|
|
|
270
271
|
|
|
271
272
|
_registered_function_creators["string_functions"] = create_string_functions
|
|
272
273
|
|
|
274
|
+
def create_array_functions(conn):
|
|
275
|
+
conn.create_function(
|
|
276
|
+
"json_array_get_element", 2, py_json_array_get_element, deterministic=True
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
_registered_function_creators["array_functions"] = create_array_functions
|
|
280
|
+
|
|
273
281
|
has_json_extension = functions_exist(["json_array_length", "json_array_contains"])
|
|
274
282
|
if not has_json_extension:
|
|
275
283
|
|
|
@@ -438,6 +446,20 @@ def py_json_array_contains(arr, value, is_json):
|
|
|
438
446
|
return value in orjson.loads(arr)
|
|
439
447
|
|
|
440
448
|
|
|
449
|
+
def py_json_array_get_element(val, idx):
|
|
450
|
+
arr = orjson.loads(val)
|
|
451
|
+
try:
|
|
452
|
+
return arr[idx]
|
|
453
|
+
except IndexError:
|
|
454
|
+
return None
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def compile_array_get_element(element, compiler, **kwargs):
|
|
458
|
+
return compiler.process(
|
|
459
|
+
func.json_array_get_element(*element.clauses.clauses), **kwargs
|
|
460
|
+
)
|
|
461
|
+
|
|
462
|
+
|
|
441
463
|
def compile_array_length(element, compiler, **kwargs):
|
|
442
464
|
return compiler.process(func.json_array_length(*element.clauses.clauses), **kwargs)
|
|
443
465
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.16.1
|
|
3
|
+
Version: 0.16.3
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -23,7 +23,7 @@ Requires-Dist: tqdm
|
|
|
23
23
|
Requires-Dist: numpy<3,>=1
|
|
24
24
|
Requires-Dist: pandas>=2.0.0
|
|
25
25
|
Requires-Dist: packaging
|
|
26
|
-
Requires-Dist: pyarrow
|
|
26
|
+
Requires-Dist: pyarrow<20
|
|
27
27
|
Requires-Dist: typing-extensions
|
|
28
28
|
Requires-Dist: python-dateutil>=2
|
|
29
29
|
Requires-Dist: attrs>=21.3.0
|