datachain 0.16.0__tar.gz → 0.16.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.16.0 → datachain-0.16.2}/.pre-commit-config.yaml +1 -1
- {datachain-0.16.0/src/datachain.egg-info → datachain-0.16.2}/PKG-INFO +1 -1
- datachain-0.16.2/docs/commands/auth/login.md +51 -0
- datachain-0.16.2/docs/commands/auth/logout.md +37 -0
- datachain-0.16.2/docs/commands/auth/team.md +36 -0
- datachain-0.16.2/docs/commands/auth/token.md +26 -0
- datachain-0.16.2/docs/commands/index.md +33 -0
- datachain-0.16.2/docs/commands/job/cancel.md +43 -0
- datachain-0.16.2/docs/commands/job/logs.md +47 -0
- datachain-0.16.2/docs/commands/job/run.md +67 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/examples.md +5 -5
- {datachain-0.16.0 → datachain-0.16.2}/docs/quick-start.md +3 -3
- {datachain-0.16.0 → datachain-0.16.2}/mkdocs.yml +12 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/func/aggregate.py +3 -3
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/convert/values_to_tuples.py +6 -8
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/datachain.py +16 -10
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/records.py +18 -10
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/utils.py +2 -2
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/signal_schema.py +1 -10
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/query/dataset.py +13 -6
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/query/schema.py +1 -4
- {datachain-0.16.0 → datachain-0.16.2/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain.egg-info/SOURCES.txt +8 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_read_database.py +31 -17
- {datachain-0.16.0 → datachain-0.16.2}/.cruft.json +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/.gitattributes +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/.github/codecov.yaml +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/.github/dependabot.yml +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/.github/workflows/release.yml +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/.github/workflows/tests.yml +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/.gitignore +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/LICENSE +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/README.rst +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/assets/datachain.svg +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/contributing.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/index.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/overrides/main.html +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/data-types/file.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/data-types/index.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/data-types/pose.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/data-types/segment.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/datachain.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/func.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/index.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/remotes.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/toolkit.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/torch.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/references/udf.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/docs/tutorials.md +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/multimodal/wds.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/noxfile.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/pyproject.toml +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/setup.cfg +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/__main__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/asyn.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cache.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/cli/utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/client/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/client/azure.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/client/gcs.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/client/hf.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/client/local.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/client/s3.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/config.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/dataset.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/error.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/fs/reference.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/fs/utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/func/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/func/array.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/func/base.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/func/conditional.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/func/func.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/func/numeric.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/func/path.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/func/random.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/func/string.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/func/window.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/job.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/clip.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/file.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/hf.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/image.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/listing.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/settings.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/tar.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/text.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/udf.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/video.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/listing.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/model/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/model/bbox.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/model/pose.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/model/segment.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/model/utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/node.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/progress.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/py.typed +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/query/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/query/batch.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/query/metrics.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/query/params.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/query/queue.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/query/session.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/query/udf.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/query/utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/remote/studio.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/script_meta.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/types.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/sql/utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/studio.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/telemetry.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain/utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/conftest.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/data.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/examples/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/examples/test_examples.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/examples/wds_data.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/data/lena.jpg +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/model/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_catalog.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_client.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_data_storage.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_datachain.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_datasets.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_file.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_hf.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_image.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_listing.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_ls.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_metrics.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_pull.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_pytorch.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_query.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_session.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_toolkit.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_video.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/func/test_warehouse.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/scripts/feature_class.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/test_atomicity.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/test_cli_e2e.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/test_cli_studio.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/test_import_time.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/test_query_e2e.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/test_telemetry.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/model/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_asyn.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_cache.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_catalog.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_client.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_config.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_dataset.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_func.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_listing.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_metastore.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_query.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_query_params.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_serializer.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_session.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_utils.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.16.0 → datachain-0.16.2}/tests/utils.py +0 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# auth login
|
|
2
|
+
|
|
3
|
+
Authenticate DataChain with Studio and save a client access token to the DataChain configuration.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain auth login [-h] [-v] [-q] [-H HOSTNAME] [-s SCOPES] [-n NAME] [--no-open] [--local]
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
By default, this command authenticates DataChain with Studio using default scopes and assigns a random name as the token name. The authentication token will be used for subsequent Studio operations.
|
|
14
|
+
|
|
15
|
+
## Options
|
|
16
|
+
|
|
17
|
+
* `-H HOSTNAME`, `--hostname HOSTNAME` - The hostname of the Studio instance to authenticate with.
|
|
18
|
+
* `-s SCOPES`, `--scopes SCOPES` - Authentication token scopes. Allowed scopes: `EXPERIMENTS`, `DATASETS`, `MODELS`. Defaults to all available scopes.
|
|
19
|
+
* `-n NAME`, `--name NAME` - The name of the authentication token. It is used to identify the token in your Studio profile. Defaults to a random name.
|
|
20
|
+
* `--no-open` - Use code-based authentication without a browser. You will be presented with a user code to enter in the browser. DataChain will also use this if it cannot launch the browser on your behalf.
|
|
21
|
+
* `--local` - Save the token in the local project config instead of the global configuration.
|
|
22
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
23
|
+
* `-v`, `--verbose` - Be verbose.
|
|
24
|
+
* `-q`, `--quiet` - Be quiet.
|
|
25
|
+
|
|
26
|
+
## Examples
|
|
27
|
+
|
|
28
|
+
1. Basic authentication with default settings:
|
|
29
|
+
```bash
|
|
30
|
+
datachain auth login
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
2. Authenticate with specific scopes:
|
|
34
|
+
```bash
|
|
35
|
+
datachain auth login --scopes EXPERIMENTS,DATASETS
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
3. Authenticate with a custom token name:
|
|
39
|
+
```bash
|
|
40
|
+
datachain auth login --name my-token
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
4. Authenticate using code-based flow:
|
|
44
|
+
```bash
|
|
45
|
+
datachain auth login --no-open
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
5. Save token locally for the project:
|
|
49
|
+
```bash
|
|
50
|
+
datachain auth login --local
|
|
51
|
+
```
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# auth logout
|
|
2
|
+
|
|
3
|
+
Remove the Studio authentication token from DataChain configuration.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain auth logout [-h] [-v] [-q] [--local]
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
This command removes the Studio authentication token from the DataChain configuration. By default, it removes the token from the global configuration, but you can also remove it from the local project configuration using the `--local` option.
|
|
14
|
+
|
|
15
|
+
## Options
|
|
16
|
+
|
|
17
|
+
* `--local` - Remove the token from the local project config instead of the global configuration.
|
|
18
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
19
|
+
* `-v`, `--verbose` - Be verbose.
|
|
20
|
+
* `-q`, `--quiet` - Be quiet.
|
|
21
|
+
|
|
22
|
+
## Examples
|
|
23
|
+
|
|
24
|
+
1. Remove token from global configuration:
|
|
25
|
+
```bash
|
|
26
|
+
datachain auth logout
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
2. Remove token from local project configuration:
|
|
30
|
+
```bash
|
|
31
|
+
datachain auth logout --local
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
3. Remove token with verbose output:
|
|
35
|
+
```bash
|
|
36
|
+
datachain auth logout -v
|
|
37
|
+
```
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# auth team
|
|
2
|
+
|
|
3
|
+
Set the default team for Studio operations.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain auth team [-h] [-v] [-q] [--global] team_name
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
This command sets the default team for Studio operations. By default, the team setting is project-specific, but you can use the `--global` option to set it for all projects.
|
|
14
|
+
|
|
15
|
+
## Arguments
|
|
16
|
+
|
|
17
|
+
* `team_name` - Name of the team to set as default
|
|
18
|
+
|
|
19
|
+
## Options
|
|
20
|
+
|
|
21
|
+
* `--global` - Set team globally for all projects
|
|
22
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
23
|
+
* `-v`, `--verbose` - Be verbose.
|
|
24
|
+
* `-q`, `--quiet` - Be quiet.
|
|
25
|
+
|
|
26
|
+
## Examples
|
|
27
|
+
|
|
28
|
+
1. Set default team for current project:
|
|
29
|
+
```bash
|
|
30
|
+
datachain auth team my-team
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
2. Set default team globally for all projects:
|
|
34
|
+
```bash
|
|
35
|
+
datachain auth team --global my-team
|
|
36
|
+
```
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# auth token
|
|
2
|
+
|
|
3
|
+
Display the current authentication token for Studio.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain auth token [-h] [-v] [-q]
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
This command displays the current authentication token that DataChain is using for Studio operations. The token is used for authenticating with Studio and accessing its features.
|
|
14
|
+
|
|
15
|
+
## Options
|
|
16
|
+
|
|
17
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
18
|
+
* `-v`, `--verbose` - Be verbose.
|
|
19
|
+
* `-q`, `--quiet` - Be quiet.
|
|
20
|
+
|
|
21
|
+
## Examples
|
|
22
|
+
|
|
23
|
+
1. Display the current token:
|
|
24
|
+
```bash
|
|
25
|
+
datachain auth token
|
|
26
|
+
```
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
|
|
2
|
+
# Using DataChain Commands
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
DataChain is a command-line tool for wrangling unstructured AI data at scale. Use `datachain -h` to list all available commands.
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
## Typical DataChain Workflow
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
1. **Authentication with Studio**
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
- Use [`datachain auth login`](auth/login.md) to authenticate with Studio
|
|
18
|
+
|
|
19
|
+
- Set your default team with [`datachain auth team`](auth/team.md)
|
|
20
|
+
|
|
21
|
+
- View your token with [`datachain auth token`](auth/token.md)
|
|
22
|
+
|
|
23
|
+
- Log out from Studio with [`datachain auth logout`](auth/logout.md)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
2. **Job Management**
|
|
28
|
+
|
|
29
|
+
- Run jobs in Studio with [`datachain job run`](job/run.md)
|
|
30
|
+
|
|
31
|
+
- Monitor job logs with [`datachain job logs`](job/logs.md)
|
|
32
|
+
|
|
33
|
+
- Cancel running jobs with [`datachain job cancel`](job/cancel.md)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# job cancel
|
|
2
|
+
|
|
3
|
+
Cancel a running job in Studio.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain job cancel [-h] [-v] [-q] [--team TEAM] id
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
This command cancels a running job in Studio. The job ID can be obtained from the Studio UI or from the output when the job was created. This is the recommended way to stop a running job, as simply closing the logs view (e.g., with Ctrl+C) will not cancel the job execution.
|
|
14
|
+
|
|
15
|
+
## Arguments
|
|
16
|
+
|
|
17
|
+
* `id` - Job ID to cancel. This ID is displayed when the job is created and can also be found in the Studio UI.
|
|
18
|
+
|
|
19
|
+
## Options
|
|
20
|
+
|
|
21
|
+
* `--team TEAM` - Team to cancel job for (default: from config)
|
|
22
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
23
|
+
* `-v`, `--verbose` - Be verbose.
|
|
24
|
+
* `-q`, `--quiet` - Be quiet.
|
|
25
|
+
|
|
26
|
+
## Examples
|
|
27
|
+
|
|
28
|
+
1. Cancel a specific job:
|
|
29
|
+
```bash
|
|
30
|
+
datachain job cancel job-123
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
2. Cancel a job in a specific team:
|
|
34
|
+
```bash
|
|
35
|
+
datachain job cancel --team my-team job-123
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
## Notes
|
|
40
|
+
|
|
41
|
+
* The job ID is displayed when the job is created using `datachain job run`
|
|
42
|
+
* You can also find the job ID in the Studio UI
|
|
43
|
+
* This is the proper way to stop a running job, as simply closing the logs view will not cancel the job execution
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# job logs
|
|
2
|
+
|
|
3
|
+
Display logs and current status of jobs in Studio.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain job logs [-h] [-v] [-q] [--team TEAM] id
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Description
|
|
12
|
+
|
|
13
|
+
This command displays the logs and current status of a running job in Studio. The command will show real-time logs from the job execution. Note that closing this command (e.g., with Ctrl+C) will only stop displaying the logs but will not cancel the job execution. To cancel a job, use the `job cancel` command.
|
|
14
|
+
|
|
15
|
+
## Arguments
|
|
16
|
+
|
|
17
|
+
* `id` - Job ID to show logs for
|
|
18
|
+
|
|
19
|
+
## Options
|
|
20
|
+
|
|
21
|
+
* `--team TEAM` - Team to check logs for (default: from config)
|
|
22
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
23
|
+
* `-v`, `--verbose` - Be verbose.
|
|
24
|
+
* `-q`, `--quiet` - Be quiet.
|
|
25
|
+
|
|
26
|
+
## Examples
|
|
27
|
+
|
|
28
|
+
1. Display logs for a specific job:
|
|
29
|
+
```bash
|
|
30
|
+
datachain job logs job-123
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
2. Display logs for a job in a specific team:
|
|
34
|
+
```bash
|
|
35
|
+
datachain job logs --team my-team job-123
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
3. Display logs with verbose output:
|
|
39
|
+
```bash
|
|
40
|
+
datachain job logs -v job-123
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Notes
|
|
44
|
+
|
|
45
|
+
* Closing the logs command (e.g., with Ctrl+C) will only stop displaying the logs but will not cancel the job execution
|
|
46
|
+
* To cancel a running job, use the `datachain job cancel` command
|
|
47
|
+
* The job will continue running in Studio even after you stop viewing the logs
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# job run
|
|
2
|
+
|
|
3
|
+
Run a job in Studio.
|
|
4
|
+
|
|
5
|
+
## Synopsis
|
|
6
|
+
|
|
7
|
+
```usage
|
|
8
|
+
usage: datachain job run [-h] [-v] [-q] [--team TEAM] [--env-file ENV_FILE] [--env ENV [ENV ...]]
|
|
9
|
+
[--workers WORKERS] [--files FILES [FILES ...]] [--python-version PYTHON_VERSION]
|
|
10
|
+
[--req-file REQ_FILE] [--req REQ [REQ ...]]
|
|
11
|
+
file
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Description
|
|
15
|
+
|
|
16
|
+
This command runs a job in Studio using the specified query file. You can configure various aspects of the job including environment variables, Python version, dependencies, and more.
|
|
17
|
+
|
|
18
|
+
## Arguments
|
|
19
|
+
|
|
20
|
+
* `file` - Query file to run
|
|
21
|
+
|
|
22
|
+
## Options
|
|
23
|
+
|
|
24
|
+
* `--team TEAM` - Team to run job for (default: from config)
|
|
25
|
+
* `--env-file ENV_FILE` - File with environment variables for the job
|
|
26
|
+
* `--env ENV` - Environment variables in KEY=VALUE format
|
|
27
|
+
* `--workers WORKERS` - Number of workers for the job
|
|
28
|
+
* `--files FILES` - Additional files to include in the job
|
|
29
|
+
* `--python-version PYTHON_VERSION` - Python version for the job (e.g., 3.9, 3.10, 3.11)
|
|
30
|
+
* `--req-file REQ_FILE` - Python requirements file
|
|
31
|
+
* `--req REQ` - Python package requirements
|
|
32
|
+
* `-h`, `--help` - Show the help message and exit.
|
|
33
|
+
* `-v`, `--verbose` - Be verbose.
|
|
34
|
+
* `-q`, `--quiet` - Be quiet.
|
|
35
|
+
|
|
36
|
+
## Examples
|
|
37
|
+
|
|
38
|
+
1. Run a basic job:
|
|
39
|
+
```bash
|
|
40
|
+
datachain job run query.py
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
2. Run a job with specific team and Python version:
|
|
44
|
+
```bash
|
|
45
|
+
datachain job run --team my-team --python-version 3.11 query.py
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
3. Run a job with environment variables and requirements:
|
|
49
|
+
```bash
|
|
50
|
+
datachain job run --env-file .env --req-file requirements.txt query.py
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
4. Run a job with multiple workers and additional files:
|
|
54
|
+
```bash
|
|
55
|
+
datachain job run --workers 4 --files utils.py config.json query.py
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
5. Run a job with inline environment variables and package requirements:
|
|
59
|
+
```bash
|
|
60
|
+
datachain job run --env API_KEY=123 --req pandas numpy query.py
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Notes
|
|
64
|
+
|
|
65
|
+
* Closing the logs command (e.g., with Ctrl+C) will only stop displaying the logs but will not cancel the job execution
|
|
66
|
+
* To cancel a running job, use the `datachain job cancel` command
|
|
67
|
+
* The job will continue running in Studio even after you stop viewing the logs
|
|
@@ -94,7 +94,7 @@ dc.DataModel.register(MistralModel)
|
|
|
94
94
|
chain = (
|
|
95
95
|
dc
|
|
96
96
|
.read_storage("gs://datachain-demo/chatbot-KiT/", type="text")
|
|
97
|
-
.filter(dc.Column("file.
|
|
97
|
+
.filter(dc.Column("file.path").glob("*.txt"))
|
|
98
98
|
.limit(5)
|
|
99
99
|
.settings(parallel=4, cache=True)
|
|
100
100
|
.map(
|
|
@@ -228,7 +228,7 @@ Here is an example from MS COCO “captions” JSON which employs separate secti
|
|
|
228
228
|
|
|
229
229
|
Note how complicated the setup is. Every image is referenced by the name, and the metadata for this file is keyed by the “id” field. This same field is referenced later in the “annotations” array, which is present in JSON files describing captions and the detected instances. The categories for the instances are stored in the “categories” array.
|
|
230
230
|
|
|
231
|
-
However,
|
|
231
|
+
However, DataChain can easily parse the entire COCO structure via several reading and merging operators:
|
|
232
232
|
|
|
233
233
|
```python
|
|
234
234
|
import datachain as dc
|
|
@@ -240,7 +240,7 @@ images = dc.read_storage(images_uri)
|
|
|
240
240
|
meta = dc.read_json(captions_uri, jmespath="images")
|
|
241
241
|
captions = dc.read_json(captions_uri, jmespath="annotations")
|
|
242
242
|
|
|
243
|
-
images_meta = images.merge(meta, on="file.
|
|
243
|
+
images_meta = images.merge(meta, on="file.path", right_on="images.file_name")
|
|
244
244
|
captioned_images = images_meta.merge(captions, on="images.id", right_on="annotations.image_id")
|
|
245
245
|
```
|
|
246
246
|
|
|
@@ -248,12 +248,12 @@ The resulting dataset has image entries as files decorated with all the metadata
|
|
|
248
248
|
|
|
249
249
|
```python
|
|
250
250
|
images_with_dogs = captioned_images.filter(dc.Column("annotations.caption").glob("*dog*"))
|
|
251
|
-
images_with_dogs.select("annotations", "file.
|
|
251
|
+
images_with_dogs.select("annotations", "file.path").show()
|
|
252
252
|
```
|
|
253
253
|
|
|
254
254
|
```
|
|
255
255
|
captions captions captions file
|
|
256
|
-
image_id id caption
|
|
256
|
+
image_id id caption path
|
|
257
257
|
0 17029 778902 a dog jumping to catch a frisbee in a yard 000000017029.jpg
|
|
258
258
|
1 17029 779838 A dog jumping to catch a red frisbee in a garden 000000017029.jpg
|
|
259
259
|
2 17029 781941 The dog is catching the Frisbee in mid air in ... 000000017029.jpg
|
|
@@ -184,7 +184,7 @@ chain = (
|
|
|
184
184
|
.save("response")
|
|
185
185
|
)
|
|
186
186
|
|
|
187
|
-
chain.select("file.
|
|
187
|
+
chain.select("file.path", "status", "response.usage").show(5)
|
|
188
188
|
|
|
189
189
|
success_rate = chain.filter(dc.Column("status") == "success").count() / chain.count()
|
|
190
190
|
print(f"{100*success_rate:.1f}% dialogs were successful")
|
|
@@ -194,7 +194,7 @@ Output:
|
|
|
194
194
|
|
|
195
195
|
``` shell
|
|
196
196
|
file status response response response
|
|
197
|
-
|
|
197
|
+
path usage usage usage
|
|
198
198
|
prompt_tokens total_tokens completion_tokens
|
|
199
199
|
0 1.txt success 547 548 1
|
|
200
200
|
1 10.txt failure 3576 3578 2
|
|
@@ -277,7 +277,7 @@ processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
|
|
277
277
|
|
|
278
278
|
chain = (
|
|
279
279
|
dc.read_storage("gs://datachain-demo/dogs-and-cats/", type="image", anon=True)
|
|
280
|
-
.map(label=lambda name: name.split(".")[0], params=["file.
|
|
280
|
+
.map(label=lambda name: name.split(".")[0], params=["file.path"])
|
|
281
281
|
.select("file", "label").to_pytorch(
|
|
282
282
|
transform=processor.image_processor,
|
|
283
283
|
tokenizer=processor.tokenizer,
|
|
@@ -84,6 +84,18 @@ nav:
|
|
|
84
84
|
- Torch: references/torch.md
|
|
85
85
|
- Functions: references/func.md
|
|
86
86
|
- Toolkit: references/toolkit.md
|
|
87
|
+
- 📖 CLI Reference:
|
|
88
|
+
- Overview: commands/index.md
|
|
89
|
+
- Commands:
|
|
90
|
+
- auth:
|
|
91
|
+
- login: commands/auth/login.md
|
|
92
|
+
- logout: commands/auth/logout.md
|
|
93
|
+
- token: commands/auth/token.md
|
|
94
|
+
- team: commands/auth/team.md
|
|
95
|
+
- job:
|
|
96
|
+
- run: commands/job/run.md
|
|
97
|
+
- logs: commands/job/logs.md
|
|
98
|
+
- cancel: commands/job/cancel.md
|
|
87
99
|
- 📡 Interacting with remote storage: references/remotes.md
|
|
88
100
|
- 🤝 Contributing: contributing.md
|
|
89
101
|
|
|
@@ -165,7 +165,7 @@ def any_value(col: str) -> Func:
|
|
|
165
165
|
Example:
|
|
166
166
|
```py
|
|
167
167
|
dc.group_by(
|
|
168
|
-
file_example=func.any_value("file.
|
|
168
|
+
file_example=func.any_value("file.path"),
|
|
169
169
|
partition_by="signal.category",
|
|
170
170
|
)
|
|
171
171
|
```
|
|
@@ -227,7 +227,7 @@ def concat(col: str, separator="") -> Func:
|
|
|
227
227
|
Example:
|
|
228
228
|
```py
|
|
229
229
|
dc.group_by(
|
|
230
|
-
files=func.concat("file.
|
|
230
|
+
files=func.concat("file.path", separator=", "),
|
|
231
231
|
partition_by="signal.category",
|
|
232
232
|
)
|
|
233
233
|
```
|
|
@@ -343,7 +343,7 @@ def first(col: str) -> Func:
|
|
|
343
343
|
```py
|
|
344
344
|
window = func.window(partition_by="signal.category", order_by="created_at")
|
|
345
345
|
dc.mutate(
|
|
346
|
-
first_file=func.first("file.
|
|
346
|
+
first_file=func.first("file.path").over(window),
|
|
347
347
|
)
|
|
348
348
|
```
|
|
349
349
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import itertools
|
|
2
2
|
from collections.abc import Sequence
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, Union
|
|
4
4
|
|
|
5
5
|
from datachain.lib.data_model import (
|
|
6
6
|
DataType,
|
|
@@ -71,14 +71,13 @@ def values_to_tuples( # noqa: C901, PLR0912
|
|
|
71
71
|
# If a non-None value appears early, it won't check the remaining items for
|
|
72
72
|
# `None` values.
|
|
73
73
|
try:
|
|
74
|
-
|
|
75
|
-
itertools.dropwhile(lambda
|
|
74
|
+
first_not_none_element = next(
|
|
75
|
+
itertools.dropwhile(lambda i: i is None, v)
|
|
76
76
|
)
|
|
77
77
|
except StopIteration:
|
|
78
|
-
|
|
79
|
-
|
|
78
|
+
# set default type to `str` if column is empty or all values are `None`
|
|
79
|
+
typ = str
|
|
80
80
|
else:
|
|
81
|
-
nullable = pos > 0
|
|
82
81
|
typ = type(first_not_none_element) # type: ignore[assignment]
|
|
83
82
|
if not is_chain_type(typ):
|
|
84
83
|
raise ValuesToTupleError(
|
|
@@ -88,8 +87,7 @@ def values_to_tuples( # noqa: C901, PLR0912
|
|
|
88
87
|
)
|
|
89
88
|
if isinstance(first_not_none_element, list):
|
|
90
89
|
typ = list[type(first_not_none_element[0])] # type: ignore[assignment, misc]
|
|
91
|
-
|
|
92
|
-
types_map[k] = Optional[typ] if nullable else typ # type: ignore[assignment]
|
|
90
|
+
types_map[k] = typ
|
|
93
91
|
|
|
94
92
|
if length < 0:
|
|
95
93
|
length = len_
|
|
@@ -756,7 +756,7 @@ class DataChain:
|
|
|
756
756
|
|
|
757
757
|
Example:
|
|
758
758
|
```py
|
|
759
|
-
dc.distinct("file.
|
|
759
|
+
dc.distinct("file.path")
|
|
760
760
|
```
|
|
761
761
|
"""
|
|
762
762
|
return self._evolve(
|
|
@@ -882,7 +882,7 @@ class DataChain:
|
|
|
882
882
|
```py
|
|
883
883
|
dc.mutate(
|
|
884
884
|
area=Column("image.height") * Column("image.width"),
|
|
885
|
-
extension=file_ext(Column("file.
|
|
885
|
+
extension=file_ext(Column("file.path")),
|
|
886
886
|
dist=cosine_distance(embedding_text, embedding_image)
|
|
887
887
|
)
|
|
888
888
|
```
|
|
@@ -1071,13 +1071,13 @@ class DataChain:
|
|
|
1071
1071
|
|
|
1072
1072
|
Iterating over all rows with selected columns:
|
|
1073
1073
|
```py
|
|
1074
|
-
for name, size in dc.collect("file.
|
|
1074
|
+
for name, size in dc.collect("file.path", "file.size"):
|
|
1075
1075
|
print(name, size)
|
|
1076
1076
|
```
|
|
1077
1077
|
|
|
1078
1078
|
Iterating over a single column:
|
|
1079
1079
|
```py
|
|
1080
|
-
for file in dc.collect("file.
|
|
1080
|
+
for file in dc.collect("file.path"):
|
|
1081
1081
|
print(file)
|
|
1082
1082
|
```
|
|
1083
1083
|
"""
|
|
@@ -1630,7 +1630,7 @@ class DataChain:
|
|
|
1630
1630
|
import datachain as dc
|
|
1631
1631
|
|
|
1632
1632
|
chain = dc.read_storage("s3://mybucket")
|
|
1633
|
-
chain = chain.filter(dc.C("file.
|
|
1633
|
+
chain = chain.filter(dc.C("file.path").glob("*.jsonl"))
|
|
1634
1634
|
chain = chain.parse_tabular(format="json")
|
|
1635
1635
|
```
|
|
1636
1636
|
"""
|
|
@@ -2089,25 +2089,31 @@ class DataChain:
|
|
|
2089
2089
|
|
|
2090
2090
|
Using glob to match patterns
|
|
2091
2091
|
```py
|
|
2092
|
-
dc.filter(C("file.
|
|
2092
|
+
dc.filter(C("file.path").glob("*.jpg"))
|
|
2093
|
+
```
|
|
2094
|
+
|
|
2095
|
+
Using in to match lists
|
|
2096
|
+
```py
|
|
2097
|
+
ids = [1,2,3]
|
|
2098
|
+
dc.filter(C("experiment_id").in_(ids))
|
|
2093
2099
|
```
|
|
2094
2100
|
|
|
2095
2101
|
Using `datachain.func`
|
|
2096
2102
|
```py
|
|
2097
2103
|
from datachain.func import string
|
|
2098
|
-
dc.filter(string.length(C("file.
|
|
2104
|
+
dc.filter(string.length(C("file.path")) > 5)
|
|
2099
2105
|
```
|
|
2100
2106
|
|
|
2101
2107
|
Combining filters with "or"
|
|
2102
2108
|
```py
|
|
2103
|
-
dc.filter(C("file.
|
|
2109
|
+
dc.filter(C("file.path").glob("cat*") | C("file.path").glob("dog*"))
|
|
2104
2110
|
```
|
|
2105
2111
|
|
|
2106
2112
|
Combining filters with "and"
|
|
2107
2113
|
```py
|
|
2108
2114
|
dc.filter(
|
|
2109
|
-
C("file.
|
|
2110
|
-
(string.length(C("file.
|
|
2115
|
+
C("file.path").glob("*.jpg") &
|
|
2116
|
+
(string.length(C("file.path")) > 5)
|
|
2111
2117
|
)
|
|
2112
2118
|
```
|
|
2113
2119
|
"""
|
|
@@ -4,12 +4,9 @@ from typing import TYPE_CHECKING, Optional, Union
|
|
|
4
4
|
import sqlalchemy
|
|
5
5
|
|
|
6
6
|
from datachain.lib.data_model import DataType
|
|
7
|
-
from datachain.lib.file import
|
|
8
|
-
File,
|
|
9
|
-
)
|
|
7
|
+
from datachain.lib.file import File
|
|
10
8
|
from datachain.lib.signal_schema import SignalSchema
|
|
11
9
|
from datachain.query import Session
|
|
12
|
-
from datachain.query.schema import Column
|
|
13
10
|
|
|
14
11
|
if TYPE_CHECKING:
|
|
15
12
|
from typing_extensions import ParamSpec
|
|
@@ -41,6 +38,10 @@ def read_records(
|
|
|
41
38
|
single_record = dc.read_records(dc.DEFAULT_FILE_RECORD)
|
|
42
39
|
```
|
|
43
40
|
"""
|
|
41
|
+
from datachain.query.dataset import INSERT_BATCH_SIZE, adjust_outputs, get_col_types
|
|
42
|
+
from datachain.sql.types import SQLType
|
|
43
|
+
from datachain.utils import batched
|
|
44
|
+
|
|
44
45
|
from .datasets import read_dataset
|
|
45
46
|
|
|
46
47
|
session = Session.get(session, in_memory=in_memory)
|
|
@@ -52,11 +53,10 @@ def read_records(
|
|
|
52
53
|
|
|
53
54
|
if schema:
|
|
54
55
|
signal_schema = SignalSchema(schema)
|
|
55
|
-
columns = [
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
columns.append(sqlalchemy.Column(c.name, c.type, **kw))
|
|
56
|
+
columns = [
|
|
57
|
+
sqlalchemy.Column(c.name, c.type) # type: ignore[union-attr]
|
|
58
|
+
for c in signal_schema.db_signals(as_columns=True)
|
|
59
|
+
]
|
|
60
60
|
else:
|
|
61
61
|
columns = [
|
|
62
62
|
sqlalchemy.Column(name, typ)
|
|
@@ -83,6 +83,14 @@ def read_records(
|
|
|
83
83
|
warehouse = catalog.warehouse
|
|
84
84
|
dr = warehouse.dataset_rows(dsr)
|
|
85
85
|
table = dr.get_table()
|
|
86
|
-
|
|
86
|
+
|
|
87
|
+
# Optimization: Compute row types once, rather than for every row.
|
|
88
|
+
col_types = get_col_types(
|
|
89
|
+
warehouse,
|
|
90
|
+
{c.name: c.type for c in columns if isinstance(c.type, SQLType)},
|
|
91
|
+
)
|
|
92
|
+
records = (adjust_outputs(warehouse, record, col_types) for record in to_insert)
|
|
93
|
+
for chunk in batched(records, INSERT_BATCH_SIZE):
|
|
94
|
+
warehouse.insert_rows(table, chunk)
|
|
87
95
|
warehouse.insert_rows_done(table)
|
|
88
96
|
return read_dataset(name=dsr.name, session=session, settings=settings)
|
|
@@ -31,8 +31,8 @@ def resolve_columns(
|
|
|
31
31
|
) -> "Callable[Concatenate[D, P], D]":
|
|
32
32
|
"""Decorator that resolvs input column names to their actual DB names. This is
|
|
33
33
|
especially important for nested columns as user works with them by using dot
|
|
34
|
-
notation e.g (file.
|
|
35
|
-
in DB, e.g
|
|
34
|
+
notation e.g (file.path) but are actually defined with default delimiter
|
|
35
|
+
in DB, e.g file__path.
|
|
36
36
|
If there are any sql functions in arguments, they will just be transferred as is
|
|
37
37
|
to a method.
|
|
38
38
|
"""
|