datachain 0.8.3__tar.gz → 0.8.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.8.3 → datachain-0.8.4}/.github/workflows/tests-studio.yml +1 -1
- {datachain-0.8.3 → datachain-0.8.4}/.github/workflows/tests.yml +1 -1
- {datachain-0.8.3 → datachain-0.8.4}/.pre-commit-config.yaml +1 -1
- {datachain-0.8.3 → datachain-0.8.4}/PKG-INFO +5 -5
- {datachain-0.8.3 → datachain-0.8.4}/mkdocs.yml +1 -0
- {datachain-0.8.3 → datachain-0.8.4}/pyproject.toml +4 -4
- datachain-0.8.4/src/datachain/cli/__init__.py +311 -0
- datachain-0.8.4/src/datachain/cli/commands/__init__.py +29 -0
- datachain-0.8.4/src/datachain/cli/commands/datasets.py +129 -0
- datachain-0.8.4/src/datachain/cli/commands/du.py +14 -0
- datachain-0.8.4/src/datachain/cli/commands/index.py +12 -0
- datachain-0.8.4/src/datachain/cli/commands/ls.py +169 -0
- datachain-0.8.4/src/datachain/cli/commands/misc.py +28 -0
- datachain-0.8.4/src/datachain/cli/commands/query.py +53 -0
- datachain-0.8.4/src/datachain/cli/commands/show.py +38 -0
- datachain-0.8.4/src/datachain/cli/parser/__init__.py +547 -0
- datachain-0.8.4/src/datachain/cli/parser/job.py +120 -0
- datachain-0.8.4/src/datachain/cli/parser/studio.py +126 -0
- datachain-0.8.4/src/datachain/cli/parser/utils.py +63 -0
- datachain-0.8.3/src/datachain/cli_utils.py → datachain-0.8.4/src/datachain/cli/utils.py +27 -1
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/client/fsspec.py +8 -2
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/func/__init__.py +2 -2
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/func/conditional.py +52 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/func/func.py +5 -1
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/arrow.py +4 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/dc.py +3 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/file.py +1 -1
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/listing.py +19 -1
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/signal_schema.py +89 -27
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/progress.py +2 -2
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/studio.py +58 -38
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/utils.py +1 -1
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain.egg-info/PKG-INFO +5 -5
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain.egg-info/SOURCES.txt +14 -2
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain.egg-info/requires.txt +4 -4
- {datachain-0.8.3 → datachain-0.8.4}/tests/conftest.py +1 -1
- {datachain-0.8.3 → datachain-0.8.4}/tests/test_cli_e2e.py +6 -6
- {datachain-0.8.3 → datachain-0.8.4}/tests/test_cli_studio.py +18 -15
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_arrow.py +9 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_datachain.py +13 -5
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_signal_schema.py +280 -32
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/sql/test_conditional.py +43 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_cli_parsing.py +2 -17
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_config.py +9 -9
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_func.py +19 -1
- datachain-0.8.3/src/datachain/cli.py +0 -1475
- {datachain-0.8.3 → datachain-0.8.4}/.cruft.json +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/.gitattributes +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/.github/codecov.yaml +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/.github/dependabot.yml +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/.github/workflows/release.yml +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/.gitignore +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/LICENSE +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/README.rst +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/assets/datachain.svg +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/contributing.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/examples.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/index.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/overrides/main.html +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/quick-start.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/references/datachain.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/references/datatype.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/references/file.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/references/index.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/references/sql.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/references/torch.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/references/udf.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/docs/tutorials.md +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/multimodal/wds.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/noxfile.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/setup.cfg +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/__main__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/asyn.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/cache.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/client/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/client/azure.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/client/gcs.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/client/hf.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/client/local.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/client/s3.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/config.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/dataset.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/error.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/func/array.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/func/base.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/func/numeric.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/func/path.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/func/random.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/func/string.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/func/window.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/job.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/clip.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/diff.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/hf.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/image.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/settings.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/tar.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/text.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/udf.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/utils.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/listing.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/model/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/model/bbox.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/model/pose.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/model/segment.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/node.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/py.typed +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/query/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/query/batch.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/query/dataset.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/query/metrics.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/query/params.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/query/queue.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/query/schema.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/query/session.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/query/udf.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/query/utils.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/remote/studio.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/types.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/sql/utils.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/telemetry.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/data.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/examples/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/examples/test_examples.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/examples/wds_data.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_catalog.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_client.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_datachain.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_datasets.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_listing.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_ls.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_metrics.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_pull.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_pytorch.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_query.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_session.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/func/test_toolkit.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/scripts/feature_class.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/test_atomicity.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/test_query_e2e.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/test_telemetry.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_models.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_asyn.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_cache.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_catalog.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_client.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_dataset.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_listing.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_metastore.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_query.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_query_params.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_serializer.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_session.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_utils.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.8.3 → datachain-0.8.4}/tests/utils.py +0 -0
|
@@ -138,7 +138,7 @@ jobs:
|
|
|
138
138
|
matrix:
|
|
139
139
|
os: [ubuntu-latest, windows-latest]
|
|
140
140
|
pyv: ['3.9', '3.12']
|
|
141
|
-
group: ['get_started', '
|
|
141
|
+
group: ['get_started', 'computer_vision', 'llm_and_nlp', 'multimodal']
|
|
142
142
|
exclude:
|
|
143
143
|
- {os: ubuntu-latest, pyv: '3.9', group: 'multimodal'}
|
|
144
144
|
- {os: ubuntu-latest, pyv: '3.12', group: 'multimodal'}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.8.3
|
|
3
|
+
Version: 0.8.4
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -50,7 +50,7 @@ Requires-Dist: websockets
|
|
|
50
50
|
Provides-Extra: docs
|
|
51
51
|
Requires-Dist: mkdocs>=1.5.2; extra == "docs"
|
|
52
52
|
Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
|
|
53
|
-
Requires-Dist: mkdocs-material
|
|
53
|
+
Requires-Dist: mkdocs-material==9.5.22; extra == "docs"
|
|
54
54
|
Requires-Dist: mkdocs-section-index>=0.3.6; extra == "docs"
|
|
55
55
|
Requires-Dist: mkdocstrings-python>=1.6.3; extra == "docs"
|
|
56
56
|
Requires-Dist: mkdocs-literate-nav>=0.6.1; extra == "docs"
|
|
@@ -84,7 +84,7 @@ Requires-Dist: requests-mock; extra == "tests"
|
|
|
84
84
|
Requires-Dist: scipy; extra == "tests"
|
|
85
85
|
Provides-Extra: dev
|
|
86
86
|
Requires-Dist: datachain[docs,tests]; extra == "dev"
|
|
87
|
-
Requires-Dist: mypy==1.14.0; extra == "dev"
|
|
87
|
+
Requires-Dist: mypy==1.14.1; extra == "dev"
|
|
88
88
|
Requires-Dist: types-python-dateutil; extra == "dev"
|
|
89
89
|
Requires-Dist: types-pytz; extra == "dev"
|
|
90
90
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
@@ -95,11 +95,11 @@ Requires-Dist: datachain[tests]; extra == "examples"
|
|
|
95
95
|
Requires-Dist: defusedxml; extra == "examples"
|
|
96
96
|
Requires-Dist: accelerate; extra == "examples"
|
|
97
97
|
Requires-Dist: unstructured_ingest[embed-huggingface]; extra == "examples"
|
|
98
|
-
Requires-Dist: unstructured[pdf]; extra == "examples"
|
|
98
|
+
Requires-Dist: unstructured[pdf]<0.16.12; extra == "examples"
|
|
99
99
|
Requires-Dist: pdfplumber==0.11.4; extra == "examples"
|
|
100
100
|
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
101
101
|
Requires-Dist: onnx==1.16.1; extra == "examples"
|
|
102
|
-
Requires-Dist: ultralytics==8.3.
|
|
102
|
+
Requires-Dist: ultralytics==8.3.55; extra == "examples"
|
|
103
103
|
|
|
104
104
|
================
|
|
105
105
|
|logo| DataChain
|
|
@@ -56,7 +56,7 @@ dependencies = [
|
|
|
56
56
|
docs = [
|
|
57
57
|
"mkdocs>=1.5.2",
|
|
58
58
|
"mkdocs-gen-files>=0.5.0",
|
|
59
|
-
"mkdocs-material
|
|
59
|
+
"mkdocs-material==9.5.22",
|
|
60
60
|
"mkdocs-section-index>=0.3.6",
|
|
61
61
|
"mkdocstrings-python>=1.6.3",
|
|
62
62
|
"mkdocs-literate-nav>=0.6.1"
|
|
@@ -96,7 +96,7 @@ tests = [
|
|
|
96
96
|
]
|
|
97
97
|
dev = [
|
|
98
98
|
"datachain[docs,tests]",
|
|
99
|
-
"mypy==1.14.0",
|
|
99
|
+
"mypy==1.14.1",
|
|
100
100
|
"types-python-dateutil",
|
|
101
101
|
"types-pytz",
|
|
102
102
|
"types-PyYAML",
|
|
@@ -108,11 +108,11 @@ examples = [
|
|
|
108
108
|
"defusedxml",
|
|
109
109
|
"accelerate",
|
|
110
110
|
"unstructured_ingest[embed-huggingface]",
|
|
111
|
-
"unstructured[pdf]",
|
|
111
|
+
"unstructured[pdf]<0.16.12",
|
|
112
112
|
"pdfplumber==0.11.4",
|
|
113
113
|
"huggingface_hub[hf_transfer]",
|
|
114
114
|
"onnx==1.16.1",
|
|
115
|
-
"ultralytics==8.3.
|
|
115
|
+
"ultralytics==8.3.55"
|
|
116
116
|
]
|
|
117
117
|
|
|
118
118
|
[project.urls]
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
import traceback
|
|
5
|
+
from multiprocessing import freeze_support
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from datachain.cli.utils import get_logging_level
|
|
9
|
+
from datachain.telemetry import telemetry
|
|
10
|
+
|
|
11
|
+
from .commands import (
|
|
12
|
+
clear_cache,
|
|
13
|
+
completion,
|
|
14
|
+
dataset_stats,
|
|
15
|
+
du,
|
|
16
|
+
edit_dataset,
|
|
17
|
+
garbage_collect,
|
|
18
|
+
index,
|
|
19
|
+
list_datasets,
|
|
20
|
+
ls,
|
|
21
|
+
query,
|
|
22
|
+
rm_dataset,
|
|
23
|
+
show,
|
|
24
|
+
)
|
|
25
|
+
from .parser import get_parser
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger("datachain")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def main(argv: Optional[list[str]] = None) -> int:
|
|
31
|
+
from datachain.catalog import get_catalog
|
|
32
|
+
|
|
33
|
+
# Required for Windows multiprocessing support
|
|
34
|
+
freeze_support()
|
|
35
|
+
|
|
36
|
+
datachain_parser = get_parser()
|
|
37
|
+
args = datachain_parser.parse_args(argv)
|
|
38
|
+
|
|
39
|
+
if args.command in ("internal-run-udf", "internal-run-udf-worker"):
|
|
40
|
+
return handle_udf(args.command)
|
|
41
|
+
|
|
42
|
+
logger.addHandler(logging.StreamHandler())
|
|
43
|
+
logging_level = get_logging_level(args)
|
|
44
|
+
logger.setLevel(logging_level)
|
|
45
|
+
|
|
46
|
+
client_config = {
|
|
47
|
+
"aws_endpoint_url": args.aws_endpoint_url,
|
|
48
|
+
"anon": args.anon,
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
if args.debug_sql:
|
|
52
|
+
# This also sets this environment variable for any subprocesses
|
|
53
|
+
os.environ["DEBUG_SHOW_SQL_QUERIES"] = "True"
|
|
54
|
+
|
|
55
|
+
error = None
|
|
56
|
+
|
|
57
|
+
try:
|
|
58
|
+
catalog = get_catalog(client_config=client_config)
|
|
59
|
+
return handle_command(args, catalog, client_config)
|
|
60
|
+
except BrokenPipeError as exc:
|
|
61
|
+
error, return_code = handle_broken_pipe_error(exc)
|
|
62
|
+
return return_code
|
|
63
|
+
except (KeyboardInterrupt, Exception) as exc:
|
|
64
|
+
error, return_code = handle_general_exception(exc, args, logging_level)
|
|
65
|
+
return return_code
|
|
66
|
+
finally:
|
|
67
|
+
telemetry.send_cli_call(args.command, error=error)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def handle_command(args, catalog, client_config) -> int:
|
|
71
|
+
"""Handle the different CLI commands."""
|
|
72
|
+
from datachain.studio import process_jobs_args, process_studio_cli_args
|
|
73
|
+
|
|
74
|
+
command_handlers = {
|
|
75
|
+
"cp": lambda: handle_cp_command(args, catalog),
|
|
76
|
+
"clone": lambda: handle_clone_command(args, catalog),
|
|
77
|
+
"dataset": lambda: handle_dataset_command(args, catalog),
|
|
78
|
+
"ds": lambda: handle_dataset_command(args, catalog),
|
|
79
|
+
"ls": lambda: handle_ls_command(args, client_config),
|
|
80
|
+
"show": lambda: handle_show_command(args, catalog),
|
|
81
|
+
"du": lambda: handle_du_command(args, catalog, client_config),
|
|
82
|
+
"find": lambda: handle_find_command(args, catalog),
|
|
83
|
+
"index": lambda: handle_index_command(args, catalog),
|
|
84
|
+
"completion": lambda: handle_completion_command(args),
|
|
85
|
+
"query": lambda: handle_query_command(args, catalog),
|
|
86
|
+
"clear-cache": lambda: clear_cache(catalog),
|
|
87
|
+
"gc": lambda: garbage_collect(catalog),
|
|
88
|
+
"studio": lambda: process_studio_cli_args(args),
|
|
89
|
+
"job": lambda: process_jobs_args(args),
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
handler = command_handlers.get(args.command)
|
|
93
|
+
if handler:
|
|
94
|
+
handler()
|
|
95
|
+
return 0
|
|
96
|
+
print(f"invalid command: {args.command}", file=sys.stderr)
|
|
97
|
+
return 1
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def handle_cp_command(args, catalog):
|
|
101
|
+
catalog.cp(
|
|
102
|
+
args.sources,
|
|
103
|
+
args.output,
|
|
104
|
+
force=bool(args.force),
|
|
105
|
+
update=bool(args.update),
|
|
106
|
+
recursive=bool(args.recursive),
|
|
107
|
+
edatachain_file=None,
|
|
108
|
+
edatachain_only=False,
|
|
109
|
+
no_edatachain_file=True,
|
|
110
|
+
no_glob=args.no_glob,
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def handle_clone_command(args, catalog):
|
|
115
|
+
catalog.clone(
|
|
116
|
+
args.sources,
|
|
117
|
+
args.output,
|
|
118
|
+
force=bool(args.force),
|
|
119
|
+
update=bool(args.update),
|
|
120
|
+
recursive=bool(args.recursive),
|
|
121
|
+
no_glob=args.no_glob,
|
|
122
|
+
no_cp=args.no_cp,
|
|
123
|
+
edatachain=args.edatachain,
|
|
124
|
+
edatachain_file=args.edatachain_file,
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def handle_dataset_command(args, catalog):
    """Dispatch the dataset subcommand named by ``args.datasets_cmd``.

    Supported names are ``pull``, ``edit``, ``ls``, ``rm`` / ``remove`` (both
    map to the same removal operation) and ``stats``. Arguments are only read
    from ``args`` once the matching operation actually runs.

    Returns:
        Whatever the selected operation returns.

    Raises:
        Exception: If ``args.datasets_cmd`` is not one of the supported names.
    """

    def _pull():
        return catalog.pull_dataset(
            args.dataset,
            args.output,
            local_ds_name=args.local_name,
            local_ds_version=args.local_version,
            cp=args.cp,
            force=bool(args.force),
            edatachain=args.edatachain,
            edatachain_file=args.edatachain_file,
        )

    def _edit():
        return edit_dataset(
            catalog,
            args.name,
            new_name=args.new_name,
            description=args.description,
            labels=args.labels,
            studio=args.studio,
            local=args.local,
            all=args.all,
            team=args.team,
        )

    def _list():
        return list_datasets(
            catalog=catalog,
            studio=args.studio,
            local=args.local,
            all=args.all,
            team=args.team,
        )

    def _remove():
        return rm_dataset(
            catalog,
            args.name,
            version=args.version,
            force=args.force,
            studio=args.studio,
            local=args.local,
            all=args.all,
            team=args.team,
        )

    def _stats():
        return dataset_stats(
            catalog,
            args.name,
            args.version,
            show_bytes=args.bytes,
            si=args.si,
        )

    operations = {
        "pull": _pull,
        "edit": _edit,
        "ls": _list,
        "rm": _remove,
        "remove": _remove,
        "stats": _stats,
    }

    operation = operations.get(args.datasets_cmd)
    if operation is None:
        raise Exception(f"Unexpected command {args.datasets_cmd}")
    return operation()
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def handle_ls_command(args, client_config):
    """Run the ``ls`` subcommand by forwarding parsed options to ``ls``.

    ``long`` and ``update`` are coerced to ``bool``; ``studio``, ``local``,
    ``all`` and ``team`` are passed through as parsed, together with the
    supplied ``client_config``.
    """
    sources = args.sources
    options = {
        "long": bool(args.long),
        "studio": args.studio,
        "local": args.local,
        "all": args.all,
        "team": args.team,
        "update": bool(args.update),
        "client_config": client_config,
    }
    ls(sources, **options)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def handle_show_command(args, catalog):
    """Run the ``show`` subcommand by forwarding parsed options to ``show``.

    The dataset is identified by ``args.name`` and ``args.version``; the
    ``limit``, ``offset``, ``columns``, ``no_collapse`` and ``schema`` options
    are passed through as parsed.
    """
    dataset_name, dataset_version = args.name, args.version
    display_options = {
        "limit": args.limit,
        "offset": args.offset,
        "columns": args.columns,
        "no_collapse": args.no_collapse,
        "schema": args.schema,
    }
    show(catalog, dataset_name, dataset_version, **display_options)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def handle_du_command(args, catalog, client_config):
    """Run the ``du`` subcommand by forwarding parsed options to ``du``.

    ``bytes`` is passed as ``show_bytes``, ``update`` is coerced to ``bool``,
    and ``depth`` / ``si`` are passed through as parsed, together with the
    supplied ``client_config``.
    """
    sources = args.sources
    options = {
        "show_bytes": args.bytes,
        "depth": args.depth,
        "si": args.si,
        "update": bool(args.update),
        "client_config": client_config,
    }
    du(catalog, sources, **options)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def handle_find_command(args, catalog):
    """Run the ``find`` subcommand and print every result on its own line.

    Filters (``name``, ``iname``, ``path``, ``ipath``, ``size``, ``type``) and
    ``columns`` are forwarded to ``catalog.find``; ``update`` is coerced to
    ``bool``. When the search yields nothing, ``No results`` is printed
    instead.
    """
    matches = catalog.find(
        args.sources,
        update=bool(args.update),
        names=args.name,
        inames=args.iname,
        paths=args.path,
        ipaths=args.ipath,
        size=args.size,
        typ=args.type,
        columns=args.columns,
    )

    # ``printed`` stays at 0 only when the iterable produced no items.
    printed = 0
    for printed, match in enumerate(matches, start=1):
        print(match)

    if printed == 0:
        print("No results")
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def handle_index_command(args, catalog):
    """Run the ``index`` subcommand for ``args.sources``.

    ``args.update`` is coerced to ``bool`` before being forwarded.
    """
    sources = args.sources
    refresh = bool(args.update)
    index(catalog, sources, update=refresh)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def handle_completion_command(args):
    """Print the value returned by ``completion`` for ``args.shell``."""
    completion_output = completion(args.shell)
    print(completion_output)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def handle_query_command(args, catalog):
    """Run the ``query`` subcommand for the script given in ``args.script``.

    ``args.parallel`` is forwarded as ``parallel`` and ``args.param`` as
    ``params``.
    """
    script = args.script
    run_options = {"parallel": args.parallel, "params": args.param}
    query(catalog, script, **run_options)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def handle_broken_pipe_error(exc):
    """Silence stdout after a broken pipe and report the SIGPIPE exit code.

    Returns:
        A ``(error, exit_code)`` tuple where ``error`` is ``str(exc)`` and
        ``exit_code`` is ``141``.
    """
    message = str(exc)

    # Python flushes standard streams on exit; point the stdout descriptor at
    # devnull so that flush cannot raise another BrokenPipeError at shutdown.
    # See: https://docs.python.org/3/library/signal.html#note-on-sigpipe
    null_fd = os.open(os.devnull, os.O_WRONLY)
    stdout_fd = sys.stdout.fileno()
    os.dup2(null_fd, stdout_fd)

    sigpipe_exit_code = 141  # 128 + 13 (SIGPIPE)
    return message, sigpipe_exit_code
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def handle_general_exception(exc, args, logging_level):
    """Report an exception raised by a CLI command and map it to an exit code.

    Prints ``Error: <message>`` to stderr (a fixed cancellation message is
    used for ``KeyboardInterrupt``), additionally prints the full traceback
    when ``logging_level`` is ``logging.DEBUG`` or lower, and opens a
    post-mortem debugger session when ``args.pdb`` is truthy.

    Args:
        exc: The exception to report.
        args: Parsed CLI arguments; only ``args.pdb`` is read.
        logging_level: Numeric logging level currently in effect.

    Returns:
        A ``(error, exit_code)`` tuple where ``error`` is ``str(exc)`` and
        ``exit_code`` is always ``1``.
    """
    error = str(exc)
    if isinstance(exc, KeyboardInterrupt):
        msg = "Operation cancelled by the user"
    else:
        msg = error
    print("Error:", msg, file=sys.stderr)

    if logging_level <= logging.DEBUG:
        traceback.print_exception(
            type(exc),
            exc,
            exc.__traceback__,
            file=sys.stderr,
        )

    if args.pdb:
        import pdb  # noqa: T100

        # Debug the traceback of the exception that was passed in, instead of
        # relying on the interpreter's "currently handled exception" state,
        # which is only set while an ``except`` block is active. When the
        # exception carries no traceback, ``None`` keeps the old fallback.
        pdb.post_mortem(exc.__traceback__)

    return error, 1
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def handle_udf(command):
    """Run one of the internal UDF entry points when ``command`` names one.

    The entry points are imported lazily, only for the matching command.

    Returns:
        The entry point's return value for ``internal-run-udf`` or
        ``internal-run-udf-worker``; ``None`` for any other command.
    """
    if command == "internal-run-udf":
        from datachain.query.dispatch import udf_entrypoint as entrypoint
    elif command == "internal-run-udf-worker":
        from datachain.query.dispatch import udf_worker_entrypoint as entrypoint
    else:
        return None

    return entrypoint()
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from .datasets import (
|
|
2
|
+
dataset_stats,
|
|
3
|
+
edit_dataset,
|
|
4
|
+
list_datasets,
|
|
5
|
+
list_datasets_local,
|
|
6
|
+
rm_dataset,
|
|
7
|
+
)
|
|
8
|
+
from .du import du
|
|
9
|
+
from .index import index
|
|
10
|
+
from .ls import ls
|
|
11
|
+
from .misc import clear_cache, completion, garbage_collect
|
|
12
|
+
from .query import query
|
|
13
|
+
from .show import show
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"clear_cache",
|
|
17
|
+
"completion",
|
|
18
|
+
"dataset_stats",
|
|
19
|
+
"du",
|
|
20
|
+
"edit_dataset",
|
|
21
|
+
"garbage_collect",
|
|
22
|
+
"index",
|
|
23
|
+
"list_datasets",
|
|
24
|
+
"list_datasets_local",
|
|
25
|
+
"ls",
|
|
26
|
+
"query",
|
|
27
|
+
"rm_dataset",
|
|
28
|
+
"show",
|
|
29
|
+
]
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from typing import TYPE_CHECKING, Optional
|
|
3
|
+
|
|
4
|
+
from tabulate import tabulate
|
|
5
|
+
|
|
6
|
+
from datachain import utils
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from datachain.catalog import Catalog
|
|
10
|
+
|
|
11
|
+
from datachain.cli.utils import determine_flavors
|
|
12
|
+
from datachain.config import Config
|
|
13
|
+
from datachain.error import DatasetNotFoundError
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def list_datasets(
    catalog: "Catalog",
    studio: bool = False,
    local: bool = False,
    all: bool = True,
    team: Optional[str] = None,
):
    """Print a table of ``(name, version)`` dataset entries.

    The Studio token is read from the configuration and the ``studio`` /
    ``local`` / ``all`` flags are normalised through ``determine_flavors``.
    Local entries are collected when ``all`` or ``local`` is set; Studio
    entries are collected when ``all`` or ``studio`` is set and a token is
    available. The ``Studio`` / ``Local`` marker columns are only included
    when both sides are being listed and a token is available.
    """
    from datachain.studio import list_datasets as list_studio_datasets

    token = Config().read().get("studio", {}).get("token")
    all, local, studio = determine_flavors(studio, local, all, token)

    local_datasets = set()
    if all or local:
        local_datasets.update(list_datasets_local(catalog))

    studio_datasets = set()
    if (all or studio) and token:
        studio_datasets.update(list_studio_datasets(team=team))

    # Same for every row, so evaluate it once up front.
    show_both = (all or (local and studio)) and token

    rows = []
    for name, version in local_datasets.union(studio_datasets):
        key = (name, version)
        rows.append(
            _datasets_tabulate_row(
                name=name,
                version=version,
                both=show_both,
                local=key in local_datasets,
                studio=key in studio_datasets,
            )
        )

    print(tabulate(rows, headers="keys"))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def list_datasets_local(catalog: "Catalog"):
    """Yield a ``(name, version)`` tuple for every version of every dataset
    returned by ``catalog.ls_datasets()``."""
    for dataset in catalog.ls_datasets():
        yield from ((dataset.name, entry.version) for entry in dataset.versions)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _datasets_tabulate_row(name, version, both, local, studio):
    """Build one table row for a dataset version.

    The row always has ``Name`` and ``Version``. When ``both`` is truthy it
    also gets ``Studio`` and ``Local`` columns holding a check mark or a cross
    depending on the truthiness of ``studio`` and ``local``.
    """
    row = {"Name": name, "Version": version}
    if not both:
        return row

    marks = {True: "\u2714", False: "\u2716"}
    row["Studio"] = marks[bool(studio)]
    row["Local"] = marks[bool(local)]
    return row
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def rm_dataset(
    catalog: "Catalog",
    name: str,
    version: Optional[int] = None,
    force: Optional[bool] = False,
    studio: bool = False,
    local: bool = False,
    all: bool = True,
    team: Optional[str] = None,
):
    """Remove a dataset locally and/or from Studio.

    The Studio token is read from the configuration and the ``studio`` /
    ``local`` / ``all`` flags are normalised through ``determine_flavors``.
    A dataset missing from the local catalog is reported on stderr and does
    not prevent the Studio removal. Studio removal only happens when a token
    is available.
    """
    from datachain.studio import remove_studio_dataset

    token = Config().read().get("studio", {}).get("token")
    all, local, studio = determine_flavors(studio, local, all, token)

    remove_locally = all or local
    remove_from_studio = (all or studio) and token

    if remove_locally:
        try:
            catalog.remove_dataset(name, version=version, force=force)
        except DatasetNotFoundError:
            print("Dataset not found in local", file=sys.stderr)

    if not remove_from_studio:
        return
    remove_studio_dataset(team, name, version, force)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def edit_dataset(
    catalog: "Catalog",
    name: str,
    new_name: Optional[str] = None,
    description: Optional[str] = None,
    labels: Optional[list[str]] = None,
    studio: bool = False,
    local: bool = False,
    all: bool = True,
    team: Optional[str] = None,
):
    """Edit a dataset's name, description and labels locally and/or in Studio.

    The Studio token is read from the configuration and the ``studio`` /
    ``local`` / ``all`` flags are normalised through ``determine_flavors``.
    A dataset missing from the local catalog is reported on stderr and does
    not prevent the Studio edit. The Studio edit only happens when a token is
    available.
    """
    from datachain.studio import edit_studio_dataset

    token = Config().read().get("studio", {}).get("token")
    all, local, studio = determine_flavors(studio, local, all, token)

    edit_locally = all or local
    edit_in_studio = (all or studio) and token

    if edit_locally:
        try:
            catalog.edit_dataset(name, new_name, description, labels)
        except DatasetNotFoundError:
            print("Dataset not found in local", file=sys.stderr)

    if not edit_in_studio:
        return
    edit_studio_dataset(team, name, new_name, description, labels)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def dataset_stats(
    catalog: "Catalog",
    name: str,
    version: int,
    show_bytes=False,
    si=False,
):
    """Print the object count and total size of a dataset version.

    Nothing is printed when ``catalog.dataset_stats`` returns a falsy value.
    With ``show_bytes`` the size is printed as the raw ``stats.size`` value;
    otherwise it is formatted with ``utils.sizeof_fmt`` (``si`` is forwarded)
    and right-aligned to a width of 7.
    """
    stats = catalog.dataset_stats(name, version)
    if not stats:
        return

    print(f"Number of objects: {stats.num_objects}")

    if show_bytes:
        size_line = f"Total objects size: {stats.size}"
    else:
        size_line = f"Total objects size: {utils.sizeof_fmt(stats.size, si=si): >7}"
    print(size_line)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
2
|
+
|
|
3
|
+
from datachain import utils
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from datachain.catalog import Catalog
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def du(catalog: "Catalog", sources, show_bytes=False, si=False, **kwargs):
    """Print one ``<size> <path>`` line per entry yielded by ``catalog.du``.

    Extra keyword arguments are forwarded to ``catalog.du``. With
    ``show_bytes`` the size is printed as yielded; otherwise it is formatted
    with ``utils.sizeof_fmt`` (``si`` is forwarded) and right-aligned to a
    width of 7.
    """
    for path, size in catalog.du(sources, **kwargs):
        size_column = size if show_bytes else f"{utils.sizeof_fmt(size, si=si): >7}"
        print(f"{size_column} {path}")
|