datachain 0.6.6__tar.gz → 0.6.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.6.6 → datachain-0.6.8}/.github/workflows/tests-studio.yml +3 -6
- {datachain-0.6.6/src/datachain.egg-info → datachain-0.6.8}/PKG-INFO +3 -1
- {datachain-0.6.6 → datachain-0.6.8}/pyproject.toml +4 -2
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/__init__.py +2 -1
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/catalog/catalog.py +2 -6
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/cli.py +137 -23
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/data_storage/schema.py +4 -2
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/file.py +0 -3
- datachain-0.6.8/src/datachain/lib/models/__init__.py +5 -0
- datachain-0.6.8/src/datachain/lib/models/bbox.py +45 -0
- datachain-0.6.8/src/datachain/lib/models/pose.py +37 -0
- datachain-0.6.8/src/datachain/lib/models/yolo.py +39 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/remote/studio.py +12 -2
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/types.py +2 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/studio.py +18 -6
- {datachain-0.6.6 → datachain-0.6.8/src/datachain.egg-info}/PKG-INFO +3 -1
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain.egg-info/SOURCES.txt +5 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain.egg-info/requires.txt +2 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/conftest.py +28 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_datasets.py +2 -5
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_ls.py +4 -13
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_pull.py +4 -6
- {datachain-0.6.6 → datachain-0.6.8}/tests/test_cli_e2e.py +17 -8
- {datachain-0.6.6 → datachain-0.6.8}/tests/test_cli_studio.py +64 -18
- datachain-0.6.8/tests/unit/lib/test_models.py +50 -0
- {datachain-0.6.6 → datachain-0.6.8}/.cruft.json +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/.gitattributes +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/.github/codecov.yaml +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/.github/dependabot.yml +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/.github/workflows/release.yml +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/.github/workflows/tests.yml +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/.gitignore +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/.pre-commit-config.yaml +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/CONTRIBUTING.rst +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/LICENSE +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/README.rst +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/docs/assets/datachain.svg +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/docs/assets/flowchart.png +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/docs/index.md +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/docs/references/datachain.md +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/docs/references/datatype.md +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/docs/references/file.md +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/docs/references/index.md +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/docs/references/sql.md +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/docs/references/torch.md +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/docs/references/udf.md +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/multimodal/wds.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/mkdocs.yml +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/noxfile.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/overrides/main.html +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/setup.cfg +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/__main__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/asyn.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/cache.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/cli_utils.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/client/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/client/azure.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/client/gcs.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/client/hf.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/client/local.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/client/s3.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/config.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/data_storage/id_generator.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/dataset.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/error.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/job.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/clip.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/dc.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/func/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/func/aggregate.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/func/func.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/hf.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/image.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/listing.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/settings.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/tar.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/text.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/udf.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/utils.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/listing.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/node.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/progress.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/py.typed +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/query/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/query/batch.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/query/dataset.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/query/metrics.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/query/params.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/query/queue.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/query/schema.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/query/session.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/sql/utils.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/telemetry.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain/utils.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/data.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/examples/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/examples/test_examples.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/examples/wds_data.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_catalog.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_client.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_datachain.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_listing.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_metrics.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_pytorch.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/func/test_query.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/scripts/feature_class.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/test_atomicity.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/test_query_e2e.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/test_telemetry.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_asyn.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_cache.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_catalog.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_client.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_config.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_dataset.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_id_generator.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_listing.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_metastore.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_query.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_query_params.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_serializer.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_session.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_utils.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.6.6 → datachain-0.6.8}/tests/utils.py +0 -0
|
@@ -32,7 +32,7 @@ jobs:
|
|
|
32
32
|
POSTGRES_DB: database
|
|
33
33
|
POSTGRES_HOST_AUTH_METHOD: trust
|
|
34
34
|
clickhouse:
|
|
35
|
-
image: clickhouse/clickhouse-server:24
|
|
35
|
+
image: clickhouse/clickhouse-server:24.6
|
|
36
36
|
ports:
|
|
37
37
|
- 8123:8123
|
|
38
38
|
- 9010:9000
|
|
@@ -52,14 +52,11 @@ jobs:
|
|
|
52
52
|
STUDIO_READ_ACCESS_TOKEN: ${{ secrets.ITERATIVE_STUDIO_READ_ACCESS_TOKEN }}
|
|
53
53
|
run: |
|
|
54
54
|
echo "DataChain branch: $BRANCH"
|
|
55
|
-
if
|
|
56
|
-
then
|
|
57
|
-
STUDIO_BRANCH=develop
|
|
58
|
-
elif git ls-remote --heads https://"$STUDIO_READ_ACCESS_TOKEN"@github.com/iterative/studio.git "$BRANCH" | grep -F "$BRANCH" 2>&1>/dev/null
|
|
55
|
+
if git ls-remote --heads https://"$STUDIO_READ_ACCESS_TOKEN"@github.com/iterative/studio.git "$BRANCH" | grep -F "$BRANCH" 2>&1>/dev/null
|
|
59
56
|
then
|
|
60
57
|
STUDIO_BRANCH="$BRANCH"
|
|
61
58
|
else
|
|
62
|
-
STUDIO_BRANCH=
|
|
59
|
+
STUDIO_BRANCH=main
|
|
63
60
|
fi
|
|
64
61
|
echo "STUDIO_BRANCH=$STUDIO_BRANCH" >> $GITHUB_ENV
|
|
65
62
|
echo "Studio branch: $STUDIO_BRANCH"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.6.6
|
|
3
|
+
Version: 0.6.8
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -45,6 +45,7 @@ Requires-Dist: huggingface_hub
|
|
|
45
45
|
Requires-Dist: iterative-telemetry>=0.0.9
|
|
46
46
|
Requires-Dist: platformdirs
|
|
47
47
|
Requires-Dist: dvc-studio-client<1,>=0.21
|
|
48
|
+
Requires-Dist: tabulate
|
|
48
49
|
Provides-Extra: docs
|
|
49
50
|
Requires-Dist: mkdocs>=1.5.2; extra == "docs"
|
|
50
51
|
Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
|
|
@@ -87,6 +88,7 @@ Requires-Dist: types-python-dateutil; extra == "dev"
|
|
|
87
88
|
Requires-Dist: types-pytz; extra == "dev"
|
|
88
89
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
89
90
|
Requires-Dist: types-requests; extra == "dev"
|
|
91
|
+
Requires-Dist: types-tabulate; extra == "dev"
|
|
90
92
|
Provides-Extra: examples
|
|
91
93
|
Requires-Dist: datachain[tests]; extra == "examples"
|
|
92
94
|
Requires-Dist: numpy<2,>=1; extra == "examples"
|
|
@@ -47,7 +47,8 @@ dependencies = [
|
|
|
47
47
|
"huggingface_hub",
|
|
48
48
|
"iterative-telemetry>=0.0.9",
|
|
49
49
|
"platformdirs",
|
|
50
|
-
"dvc-studio-client>=0.21,<1"
|
|
50
|
+
"dvc-studio-client>=0.21,<1",
|
|
51
|
+
"tabulate"
|
|
51
52
|
]
|
|
52
53
|
|
|
53
54
|
[project.optional-dependencies]
|
|
@@ -98,7 +99,8 @@ dev = [
|
|
|
98
99
|
"types-python-dateutil",
|
|
99
100
|
"types-pytz",
|
|
100
101
|
"types-PyYAML",
|
|
101
|
-
"types-requests"
|
|
102
|
+
"types-requests",
|
|
103
|
+
"types-tabulate"
|
|
102
104
|
]
|
|
103
105
|
examples = [
|
|
104
106
|
"datachain[tests]",
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from datachain.lib import func
|
|
1
|
+
from datachain.lib import func, models
|
|
2
2
|
from datachain.lib.data_model import DataModel, DataType, is_chain_type
|
|
3
3
|
from datachain.lib.dc import C, Column, DataChain, Sys
|
|
4
4
|
from datachain.lib.file import (
|
|
@@ -38,5 +38,6 @@ __all__ = [
|
|
|
38
38
|
"func",
|
|
39
39
|
"is_chain_type",
|
|
40
40
|
"metrics",
|
|
41
|
+
"models",
|
|
41
42
|
"param",
|
|
42
43
|
]
|
|
@@ -58,7 +58,7 @@ from datachain.listing import Listing
|
|
|
58
58
|
from datachain.node import DirType, Node, NodeWithPath
|
|
59
59
|
from datachain.nodes_thread_pool import NodesThreadPool
|
|
60
60
|
from datachain.remote.studio import StudioClient
|
|
61
|
-
from datachain.sql.types import DateTime, SQLType, String
|
|
61
|
+
from datachain.sql.types import DateTime, SQLType
|
|
62
62
|
from datachain.utils import (
|
|
63
63
|
DataChainDir,
|
|
64
64
|
batched,
|
|
@@ -196,11 +196,6 @@ class DatasetRowsFetcher(NodesThreadPool):
|
|
|
196
196
|
for c in [c for c, t in self.schema.items() if t == DateTime]:
|
|
197
197
|
df[c] = pd.to_datetime(df[c], unit="s")
|
|
198
198
|
|
|
199
|
-
# strings are represented as binaries in parquet export so need to
|
|
200
|
-
# decode it back to strings
|
|
201
|
-
for c in [c for c, t in self.schema.items() if t == String]:
|
|
202
|
-
df[c] = df[c].str.decode("utf-8")
|
|
203
|
-
|
|
204
199
|
def do_task(self, urls):
|
|
205
200
|
import lz4.frame
|
|
206
201
|
import pandas as pd
|
|
@@ -1403,6 +1398,7 @@ class Catalog:
|
|
|
1403
1398
|
query_script=remote_dataset_version.query_script,
|
|
1404
1399
|
create_rows=True,
|
|
1405
1400
|
columns=columns,
|
|
1401
|
+
feature_schema=remote_dataset_version.feature_schema,
|
|
1406
1402
|
validate_version=False,
|
|
1407
1403
|
)
|
|
1408
1404
|
|
|
@@ -4,18 +4,21 @@ import shlex
|
|
|
4
4
|
import sys
|
|
5
5
|
import traceback
|
|
6
6
|
from argparse import Action, ArgumentParser, ArgumentTypeError, Namespace
|
|
7
|
-
from collections.abc import Iterable, Iterator,
|
|
7
|
+
from collections.abc import Iterable, Iterator, Sequence
|
|
8
8
|
from importlib.metadata import PackageNotFoundError, version
|
|
9
9
|
from itertools import chain
|
|
10
10
|
from multiprocessing import freeze_support
|
|
11
11
|
from typing import TYPE_CHECKING, Optional, Union
|
|
12
12
|
|
|
13
13
|
import shtab
|
|
14
|
+
from tabulate import tabulate
|
|
14
15
|
|
|
15
16
|
from datachain import Session, utils
|
|
16
17
|
from datachain.cli_utils import BooleanOptionalAction, CommaSeparatedArgs, KeyValueArgs
|
|
18
|
+
from datachain.config import Config
|
|
19
|
+
from datachain.error import DataChainError
|
|
17
20
|
from datachain.lib.dc import DataChain
|
|
18
|
-
from datachain.studio import process_studio_cli_args
|
|
21
|
+
from datachain.studio import list_datasets, process_studio_cli_args
|
|
19
22
|
from datachain.telemetry import telemetry
|
|
20
23
|
|
|
21
24
|
if TYPE_CHECKING:
|
|
@@ -416,7 +419,36 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
416
419
|
help="Dataset labels",
|
|
417
420
|
)
|
|
418
421
|
|
|
419
|
-
subp.add_parser(
|
|
422
|
+
datasets_parser = subp.add_parser(
|
|
423
|
+
"datasets", parents=[parent_parser], description="List datasets"
|
|
424
|
+
)
|
|
425
|
+
datasets_parser.add_argument(
|
|
426
|
+
"--studio",
|
|
427
|
+
action="store_true",
|
|
428
|
+
default=False,
|
|
429
|
+
help="List the files in the Studio",
|
|
430
|
+
)
|
|
431
|
+
datasets_parser.add_argument(
|
|
432
|
+
"-L",
|
|
433
|
+
"--local",
|
|
434
|
+
action="store_true",
|
|
435
|
+
default=False,
|
|
436
|
+
help="List local files only",
|
|
437
|
+
)
|
|
438
|
+
datasets_parser.add_argument(
|
|
439
|
+
"-a",
|
|
440
|
+
"--all",
|
|
441
|
+
action="store_true",
|
|
442
|
+
default=True,
|
|
443
|
+
help="List all files including hidden files",
|
|
444
|
+
)
|
|
445
|
+
datasets_parser.add_argument(
|
|
446
|
+
"--team",
|
|
447
|
+
action="store",
|
|
448
|
+
default=None,
|
|
449
|
+
help="The team to list datasets for. By default, it will use team from config.",
|
|
450
|
+
)
|
|
451
|
+
|
|
420
452
|
rm_dataset_parser = subp.add_parser(
|
|
421
453
|
"rm-dataset", parents=[parent_parser], description="Removes dataset"
|
|
422
454
|
)
|
|
@@ -474,10 +506,30 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
474
506
|
help="List files in the long format",
|
|
475
507
|
)
|
|
476
508
|
parse_ls.add_argument(
|
|
477
|
-
"--
|
|
509
|
+
"--studio",
|
|
510
|
+
action="store_true",
|
|
511
|
+
default=False,
|
|
512
|
+
help="List the files in the Studio",
|
|
513
|
+
)
|
|
514
|
+
parse_ls.add_argument(
|
|
515
|
+
"-L",
|
|
516
|
+
"--local",
|
|
517
|
+
action="store_true",
|
|
518
|
+
default=False,
|
|
519
|
+
help="List local files only",
|
|
520
|
+
)
|
|
521
|
+
parse_ls.add_argument(
|
|
522
|
+
"-a",
|
|
523
|
+
"--all",
|
|
524
|
+
action="store_true",
|
|
525
|
+
default=True,
|
|
526
|
+
help="List all files including hidden files",
|
|
527
|
+
)
|
|
528
|
+
parse_ls.add_argument(
|
|
529
|
+
"--team",
|
|
478
530
|
action="store",
|
|
479
|
-
default=
|
|
480
|
-
help="
|
|
531
|
+
default=None,
|
|
532
|
+
help="The team to list datasets for. By default, it will use team from config.",
|
|
481
533
|
)
|
|
482
534
|
|
|
483
535
|
parse_du = subp.add_parser(
|
|
@@ -758,11 +810,12 @@ def format_ls_entry(entry: str) -> str:
|
|
|
758
810
|
def ls_remote(
|
|
759
811
|
paths: Iterable[str],
|
|
760
812
|
long: bool = False,
|
|
813
|
+
team: Optional[str] = None,
|
|
761
814
|
):
|
|
762
815
|
from datachain.node import long_line_str
|
|
763
816
|
from datachain.remote.studio import StudioClient
|
|
764
817
|
|
|
765
|
-
client = StudioClient()
|
|
818
|
+
client = StudioClient(team=team)
|
|
766
819
|
first = True
|
|
767
820
|
for path, response in client.ls(paths):
|
|
768
821
|
if not first:
|
|
@@ -789,28 +842,66 @@ def ls_remote(
|
|
|
789
842
|
def ls(
|
|
790
843
|
sources,
|
|
791
844
|
long: bool = False,
|
|
792
|
-
|
|
793
|
-
|
|
845
|
+
studio: bool = False,
|
|
846
|
+
local: bool = False,
|
|
847
|
+
all: bool = True,
|
|
848
|
+
team: Optional[str] = None,
|
|
794
849
|
**kwargs,
|
|
795
850
|
):
|
|
796
|
-
|
|
797
|
-
|
|
851
|
+
token = Config().read().get("studio", {}).get("token")
|
|
852
|
+
all, local, studio = _determine_flavors(studio, local, all, token)
|
|
798
853
|
|
|
799
|
-
|
|
800
|
-
remote_type = config["type"]
|
|
801
|
-
if remote_type == "local":
|
|
854
|
+
if all or local:
|
|
802
855
|
ls_local(sources, long=long, **kwargs)
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
856
|
+
|
|
857
|
+
if (all or studio) and token:
|
|
858
|
+
ls_remote(sources, long=long, team=team)
|
|
859
|
+
|
|
860
|
+
|
|
861
|
+
def datasets(
|
|
862
|
+
catalog: "Catalog",
|
|
863
|
+
studio: bool = False,
|
|
864
|
+
local: bool = False,
|
|
865
|
+
all: bool = True,
|
|
866
|
+
team: Optional[str] = None,
|
|
867
|
+
):
|
|
868
|
+
token = Config().read().get("studio", {}).get("token")
|
|
869
|
+
all, local, studio = _determine_flavors(studio, local, all, token)
|
|
870
|
+
|
|
871
|
+
local_datasets = set(list_datasets_local(catalog)) if all or local else set()
|
|
872
|
+
studio_datasets = (
|
|
873
|
+
set(list_datasets(team=team)) if (all or studio) and token else set()
|
|
874
|
+
)
|
|
875
|
+
|
|
876
|
+
rows = [
|
|
877
|
+
_datasets_tabulate_row(
|
|
878
|
+
name=name,
|
|
879
|
+
version=version,
|
|
880
|
+
both=(all or (local and studio)) and token,
|
|
881
|
+
local=(name, version) in local_datasets,
|
|
882
|
+
studio=(name, version) in studio_datasets,
|
|
807
883
|
)
|
|
884
|
+
for name, version in local_datasets.union(studio_datasets)
|
|
885
|
+
]
|
|
886
|
+
|
|
887
|
+
print(tabulate(rows, headers="keys"))
|
|
808
888
|
|
|
809
889
|
|
|
810
|
-
def
|
|
890
|
+
def list_datasets_local(catalog: "Catalog"):
|
|
811
891
|
for d in catalog.ls_datasets():
|
|
812
892
|
for v in d.versions:
|
|
813
|
-
|
|
893
|
+
yield (d.name, v.version)
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
def _datasets_tabulate_row(name, version, both, local, studio):
|
|
897
|
+
row = {
|
|
898
|
+
"Name": name,
|
|
899
|
+
"Version": version,
|
|
900
|
+
}
|
|
901
|
+
if both:
|
|
902
|
+
row["Studio"] = "\u2714" if studio else "\u2716"
|
|
903
|
+
row["Local"] = "\u2714" if local else "\u2716"
|
|
904
|
+
return row
|
|
814
905
|
|
|
815
906
|
|
|
816
907
|
def rm_dataset(
|
|
@@ -953,6 +1044,20 @@ def completion(shell: str) -> str:
|
|
|
953
1044
|
)
|
|
954
1045
|
|
|
955
1046
|
|
|
1047
|
+
def _determine_flavors(studio: bool, local: bool, all: bool, token: Optional[str]):
|
|
1048
|
+
if studio and not token:
|
|
1049
|
+
raise DataChainError(
|
|
1050
|
+
"Not logged in to Studio. Log in with 'datachain studio login'."
|
|
1051
|
+
)
|
|
1052
|
+
|
|
1053
|
+
if local or studio:
|
|
1054
|
+
all = False
|
|
1055
|
+
|
|
1056
|
+
all = all and not (local or studio)
|
|
1057
|
+
|
|
1058
|
+
return all, local, studio
|
|
1059
|
+
|
|
1060
|
+
|
|
956
1061
|
def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR0915
|
|
957
1062
|
# Required for Windows multiprocessing support
|
|
958
1063
|
freeze_support()
|
|
@@ -1032,12 +1137,21 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
|
|
|
1032
1137
|
ls(
|
|
1033
1138
|
args.sources,
|
|
1034
1139
|
long=bool(args.long),
|
|
1035
|
-
|
|
1140
|
+
studio=args.studio,
|
|
1141
|
+
local=args.local,
|
|
1142
|
+
all=args.all,
|
|
1143
|
+
team=args.team,
|
|
1036
1144
|
update=bool(args.update),
|
|
1037
1145
|
client_config=client_config,
|
|
1038
1146
|
)
|
|
1039
|
-
elif args.command == "
|
|
1040
|
-
|
|
1147
|
+
elif args.command == "datasets":
|
|
1148
|
+
datasets(
|
|
1149
|
+
catalog=catalog,
|
|
1150
|
+
studio=args.studio,
|
|
1151
|
+
local=args.local,
|
|
1152
|
+
all=args.all,
|
|
1153
|
+
team=args.team,
|
|
1154
|
+
)
|
|
1041
1155
|
elif args.command == "show":
|
|
1042
1156
|
show(
|
|
1043
1157
|
catalog,
|
|
@@ -145,6 +145,8 @@ class DirExpansion:
|
|
|
145
145
|
|
|
146
146
|
|
|
147
147
|
class DataTable:
|
|
148
|
+
MAX_RANDOM = 2**63 - 1
|
|
149
|
+
|
|
148
150
|
def __init__(
|
|
149
151
|
self,
|
|
150
152
|
name: str,
|
|
@@ -269,8 +271,8 @@ class DataTable:
|
|
|
269
271
|
def delete(self):
|
|
270
272
|
return self.apply_conditions(self.table.delete())
|
|
271
273
|
|
|
272
|
-
@
|
|
273
|
-
def sys_columns():
|
|
274
|
+
@classmethod
|
|
275
|
+
def sys_columns(cls):
|
|
274
276
|
return [
|
|
275
277
|
sa.Column("sys__id", Int, primary_key=True),
|
|
276
278
|
sa.Column(
|
|
@@ -20,9 +20,6 @@ from PIL import Image
|
|
|
20
20
|
from pyarrow.dataset import dataset
|
|
21
21
|
from pydantic import Field, field_validator
|
|
22
22
|
|
|
23
|
-
if TYPE_CHECKING:
|
|
24
|
-
from typing_extensions import Self
|
|
25
|
-
|
|
26
23
|
from datachain.client.fileslice import FileSlice
|
|
27
24
|
from datachain.lib.data_model import DataModel
|
|
28
25
|
from datachain.lib.utils import DataChainError
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
|
|
5
|
+
from datachain.lib.data_model import DataModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BBox(DataModel):
    """
    A data model for representing bounding boxes.

    Attributes:
        title (str): The title of the bounding box.
        x1 (float): The x-coordinate of the top-left corner of the bounding box.
        y1 (float): The y-coordinate of the top-left corner of the bounding box.
        x2 (float): The x-coordinate of the bottom-right corner of the bounding box.
        y2 (float): The y-coordinate of the bottom-right corner of the bounding box.

    The bounding box is defined by two points:
        - (x1, y1): The top-left corner of the box.
        - (x2, y2): The bottom-right corner of the box.
    """

    title: str = Field(default="")
    x1: float = Field(default=0)
    y1: float = Field(default=0)
    x2: float = Field(default=0)
    y2: float = Field(default=0)

    @staticmethod
    def from_xywh(bbox: list[float], title: Optional[str] = None) -> "BBox":
        """
        Converts a bounding box in (x, y, width, height) format
        to a BBox data model instance.

        Args:
            bbox (list[float]): A bounding box, represented as a list
                                of four floats [x, y, width, height].
            title (Optional[str]): Title of the bounding box; ``None`` (or any
                                   other falsy value) is stored as "".

        Returns:
            BBox: An instance of the BBox data model with
                  x2 = x + width and y2 = y + height.

        Raises:
            AssertionError: If ``bbox`` does not have exactly 4 elements.
        """
        # Kept as an assert so the exception type seen by callers is unchanged.
        assert len(bbox) == 4, f"Bounding box must have 4 elements, got {len(bbox)}"
        x, y, w, h = bbox
        return BBox(title=title or "", x1=x, y1=y, x2=x + w, y2=y + h)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from pydantic import Field
|
|
2
|
+
|
|
3
|
+
from datachain.lib.data_model import DataModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Pose(DataModel):
    """
    A data model for representing pose keypoints.

    Attributes:
        x (list[float]): The x-coordinates of the keypoints.
        y (list[float]): The y-coordinates of the keypoints.

    The keypoints are represented as lists of x and y coordinates, where each index
    corresponds to a specific body part.
    """

    # NOTE(review): both fields default to None although they are annotated as
    # list[float], so an instance created without arguments presumably carries
    # None rather than a list. Confirm whether an empty-list default is intended.
    x: list[float] = Field(default=None)
    y: list[float] = Field(default=None)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Pose3D(DataModel):
    """
    A data model for representing 3D pose keypoints.

    Attributes:
        x (list[float]): The x-coordinates of the keypoints.
        y (list[float]): The y-coordinates of the keypoints.
        visible (list[float]): The visibility of the keypoints.

    The keypoints are represented as lists of x, y, and visibility values,
    where each index corresponds to a specific body part.
    """

    # NOTE(review): all three fields default to None although they are
    # annotated as list[float], so an instance created without arguments
    # presumably carries None rather than lists. Confirm the intended default.
    x: list[float] = Field(default=None)
    y: list[float] = Field(default=None)
    visible: list[float] = Field(default=None)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module contains the YOLO models.
|
|
3
|
+
|
|
4
|
+
YOLO stands for "You Only Look Once", a family of object detection models that
|
|
5
|
+
are designed to be fast and accurate. The models are trained to detect objects
|
|
6
|
+
in images by dividing the image into a grid and predicting the bounding boxes
|
|
7
|
+
and class probabilities for each grid cell.
|
|
8
|
+
|
|
9
|
+
More information about YOLO can be found here:
|
|
10
|
+
- https://pjreddie.com/darknet/yolo/
|
|
11
|
+
- https://docs.ultralytics.com/
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class PoseBodyPart:
    """
    An enumeration of body parts for YOLO pose keypoints.

    More information about the body parts can be found here:
    https://docs.ultralytics.com/tasks/pose/
    """

    # Plain class attributes (not an ``enum.Enum``): each name maps to an
    # integer from 0 to 16.
    # NOTE(review): presumably these are indices into per-keypoint coordinate
    # lists; verify against the code that consumes them.
    nose = 0
    left_eye = 1
    right_eye = 2
    left_ear = 3
    right_ear = 4
    left_shoulder = 5
    right_shoulder = 6
    left_elbow = 7
    right_elbow = 8
    left_wrist = 9
    right_wrist = 10
    left_hip = 11
    right_hip = 12
    left_knee = 13
    right_knee = 14
    left_ankle = 15
    right_ankle = 16
|
|
@@ -131,6 +131,12 @@ class StudioClient:
|
|
|
131
131
|
timeout=self.timeout,
|
|
132
132
|
)
|
|
133
133
|
ok = response.ok
|
|
134
|
+
if not ok:
|
|
135
|
+
if response.status_code == 403:
|
|
136
|
+
message = f"Not authorized for the team {self.team}"
|
|
137
|
+
raise DataChainError(message)
|
|
138
|
+
logger.error("Got bad response from Studio")
|
|
139
|
+
|
|
134
140
|
content = msgpack.unpackb(response.content, ext_hook=self._unpacker_hook)
|
|
135
141
|
response_data = content.get("data")
|
|
136
142
|
if ok and response_data is None:
|
|
@@ -177,8 +183,12 @@ class StudioClient:
|
|
|
177
183
|
response.content.decode("utf-8"),
|
|
178
184
|
)
|
|
179
185
|
if response.status_code == 403:
|
|
180
|
-
message = "Not authorized"
|
|
186
|
+
message = f"Not authorized for the team {self.team}"
|
|
181
187
|
else:
|
|
188
|
+
logger.error(
|
|
189
|
+
"Got bad response from Studio, content is %s",
|
|
190
|
+
response.content.decode("utf-8"),
|
|
191
|
+
)
|
|
182
192
|
message = data.get("message", "")
|
|
183
193
|
else:
|
|
184
194
|
message = ""
|
|
@@ -214,7 +224,7 @@ class StudioClient:
|
|
|
214
224
|
# to handle cases where a path will be expanded (i.e. globs)
|
|
215
225
|
response: Response[LsData]
|
|
216
226
|
for path in paths:
|
|
217
|
-
response = self._send_request_msgpack("ls", {"source": path})
|
|
227
|
+
response = self._send_request_msgpack("datachain/ls", {"source": path})
|
|
218
228
|
yield path, response
|
|
219
229
|
|
|
220
230
|
def ls_datasets(self) -> Response[LsData]:
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from typing import TYPE_CHECKING
|
|
2
|
+
from typing import TYPE_CHECKING, Optional
|
|
3
|
+
|
|
4
|
+
from tabulate import tabulate
|
|
3
5
|
|
|
4
6
|
from datachain.catalog.catalog import raise_remote_error
|
|
5
7
|
from datachain.config import Config, ConfigLevel
|
|
8
|
+
from datachain.dataset import QUERY_DATASET_PREFIX
|
|
6
9
|
from datachain.error import DataChainError
|
|
7
10
|
from datachain.remote.studio import StudioClient
|
|
8
11
|
from datachain.utils import STUDIO_URL
|
|
@@ -24,7 +27,13 @@ def process_studio_cli_args(args: "Namespace"):
|
|
|
24
27
|
if args.cmd == "token":
|
|
25
28
|
return token()
|
|
26
29
|
if args.cmd == "datasets":
|
|
27
|
-
|
|
30
|
+
rows = [
|
|
31
|
+
{"Name": name, "Version": version}
|
|
32
|
+
for name, version in list_datasets(args.team)
|
|
33
|
+
]
|
|
34
|
+
print(tabulate(rows, headers="keys"))
|
|
35
|
+
return 0
|
|
36
|
+
|
|
28
37
|
if args.cmd == "team":
|
|
29
38
|
return set_team(args)
|
|
30
39
|
raise DataChainError(f"Unknown command '{args.cmd}'.")
|
|
@@ -103,19 +112,22 @@ def token():
|
|
|
103
112
|
print(token)
|
|
104
113
|
|
|
105
114
|
|
|
106
|
-
def list_datasets(
|
|
107
|
-
client = StudioClient(team=
|
|
115
|
+
def list_datasets(team: Optional[str] = None):
    """Yield ``(name, version)`` tuples for the datasets Studio reports.

    A ``StudioClient`` is created for ``team`` and asked for its datasets. A
    non-ok response is passed to ``raise_remote_error``. Datasets whose name
    starts with ``QUERY_DATASET_PREFIX`` are skipped. Nothing is yielded when
    the response carries no data.
    """
    studio_response = StudioClient(team=team).ls_datasets()
    if not studio_response.ok:
        raise_remote_error(studio_response.message)

    for dataset in studio_response.data or ():
        dataset_name = dataset.get("name")
        if dataset_name and dataset_name.startswith(QUERY_DATASET_PREFIX):
            continue

        yield from (
            (dataset_name, entry.get("version"))
            for entry in dataset.get("versions", [])
        )
|
|
119
131
|
|
|
120
132
|
|
|
121
133
|
def save_config(hostname, token):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.6.
|
|
3
|
+
Version: 0.6.8
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -45,6 +45,7 @@ Requires-Dist: huggingface_hub
|
|
|
45
45
|
Requires-Dist: iterative-telemetry>=0.0.9
|
|
46
46
|
Requires-Dist: platformdirs
|
|
47
47
|
Requires-Dist: dvc-studio-client<1,>=0.21
|
|
48
|
+
Requires-Dist: tabulate
|
|
48
49
|
Provides-Extra: docs
|
|
49
50
|
Requires-Dist: mkdocs>=1.5.2; extra == "docs"
|
|
50
51
|
Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
|
|
@@ -87,6 +88,7 @@ Requires-Dist: types-python-dateutil; extra == "dev"
|
|
|
87
88
|
Requires-Dist: types-pytz; extra == "dev"
|
|
88
89
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
89
90
|
Requires-Dist: types-requests; extra == "dev"
|
|
91
|
+
Requires-Dist: types-tabulate; extra == "dev"
|
|
90
92
|
Provides-Extra: examples
|
|
91
93
|
Requires-Dist: datachain[tests]; extra == "examples"
|
|
92
94
|
Requires-Dist: numpy<2,>=1; extra == "examples"
|
|
@@ -129,6 +129,10 @@ src/datachain/lib/convert/values_to_tuples.py
|
|
|
129
129
|
src/datachain/lib/func/__init__.py
|
|
130
130
|
src/datachain/lib/func/aggregate.py
|
|
131
131
|
src/datachain/lib/func/func.py
|
|
132
|
+
src/datachain/lib/models/__init__.py
|
|
133
|
+
src/datachain/lib/models/bbox.py
|
|
134
|
+
src/datachain/lib/models/pose.py
|
|
135
|
+
src/datachain/lib/models/yolo.py
|
|
132
136
|
src/datachain/query/__init__.py
|
|
133
137
|
src/datachain/query/batch.py
|
|
134
138
|
src/datachain/query/dataset.py
|
|
@@ -237,6 +241,7 @@ tests/unit/lib/test_file.py
|
|
|
237
241
|
tests/unit/lib/test_hf.py
|
|
238
242
|
tests/unit/lib/test_image.py
|
|
239
243
|
tests/unit/lib/test_listing_info.py
|
|
244
|
+
tests/unit/lib/test_models.py
|
|
240
245
|
tests/unit/lib/test_schema.py
|
|
241
246
|
tests/unit/lib/test_signal_schema.py
|
|
242
247
|
tests/unit/lib/test_sql_to_python.py
|
|
@@ -27,6 +27,7 @@ huggingface_hub
|
|
|
27
27
|
iterative-telemetry>=0.0.9
|
|
28
28
|
platformdirs
|
|
29
29
|
dvc-studio-client<1,>=0.21
|
|
30
|
+
tabulate
|
|
30
31
|
|
|
31
32
|
[dev]
|
|
32
33
|
datachain[docs,tests]
|
|
@@ -35,6 +36,7 @@ types-python-dateutil
|
|
|
35
36
|
types-pytz
|
|
36
37
|
types-PyYAML
|
|
37
38
|
types-requests
|
|
39
|
+
types-tabulate
|
|
38
40
|
|
|
39
41
|
[docs]
|
|
40
42
|
mkdocs>=1.5.2
|