datachain 0.6.9__tar.gz → 0.6.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.6.9/src/datachain.egg-info → datachain-0.6.10}/PKG-INFO +2 -2
- {datachain-0.6.9 → datachain-0.6.10}/mkdocs.yml +1 -1
- {datachain-0.6.9 → datachain-0.6.10}/pyproject.toml +1 -1
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/catalog/catalog.py +15 -3
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/data_storage/sqlite.py +6 -2
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/dc.py +53 -0
- datachain-0.6.10/src/datachain/lib/models/__init__.py +6 -0
- datachain-0.6.10/src/datachain/lib/models/bbox.py +116 -0
- datachain-0.6.10/src/datachain/lib/models/pose.py +108 -0
- datachain-0.6.10/src/datachain/lib/models/segment.py +53 -0
- datachain-0.6.10/src/datachain/lib/models/ultralytics/__init__.py +14 -0
- datachain-0.6.10/src/datachain/lib/models/ultralytics/bbox.py +189 -0
- datachain-0.6.10/src/datachain/lib/models/ultralytics/pose.py +126 -0
- datachain-0.6.10/src/datachain/lib/models/ultralytics/segment.py +121 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/listing.py +24 -7
- datachain-0.6.10/src/datachain/toolkit/__init__.py +3 -0
- datachain-0.6.10/src/datachain/toolkit/split.py +67 -0
- {datachain-0.6.9 → datachain-0.6.10/src/datachain.egg-info}/PKG-INFO +2 -2
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain.egg-info/SOURCES.txt +9 -2
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain.egg-info/requires.txt +1 -1
- {datachain-0.6.9 → datachain-0.6.10}/tests/conftest.py +41 -1
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_dataset_query.py +66 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_pull.py +33 -6
- datachain-0.6.10/tests/func/test_toolkit.py +42 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_datachain.py +42 -0
- datachain-0.6.10/tests/unit/lib/test_models.py +142 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_listing.py +2 -1
- datachain-0.6.9/src/datachain/lib/models/__init__.py +0 -5
- datachain-0.6.9/src/datachain/lib/models/bbox.py +0 -45
- datachain-0.6.9/src/datachain/lib/models/pose.py +0 -37
- datachain-0.6.9/src/datachain/lib/models/yolo.py +0 -39
- datachain-0.6.9/tests/unit/lib/test_models.py +0 -50
- {datachain-0.6.9 → datachain-0.6.10}/.cruft.json +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.gitattributes +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.github/codecov.yaml +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.github/dependabot.yml +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.github/workflows/release.yml +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.github/workflows/tests.yml +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.gitignore +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/.pre-commit-config.yaml +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/CONTRIBUTING.rst +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/LICENSE +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/README.rst +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/docs/assets/datachain.svg +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/docs/index.md +0 -0
- {datachain-0.6.9 → datachain-0.6.10/docs}/overrides/main.html +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/docs/references/datachain.md +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/docs/references/datatype.md +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/docs/references/file.md +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/docs/references/index.md +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/docs/references/sql.md +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/docs/references/torch.md +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/docs/references/udf.md +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/multimodal/wds.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/noxfile.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/setup.cfg +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/__main__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/asyn.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/cache.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/cli.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/cli_utils.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/client/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/client/azure.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/client/gcs.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/client/hf.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/client/local.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/client/s3.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/config.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/data_storage/id_generator.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/dataset.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/error.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/job.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/clip.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/file.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/func/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/func/aggregate.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/func/func.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/hf.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/image.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/listing.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/settings.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/tar.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/text.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/udf.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/utils.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/node.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/progress.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/py.typed +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/query/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/query/batch.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/query/dataset.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/query/metrics.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/query/params.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/query/queue.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/query/schema.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/query/session.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/remote/studio.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/types.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/sql/utils.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/studio.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/telemetry.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain/utils.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/data.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/examples/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/examples/test_examples.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/examples/wds_data.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_catalog.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_client.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_datachain.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_datasets.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_listing.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_ls.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_metrics.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_pytorch.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/func/test_query.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/scripts/feature_class.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/test_atomicity.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/test_cli_e2e.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/test_cli_studio.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/test_query_e2e.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/test_telemetry.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_asyn.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_cache.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_catalog.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_client.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_config.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_dataset.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_id_generator.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_metastore.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_query.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_query_params.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_serializer.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_session.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_utils.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.6.9 → datachain-0.6.10}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.6.9
|
|
3
|
+
Version: 0.6.10
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -71,7 +71,7 @@ Requires-Dist: pytest<9,>=8; extra == "tests"
|
|
|
71
71
|
Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
|
|
72
72
|
Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
|
|
73
73
|
Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
|
|
74
|
-
Requires-Dist: pytest-servers[all]>=0.5.
|
|
74
|
+
Requires-Dist: pytest-servers[all]>=0.5.8; extra == "tests"
|
|
75
75
|
Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
|
|
76
76
|
Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
|
|
77
77
|
Requires-Dist: virtualenv; extra == "tests"
|
|
@@ -603,9 +603,10 @@ class Catalog:
|
|
|
603
603
|
)
|
|
604
604
|
|
|
605
605
|
lst = Listing(
|
|
606
|
+
self.metastore.clone(),
|
|
606
607
|
self.warehouse.clone(),
|
|
607
608
|
Client.get_client(list_uri, self.cache, **self.client_config),
|
|
608
|
-
|
|
609
|
+
dataset_name=list_ds_name,
|
|
609
610
|
object_name=object_name,
|
|
610
611
|
)
|
|
611
612
|
|
|
@@ -698,9 +699,13 @@ class Catalog:
|
|
|
698
699
|
|
|
699
700
|
client = self.get_client(source, **client_config)
|
|
700
701
|
uri = client.uri
|
|
701
|
-
st = self.warehouse.clone()
|
|
702
702
|
dataset_name, _, _, _ = DataChain.parse_uri(uri, self.session)
|
|
703
|
-
listing = Listing(
|
|
703
|
+
listing = Listing(
|
|
704
|
+
self.metastore.clone(),
|
|
705
|
+
self.warehouse.clone(),
|
|
706
|
+
client,
|
|
707
|
+
dataset_name=dataset_name,
|
|
708
|
+
)
|
|
704
709
|
rows = DatasetQuery(
|
|
705
710
|
name=dataset.name, version=ds_version, catalog=self
|
|
706
711
|
).to_db_records()
|
|
@@ -1354,6 +1359,13 @@ class Catalog:
|
|
|
1354
1359
|
# we will create new one if it doesn't exist
|
|
1355
1360
|
pass
|
|
1356
1361
|
|
|
1362
|
+
if dataset and version and dataset.has_version(version):
|
|
1363
|
+
"""No need to communicate with Studio at all"""
|
|
1364
|
+
dataset_uri = create_dataset_uri(remote_dataset_name, version)
|
|
1365
|
+
print(f"Local copy of dataset {dataset_uri} already present")
|
|
1366
|
+
_instantiate_dataset()
|
|
1367
|
+
return
|
|
1368
|
+
|
|
1357
1369
|
remote_dataset = self.get_remote_dataset(remote_dataset_name)
|
|
1358
1370
|
# if version is not specified in uri, take the latest one
|
|
1359
1371
|
if not version:
|
|
@@ -747,8 +747,12 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
747
747
|
|
|
748
748
|
ids = self.db.execute(select_ids).fetchall()
|
|
749
749
|
|
|
750
|
-
select_q =
|
|
751
|
-
|
|
750
|
+
select_q = (
|
|
751
|
+
query.with_only_columns(
|
|
752
|
+
*[c for c in query.selected_columns if c.name != "sys__id"]
|
|
753
|
+
)
|
|
754
|
+
.offset(None)
|
|
755
|
+
.limit(None)
|
|
752
756
|
)
|
|
753
757
|
|
|
754
758
|
for batch in batched_it(ids, 10_000):
|
|
@@ -642,6 +642,59 @@ class DataChain:
|
|
|
642
642
|
}
|
|
643
643
|
return chain.gen(**signal_dict) # type: ignore[misc, arg-type]
|
|
644
644
|
|
|
645
|
+
def explode(
|
|
646
|
+
self,
|
|
647
|
+
col: str,
|
|
648
|
+
model_name: Optional[str] = None,
|
|
649
|
+
object_name: Optional[str] = None,
|
|
650
|
+
) -> "DataChain":
|
|
651
|
+
"""Explodes a column containing JSON objects (dict or str DataChain type) into
|
|
652
|
+
individual columns based on the schema of the JSON. Schema is inferred from
|
|
653
|
+
the first row of the column.
|
|
654
|
+
|
|
655
|
+
Args:
|
|
656
|
+
col: the name of the column containing JSON to be exploded.
|
|
657
|
+
model_name: optional generated model name. By default generates the name
|
|
658
|
+
automatically.
|
|
659
|
+
object_name: optional generated object column name. By default generates the
|
|
660
|
+
name automatically.
|
|
661
|
+
|
|
662
|
+
Returns:
|
|
663
|
+
DataChain: A new DataChain instance with the new set of columns.
|
|
664
|
+
"""
|
|
665
|
+
import json
|
|
666
|
+
|
|
667
|
+
import pyarrow as pa
|
|
668
|
+
|
|
669
|
+
from datachain.lib.arrow import schema_to_output
|
|
670
|
+
|
|
671
|
+
json_value = next(self.limit(1).collect(col))
|
|
672
|
+
json_dict = (
|
|
673
|
+
json.loads(json_value) if isinstance(json_value, str) else json_value
|
|
674
|
+
)
|
|
675
|
+
|
|
676
|
+
if not isinstance(json_dict, dict):
|
|
677
|
+
raise TypeError(f"Column {col} should be a string or dict type with JSON")
|
|
678
|
+
|
|
679
|
+
schema = pa.Table.from_pylist([json_dict]).schema
|
|
680
|
+
output = schema_to_output(schema, None)
|
|
681
|
+
|
|
682
|
+
if not model_name:
|
|
683
|
+
model_name = f"{col.title()}ExplodedModel"
|
|
684
|
+
|
|
685
|
+
model = dict_to_data_model(model_name, output)
|
|
686
|
+
|
|
687
|
+
def json_to_model(json_value: Union[str, dict]):
|
|
688
|
+
json_dict = (
|
|
689
|
+
json.loads(json_value) if isinstance(json_value, str) else json_value
|
|
690
|
+
)
|
|
691
|
+
return model.model_validate(json_dict)
|
|
692
|
+
|
|
693
|
+
if not object_name:
|
|
694
|
+
object_name = f"{col}_expl"
|
|
695
|
+
|
|
696
|
+
return self.map(json_to_model, params=col, output={object_name: model})
|
|
697
|
+
|
|
645
698
|
@classmethod
|
|
646
699
|
def datasets(
|
|
647
700
|
cls,
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
from pydantic import Field
|
|
2
|
+
|
|
3
|
+
from datachain.lib.data_model import DataModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BBox(DataModel):
|
|
7
|
+
"""
|
|
8
|
+
A data model for representing bounding boxes.
|
|
9
|
+
|
|
10
|
+
Attributes:
|
|
11
|
+
title (str): The title of the bounding box.
|
|
12
|
+
coords (list[int]): The coordinates of the bounding box.
|
|
13
|
+
|
|
14
|
+
The bounding box is defined by two points:
|
|
15
|
+
- (x1, y1): The top-left corner of the box.
|
|
16
|
+
- (x2, y2): The bottom-right corner of the box.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
title: str = Field(default="")
|
|
20
|
+
coords: list[int] = Field(default=None)
|
|
21
|
+
|
|
22
|
+
@staticmethod
|
|
23
|
+
def from_list(coords: list[float], title: str = "") -> "BBox":
|
|
24
|
+
assert len(coords) == 4, "Bounding box coordinates must be a list of 4 floats."
|
|
25
|
+
assert all(
|
|
26
|
+
isinstance(value, (int, float)) for value in coords
|
|
27
|
+
), "Bounding box coordinates must be integers or floats."
|
|
28
|
+
return BBox(
|
|
29
|
+
title=title,
|
|
30
|
+
coords=[round(c) for c in coords],
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
@staticmethod
|
|
34
|
+
def from_dict(coords: dict[str, float], title: str = "") -> "BBox":
|
|
35
|
+
assert (
|
|
36
|
+
len(coords) == 4
|
|
37
|
+
), "Bounding box coordinates must be a dictionary of 4 floats."
|
|
38
|
+
assert set(coords) == {
|
|
39
|
+
"x1",
|
|
40
|
+
"y1",
|
|
41
|
+
"x2",
|
|
42
|
+
"y2",
|
|
43
|
+
}, "Bounding box coordinates must contain keys with coordinates."
|
|
44
|
+
assert all(
|
|
45
|
+
isinstance(value, (int, float)) for value in coords.values()
|
|
46
|
+
), "Bounding box coordinates must be integers or floats."
|
|
47
|
+
return BBox(
|
|
48
|
+
title=title,
|
|
49
|
+
coords=[
|
|
50
|
+
round(coords["x1"]),
|
|
51
|
+
round(coords["y1"]),
|
|
52
|
+
round(coords["x2"]),
|
|
53
|
+
round(coords["y2"]),
|
|
54
|
+
],
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class OBBox(DataModel):
|
|
59
|
+
"""
|
|
60
|
+
A data model for representing oriented bounding boxes.
|
|
61
|
+
|
|
62
|
+
Attributes:
|
|
63
|
+
title (str): The title of the oriented bounding box.
|
|
64
|
+
coords (list[int]): The coordinates of the oriented bounding box.
|
|
65
|
+
|
|
66
|
+
The oriented bounding box is defined by four points:
|
|
67
|
+
- (x1, y1): The first corner of the box.
|
|
68
|
+
- (x2, y2): The second corner of the box.
|
|
69
|
+
- (x3, y3): The third corner of the box.
|
|
70
|
+
- (x4, y4): The fourth corner of the box.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
title: str = Field(default="")
|
|
74
|
+
coords: list[int] = Field(default=None)
|
|
75
|
+
|
|
76
|
+
@staticmethod
|
|
77
|
+
def from_list(coords: list[float], title: str = "") -> "OBBox":
|
|
78
|
+
assert (
|
|
79
|
+
len(coords) == 8
|
|
80
|
+
), "Oriented bounding box coordinates must be a list of 8 floats."
|
|
81
|
+
assert all(
|
|
82
|
+
isinstance(value, (int, float)) for value in coords
|
|
83
|
+
), "Oriented bounding box coordinates must be integers or floats."
|
|
84
|
+
return OBBox(
|
|
85
|
+
title=title,
|
|
86
|
+
coords=[round(c) for c in coords],
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
@staticmethod
|
|
90
|
+
def from_dict(coords: dict[str, float], title: str = "") -> "OBBox":
|
|
91
|
+
assert set(coords) == {
|
|
92
|
+
"x1",
|
|
93
|
+
"y1",
|
|
94
|
+
"x2",
|
|
95
|
+
"y2",
|
|
96
|
+
"x3",
|
|
97
|
+
"y3",
|
|
98
|
+
"x4",
|
|
99
|
+
"y4",
|
|
100
|
+
}, "Oriented bounding box coordinates must contain keys with coordinates."
|
|
101
|
+
assert all(
|
|
102
|
+
isinstance(value, (int, float)) for value in coords.values()
|
|
103
|
+
), "Oriented bounding box coordinates must be integers or floats."
|
|
104
|
+
return OBBox(
|
|
105
|
+
title=title,
|
|
106
|
+
coords=[
|
|
107
|
+
round(coords["x1"]),
|
|
108
|
+
round(coords["y1"]),
|
|
109
|
+
round(coords["x2"]),
|
|
110
|
+
round(coords["y2"]),
|
|
111
|
+
round(coords["x3"]),
|
|
112
|
+
round(coords["y3"]),
|
|
113
|
+
round(coords["x4"]),
|
|
114
|
+
round(coords["y4"]),
|
|
115
|
+
],
|
|
116
|
+
)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from pydantic import Field
|
|
2
|
+
|
|
3
|
+
from datachain.lib.data_model import DataModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Pose(DataModel):
    """
    Data model holding the keypoints of a single 2D pose.

    Attributes:
        x (list[int]): X-coordinate of each keypoint.
        y (list[int]): Y-coordinate of each keypoint.

    The two lists are index-aligned: position ``i`` of ``x`` and ``y`` together
    describes keypoint ``i``. Both constructors below accept exactly 17 keypoints.
    """

    x: list[int] = Field(default=None)
    y: list[int] = Field(default=None)

    @staticmethod
    def from_list(points: list[list[float]]) -> "Pose":
        """
        Create a pose from a two-item list ``[xs, ys]``.

        Every coordinate is passed through the built-in ``round``. The input is
        checked with assertions: it must hold exactly two lists, each of
        length 17, containing only ``int`` or ``float`` values.
        """
        assert len(points) == 2, "Pose coordinates must be a list of 2 lists."
        xs, ys = points
        assert (
            len(xs) == len(ys) == 17
        ), "Pose x and y coordinates must have the same length of 17."
        assert all(
            isinstance(value, (int, float)) for axis in (xs, ys) for value in axis
        ), "Pose coordinates must be integers or floats."
        return Pose(x=list(map(round, xs)), y=list(map(round, ys)))

    @staticmethod
    def from_dict(points: dict[str, list[float]]) -> "Pose":
        """
        Create a pose from a mapping with exactly the keys ``"x"`` and ``"y"``.

        After the key check the values go through the same length/type
        assertions and rounding as ``from_list``.
        """
        assert set(points) == {
            "x",
            "y",
        }, "Pose coordinates must contain keys 'x' and 'y'."
        # The two-item list built here always satisfies the shape check in
        # ``from_list``, so only the shared length/type validation can fire.
        return Pose.from_list([points["x"], points["y"]])
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class Pose3D(DataModel):
    """
    Data model holding pose keypoints together with a per-keypoint visibility.

    Attributes:
        x (list[int]): X-coordinate of each keypoint.
        y (list[int]): Y-coordinate of each keypoint.
        visible (list[float]): Visibility value of each keypoint.

    The three lists are index-aligned: position ``i`` of each list describes
    keypoint ``i``. Both constructors below accept exactly 17 keypoints.
    """

    x: list[int] = Field(default=None)
    y: list[int] = Field(default=None)
    visible: list[float] = Field(default=None)

    @staticmethod
    def from_list(points: list[list[float]]) -> "Pose3D":
        """
        Create a pose from a three-item list ``[xs, ys, visibilities]``.

        The x and y values are passed through the built-in ``round``; the
        visibility values are stored as given. The input is checked with
        assertions: exactly three lists, each of length 17, containing only
        ``int`` or ``float`` values.
        """
        assert len(points) == 3, "Pose coordinates must be a list of 3 lists."
        xs, ys, vis = points
        assert (
            len(xs) == len(ys) == len(vis) == 17
        ), "Pose x, y, and visibility coordinates must have the same length of 17."
        assert all(
            isinstance(value, (int, float))
            for axis in (xs, ys, vis)
            for value in axis
        ), "Pose coordinates must be integers or floats."
        return Pose3D(
            x=list(map(round, xs)),
            y=list(map(round, ys)),
            visible=vis,
        )

    @staticmethod
    def from_dict(points: dict[str, list[float]]) -> "Pose3D":
        """
        Create a pose from a mapping with exactly the keys ``"x"``, ``"y"``
        and ``"visible"``.

        After the key check the values go through the same length/type
        assertions and rounding as ``from_list``.
        """
        assert set(points) == {
            "x",
            "y",
            "visible",
        }, "Pose coordinates must contain keys 'x', 'y', and 'visible'."
        # The three-item list built here always satisfies the shape check in
        # ``from_list``, so only the shared length/type validation can fire.
        return Pose3D.from_list([points["x"], points["y"], points["visible"]])
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from pydantic import Field
|
|
2
|
+
|
|
3
|
+
from datachain.lib.data_model import DataModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Segments(DataModel):
    """
    A data model for representing segments.

    Attributes:
        title (str): The title of the segments.
        x (list[int]): The x-coordinates of the segments.
        y (list[int]): The y-coordinates of the segments.

    The x and y lists are index-aligned and must therefore have the same
    length; both constructors below enforce this.
    """

    title: str = Field(default="")
    x: list[int] = Field(default=None)
    y: list[int] = Field(default=None)

    @staticmethod
    def from_list(points: list[list[float]], title: str = "") -> "Segments":
        """
        Create segments from a two-item list ``[xs, ys]``.

        Every coordinate is passed through the built-in ``round``. The input is
        checked with assertions: exactly two lists of equal length, containing
        only ``int`` or ``float`` values.
        """
        assert len(points) == 2, "Segments coordinates must be a list of 2 lists."
        points_x, points_y = points
        assert len(points_x) == len(
            points_y
        ), "Segments x and y coordinates must have the same length."
        assert all(
            isinstance(value, (int, float)) for value in [*points_x, *points_y]
        ), "Segments coordinates must be integers or floats."
        return Segments(
            title=title,
            x=[round(coord) for coord in points_x],
            y=[round(coord) for coord in points_y],
        )

    @staticmethod
    def from_dict(points: dict[str, list[float]], title: str = "") -> "Segments":
        """
        Create segments from a mapping with exactly the keys ``"x"`` and ``"y"``.

        After the key check the values are validated and rounded by
        ``from_list``, so mismatched x/y lengths are rejected here as well.
        """
        assert set(points) == {
            "x",
            "y",
        }, "Segments coordinates must contain keys 'x' and 'y'."
        # Delegating keeps both constructors in lock-step; in particular it
        # applies the equal-length check that this method previously skipped.
        return Segments.from_list([points["x"], points["y"]], title=title)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from .bbox import YoloBBox, YoloBBoxes, YoloOBBox, YoloOBBoxes
|
|
2
|
+
from .pose import YoloPose, YoloPoses
|
|
3
|
+
from .segment import YoloSegment, YoloSegments
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"YoloBBox",
|
|
7
|
+
"YoloBBoxes",
|
|
8
|
+
"YoloOBBox",
|
|
9
|
+
"YoloOBBoxes",
|
|
10
|
+
"YoloPose",
|
|
11
|
+
"YoloPoses",
|
|
12
|
+
"YoloSegment",
|
|
13
|
+
"YoloSegments",
|
|
14
|
+
]
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module contains the YOLO models.
|
|
3
|
+
|
|
4
|
+
YOLO stands for "You Only Look Once", a family of object detection models that
|
|
5
|
+
are designed to be fast and accurate. The models are trained to detect objects
|
|
6
|
+
in images by dividing the image into a grid and predicting the bounding boxes
|
|
7
|
+
and class probabilities for each grid cell.
|
|
8
|
+
|
|
9
|
+
More information about YOLO can be found here:
|
|
10
|
+
- https://pjreddie.com/darknet/yolo/
|
|
11
|
+
- https://docs.ultralytics.com/
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from io import BytesIO
|
|
15
|
+
from typing import TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
from PIL import Image
|
|
18
|
+
from pydantic import Field
|
|
19
|
+
|
|
20
|
+
from datachain.lib.data_model import DataModel
|
|
21
|
+
from datachain.lib.models.bbox import BBox, OBBox
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from ultralytics.engine.results import Results
|
|
25
|
+
from ultralytics.models import YOLO
|
|
26
|
+
|
|
27
|
+
from datachain.lib.file import File
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class YoloBBox(DataModel):
    """
    A single bounding-box detection produced by a YOLO model.

    Only the first entry of a result summary is kept; when there is nothing
    to convert, an instance with the field defaults is returned.

    Attributes:
        cls: The class of the detected object (defaults to -1).
        name: The name of the detected object (defaults to an empty string).
        confidence: The confidence score of the detection (defaults to 0).
        box: The bounding box of the detected object.
    """

    cls: int = Field(default=-1)
    name: str = Field(default="")
    confidence: float = Field(default=0)
    box: BBox = Field(default=None)

    @staticmethod
    def from_file(yolo: "YOLO", file: "File") -> "YoloBBox":
        """Run ``yolo`` on the image read from ``file`` and convert its first result."""
        image = Image.open(BytesIO(file.read()))
        results = yolo(image)
        if len(results) > 0:
            return YoloBBox.from_result(results[0])
        return YoloBBox()

    @staticmethod
    def from_result(result: "Results") -> "YoloBBox":
        """Convert the first entry of ``result.summary()`` into a ``YoloBBox``."""
        detections = result.summary()
        if not detections:
            return YoloBBox()
        first = detections[0]
        label = first.get("name", "")
        # An entry without a "box" key gets a default-constructed box.
        if "box" in first:
            bbox = BBox.from_dict(first["box"], title=label)
        else:
            bbox = BBox()
        return YoloBBox(
            cls=first["class"],
            name=label,
            confidence=first["confidence"],
            box=bbox,
        )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class YoloBBoxes(DataModel):
    """
    A class representing a list of bounding boxes detected by a YOLO model.

    The four lists are index-aligned: position ``i`` of each list describes
    detection ``i``.

    Attributes:
        cls: A list of classes of the detected objects.
        name: A list of names of the detected objects.
        confidence: A list of confidence scores of the detections.
        box: A list of bounding boxes of the detected objects
    """

    cls: list[int]
    name: list[str]
    confidence: list[float]
    box: list[BBox]

    @staticmethod
    def from_file(yolo: "YOLO", file: "File") -> "YoloBBoxes":
        """Run ``yolo`` on the image read from ``file`` and collect all detections."""
        results = yolo(Image.open(BytesIO(file.read())))
        return YoloBBoxes.from_results(results)

    @staticmethod
    def from_results(results: list["Results"]) -> "YoloBBoxes":
        """
        Flatten the summaries of all ``results`` into parallel lists.

        A summary entry without a ``"box"`` key contributes a default ``BBox()``,
        matching what ``YoloBBox.from_result`` does for the same situation.
        """
        cls, names, confidence, box = [], [], [], []
        for r in results:
            for s in r.summary():
                name = s.get("name", "")
                cls.append(s["class"])
                names.append(name)
                confidence.append(s["confidence"])
                # NOTE(review): previously an empty dict was handed to
                # BBox.from_dict here; that presumably fails its key
                # validation, so fall back to a default box instead.
                box.append(
                    BBox.from_dict(s["box"], title=name) if "box" in s else BBox()
                )
        return YoloBBoxes(
            cls=cls,
            name=names,
            confidence=confidence,
            box=box,
        )
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class YoloOBBox(DataModel):
    """
    A single oriented-bounding-box detection produced by a YOLO model.

    Only the first entry of a result summary is kept; when there is nothing
    to convert, an instance with the field defaults is returned.

    Attributes:
        cls: The class of the detected object (defaults to -1).
        name: The name of the detected object (defaults to an empty string).
        confidence: The confidence score of the detection (defaults to 0).
        box: The oriented bounding box of the detected object.
    """

    cls: int = Field(default=-1)
    name: str = Field(default="")
    confidence: float = Field(default=0)
    box: OBBox = Field(default=None)

    @staticmethod
    def from_file(yolo: "YOLO", file: "File") -> "YoloOBBox":
        """Run ``yolo`` on the image read from ``file`` and convert its first result."""
        image = Image.open(BytesIO(file.read()))
        results = yolo(image)
        if len(results) > 0:
            return YoloOBBox.from_result(results[0])
        return YoloOBBox()

    @staticmethod
    def from_result(result: "Results") -> "YoloOBBox":
        """Convert the first entry of ``result.summary()`` into a ``YoloOBBox``."""
        detections = result.summary()
        if not detections:
            return YoloOBBox()
        first = detections[0]
        label = first.get("name", "")
        # An entry without a "box" key gets a default-constructed box.
        if "box" in first:
            obbox = OBBox.from_dict(first["box"], title=label)
        else:
            obbox = OBBox()
        return YoloOBBox(
            cls=first["class"],
            name=label,
            confidence=first["confidence"],
            box=obbox,
        )
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class YoloOBBoxes(DataModel):
    """
    A class representing a list of oriented bounding boxes detected by a YOLO model.

    The four lists are index-aligned: position ``i`` of each list describes
    detection ``i``.

    Attributes:
        cls: A list of classes of the detected objects.
        name: A list of names of the detected objects.
        confidence: A list of confidence scores of the detections.
        box: A list of oriented bounding boxes of the detected objects.
    """

    cls: list[int]
    name: list[str]
    confidence: list[float]
    box: list[OBBox]

    @staticmethod
    def from_file(yolo: "YOLO", file: "File") -> "YoloOBBoxes":
        """Run ``yolo`` on the image read from ``file`` and collect all detections."""
        results = yolo(Image.open(BytesIO(file.read())))
        return YoloOBBoxes.from_results(results)

    @staticmethod
    def from_results(results: list["Results"]) -> "YoloOBBoxes":
        """
        Flatten the summaries of all ``results`` into parallel lists.

        A summary entry without a ``"box"`` key contributes a default ``OBBox()``,
        matching what ``YoloOBBox.from_result`` does for the same situation.
        """
        cls, names, confidence, box = [], [], [], []
        for r in results:
            for s in r.summary():
                name = s.get("name", "")
                cls.append(s["class"])
                names.append(name)
                confidence.append(s["confidence"])
                # NOTE(review): previously an empty dict was handed to
                # OBBox.from_dict here; that presumably fails its key
                # validation, so fall back to a default box instead.
                box.append(
                    OBBox.from_dict(s["box"], title=name) if "box" in s else OBBox()
                )
        return YoloOBBoxes(
            cls=cls,
            name=names,
            confidence=confidence,
            box=box,
        )
|