datachain 0.6.5__tar.gz → 0.6.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.6.5 → datachain-0.6.6}/.pre-commit-config.yaml +1 -1
- {datachain-0.6.5/src/datachain.egg-info → datachain-0.6.6}/PKG-INFO +1 -1
- {datachain-0.6.5 → datachain-0.6.6}/overrides/main.html +1 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/dc.py +17 -4
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/hf.py +4 -6
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/query/dataset.py +30 -1
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/types.py +29 -0
- {datachain-0.6.5 → datachain-0.6.6/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.6.5 → datachain-0.6.6}/tests/examples/test_examples.py +5 -1
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_datachain.py +24 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_listing.py +1 -1
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_datachain.py +28 -2
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_signal_schema.py +2 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_data_storage.py +2 -0
- {datachain-0.6.5 → datachain-0.6.6}/.cruft.json +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/.gitattributes +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/.github/codecov.yaml +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/.github/dependabot.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/.github/workflows/release.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/.github/workflows/tests.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/.gitignore +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/CONTRIBUTING.rst +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/LICENSE +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/README.rst +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/docs/assets/datachain.svg +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/docs/assets/flowchart.png +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/docs/index.md +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/docs/references/datachain.md +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/docs/references/datatype.md +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/docs/references/file.md +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/docs/references/index.md +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/docs/references/sql.md +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/docs/references/torch.md +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/docs/references/udf.md +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/multimodal/wds.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/mkdocs.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/noxfile.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/pyproject.toml +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/setup.cfg +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/__main__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/asyn.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/cache.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/cli.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/cli_utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/client/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/client/azure.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/client/gcs.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/client/hf.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/client/local.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/client/s3.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/config.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/data_storage/id_generator.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/dataset.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/error.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/job.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/clip.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/file.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/func/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/func/aggregate.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/func/func.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/image.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/listing.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/settings.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/tar.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/text.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/udf.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/listing.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/node.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/progress.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/py.typed +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/query/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/query/batch.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/query/metrics.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/query/params.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/query/queue.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/query/schema.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/query/session.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/remote/studio.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/sql/utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/studio.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/telemetry.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain/utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/conftest.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/data.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/examples/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/examples/wds_data.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_catalog.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_client.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_datasets.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_ls.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_metrics.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_pull.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_pytorch.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/func/test_query.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/scripts/feature_class.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/test_atomicity.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/test_cli_e2e.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/test_cli_studio.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/test_query_e2e.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/test_telemetry.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_asyn.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_cache.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_catalog.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_client.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_config.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_dataset.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_id_generator.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_listing.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_metastore.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_query.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_query_params.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_serializer.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_session.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.6.5 → datachain-0.6.6}/tests/utils.py +0 -0
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
{{ super() }}
|
|
6
6
|
|
|
7
|
+
<script data-domain="docs.datachain.ai" src="https://plausible.io/js/script.outbound-links.js"></script>
|
|
7
8
|
<script type="text/javascript">
|
|
8
9
|
!function () { var e, t, n; e = "14ffd92a6cbf5f2", t = function () { Reo.init({ clientID: "14ffd92a6cbf5f2" }) }, (n = document.createElement("script")).src = "https://static.reo.dev/" + e + "/reo.js", n.async = !0, n.onload = t, document.head.appendChild(n) }();
|
|
9
10
|
</script>
|
|
@@ -981,10 +981,23 @@ class DataChain:
|
|
|
981
981
|
|
|
982
982
|
@resolve_columns
|
|
983
983
|
def order_by(self, *args, descending: bool = False) -> "Self":
|
|
984
|
-
"""Orders by specified set of
|
|
984
|
+
"""Orders by specified set of columns.
|
|
985
985
|
|
|
986
986
|
Parameters:
|
|
987
987
|
descending (bool): Whether to sort in descending order or not.
|
|
988
|
+
|
|
989
|
+
Example:
|
|
990
|
+
```py
|
|
991
|
+
dc.order_by("similarity_score", descending=True).limit(10)
|
|
992
|
+
```
|
|
993
|
+
|
|
994
|
+
Note:
|
|
995
|
+
Order is not guaranteed when steps are added after an `order_by` statement.
|
|
996
|
+
I.e., when using `from_dataset`, an `order_by` statement should be used if
|
|
997
|
+
the order of the records in the chain is important.
|
|
998
|
+
Using `order_by` directly before `limit`, `collect` and `collect_flatten`
|
|
999
|
+
will give expected results.
|
|
1000
|
+
See https://github.com/iterative/datachain/issues/477 for further details.
|
|
988
1001
|
"""
|
|
989
1002
|
if descending:
|
|
990
1003
|
args = tuple(sqlalchemy.desc(a) for a in args)
|
|
@@ -1179,7 +1192,7 @@ class DataChain:
|
|
|
1179
1192
|
a tuple of row values.
|
|
1180
1193
|
"""
|
|
1181
1194
|
db_signals = self._effective_signals_schema.db_signals()
|
|
1182
|
-
with self._query.
|
|
1195
|
+
with self._query.ordered_select(*db_signals).as_iterable() as rows:
|
|
1183
1196
|
if row_factory:
|
|
1184
1197
|
rows = (row_factory(db_signals, r) for r in rows)
|
|
1185
1198
|
yield from rows
|
|
@@ -1270,7 +1283,7 @@ class DataChain:
|
|
|
1270
1283
|
chain = self.select(*cols) if cols else self
|
|
1271
1284
|
signals_schema = chain._effective_signals_schema
|
|
1272
1285
|
db_signals = signals_schema.db_signals()
|
|
1273
|
-
with self._query.
|
|
1286
|
+
with self._query.ordered_select(*db_signals).as_iterable() as rows:
|
|
1274
1287
|
for row in rows:
|
|
1275
1288
|
ret = signals_schema.row_to_features(
|
|
1276
1289
|
row, catalog=chain.session.catalog, cache=chain._settings.cache
|
|
@@ -1678,7 +1691,7 @@ class DataChain:
|
|
|
1678
1691
|
|
|
1679
1692
|
model_name = model_name or object_name or ""
|
|
1680
1693
|
hf_features = next(iter(ds_dict.values())).features
|
|
1681
|
-
output = output | get_output_schema(hf_features
|
|
1694
|
+
output = output | get_output_schema(hf_features)
|
|
1682
1695
|
model = dict_to_data_model(model_name, output)
|
|
1683
1696
|
if object_name:
|
|
1684
1697
|
output = {object_name: model}
|
|
@@ -138,17 +138,15 @@ def convert_feature(val: Any, feat: Any, anno: Any) -> Any: # noqa: PLR0911
|
|
|
138
138
|
return HFAudio(**val)
|
|
139
139
|
|
|
140
140
|
|
|
141
|
-
def get_output_schema(
|
|
142
|
-
features: Features, model_name: str = "", stream: bool = True
|
|
143
|
-
) -> dict[str, DataType]:
|
|
141
|
+
def get_output_schema(features: Features) -> dict[str, DataType]:
|
|
144
142
|
"""Generate UDF output schema from huggingface datasets features."""
|
|
145
143
|
fields_dict = {}
|
|
146
144
|
for name, val in features.items():
|
|
147
|
-
fields_dict[name] = _feature_to_chain_type(name, val)
|
|
148
|
-
return fields_dict
|
|
145
|
+
fields_dict[name] = _feature_to_chain_type(name, val)
|
|
146
|
+
return fields_dict
|
|
149
147
|
|
|
150
148
|
|
|
151
|
-
def _feature_to_chain_type(name: str, val: Any) ->
|
|
149
|
+
def _feature_to_chain_type(name: str, val: Any) -> DataType: # noqa: PLR0911
|
|
152
150
|
if isinstance(val, Value):
|
|
153
151
|
return arrow_type_mapper(val.pa_type)
|
|
154
152
|
if isinstance(val, ClassLabel):
|
|
@@ -1276,6 +1276,27 @@ class DatasetQuery:
|
|
|
1276
1276
|
query.steps.append(SQLSelect((*args, *named_args)))
|
|
1277
1277
|
return query
|
|
1278
1278
|
|
|
1279
|
+
@detach
|
|
1280
|
+
def ordered_select(self, *args, **kwargs) -> "Self":
|
|
1281
|
+
"""
|
|
1282
|
+
Select the given columns or expressions using a subquery whilst
|
|
1283
|
+
maintaining query ordering (only applicable if last step was order_by).
|
|
1284
|
+
|
|
1285
|
+
If used with no arguments, this simply creates a subquery and
|
|
1286
|
+
selects all columns from it.
|
|
1287
|
+
|
|
1288
|
+
Example:
|
|
1289
|
+
>>> ds.ordered_select(C.name, C.size * 10)
|
|
1290
|
+
>>> ds.ordered_select(C.name, size10x=C.size * 10)
|
|
1291
|
+
"""
|
|
1292
|
+
named_args = [v.label(k) for k, v in kwargs.items()]
|
|
1293
|
+
query = self.clone()
|
|
1294
|
+
order_by = query.last_step if query.is_ordered else None
|
|
1295
|
+
query.steps.append(SQLSelect((*args, *named_args)))
|
|
1296
|
+
if order_by:
|
|
1297
|
+
query.steps.append(order_by)
|
|
1298
|
+
return query
|
|
1299
|
+
|
|
1279
1300
|
@detach
|
|
1280
1301
|
def select_except(self, *args) -> "Self":
|
|
1281
1302
|
"""
|
|
@@ -1338,7 +1359,7 @@ class DatasetQuery:
|
|
|
1338
1359
|
query = self.clone(new_table=False)
|
|
1339
1360
|
if (
|
|
1340
1361
|
query.steps
|
|
1341
|
-
and (last_step := query.
|
|
1362
|
+
and (last_step := query.last_step)
|
|
1342
1363
|
and isinstance(last_step, SQLLimit)
|
|
1343
1364
|
):
|
|
1344
1365
|
query.steps[-1] = SQLLimit(min(n, last_step.n))
|
|
@@ -1591,3 +1612,11 @@ class DatasetQuery:
|
|
|
1591
1612
|
finally:
|
|
1592
1613
|
self.cleanup()
|
|
1593
1614
|
return self.__class__(name=name, version=version, catalog=self.catalog)
|
|
1615
|
+
|
|
1616
|
+
@property
|
|
1617
|
+
def is_ordered(self) -> bool:
|
|
1618
|
+
return isinstance(self.last_step, SQLOrderBy)
|
|
1619
|
+
|
|
1620
|
+
@property
|
|
1621
|
+
def last_step(self) -> Optional[Step]:
|
|
1622
|
+
return self.steps[-1] if self.steps else None
|
|
@@ -187,6 +187,22 @@ class Int32(Int):
|
|
|
187
187
|
return read_converter(dialect).int32(value)
|
|
188
188
|
|
|
189
189
|
|
|
190
|
+
class UInt32(Int):
|
|
191
|
+
def load_dialect_impl(self, dialect):
|
|
192
|
+
return converter(dialect).uint32()
|
|
193
|
+
|
|
194
|
+
@staticmethod
|
|
195
|
+
def default_value(dialect):
|
|
196
|
+
return type_defaults(dialect).uint32()
|
|
197
|
+
|
|
198
|
+
@staticmethod
|
|
199
|
+
def db_default_value(dialect):
|
|
200
|
+
return db_defaults(dialect).uint32()
|
|
201
|
+
|
|
202
|
+
def on_read_convert(self, value, dialect):
|
|
203
|
+
return read_converter(dialect).uint32(value)
|
|
204
|
+
|
|
205
|
+
|
|
190
206
|
class Int64(Int):
|
|
191
207
|
def load_dialect_impl(self, dialect):
|
|
192
208
|
return converter(dialect).int64()
|
|
@@ -395,6 +411,9 @@ class TypeReadConverter:
|
|
|
395
411
|
def int32(self, value):
|
|
396
412
|
return value
|
|
397
413
|
|
|
414
|
+
def uint32(self, value):
|
|
415
|
+
return value
|
|
416
|
+
|
|
398
417
|
def int64(self, value):
|
|
399
418
|
return value
|
|
400
419
|
|
|
@@ -446,6 +465,9 @@ class TypeConverter:
|
|
|
446
465
|
def int32(self):
|
|
447
466
|
return self.int()
|
|
448
467
|
|
|
468
|
+
def uint32(self):
|
|
469
|
+
return self.int()
|
|
470
|
+
|
|
449
471
|
def int64(self):
|
|
450
472
|
return self.int()
|
|
451
473
|
|
|
@@ -487,6 +509,9 @@ class TypeDefaults:
|
|
|
487
509
|
def int32(self):
|
|
488
510
|
return None
|
|
489
511
|
|
|
512
|
+
def uint32(self):
|
|
513
|
+
return None
|
|
514
|
+
|
|
490
515
|
def int64(self):
|
|
491
516
|
return None
|
|
492
517
|
|
|
@@ -528,6 +553,9 @@ class DBDefaults:
|
|
|
528
553
|
def int32(self):
|
|
529
554
|
return self.int()
|
|
530
555
|
|
|
556
|
+
def uint32(self):
|
|
557
|
+
return self.int()
|
|
558
|
+
|
|
531
559
|
def int64(self):
|
|
532
560
|
return self.int()
|
|
533
561
|
|
|
@@ -561,6 +589,7 @@ TYPES = [
|
|
|
561
589
|
Boolean,
|
|
562
590
|
Int,
|
|
563
591
|
Int32,
|
|
592
|
+
UInt32,
|
|
564
593
|
Int64,
|
|
565
594
|
UInt64,
|
|
566
595
|
Float,
|
|
@@ -19,8 +19,12 @@ llm_and_nlp_examples = sorted(
|
|
|
19
19
|
[
|
|
20
20
|
filename
|
|
21
21
|
for filename in glob.glob("examples/llm_and_nlp/**/*.py", recursive=True)
|
|
22
|
-
# no anthropic token
|
|
22
|
+
# no anthropic token, HF runs against actual API - thus run it only once
|
|
23
23
|
if "claude" not in filename
|
|
24
|
+
and (
|
|
25
|
+
"hf-" not in filename
|
|
26
|
+
or (sys.platform == "darwin" and sys.version_info >= (3, 12))
|
|
27
|
+
)
|
|
24
28
|
]
|
|
25
29
|
)
|
|
26
30
|
|
|
@@ -448,6 +448,30 @@ def test_show_no_truncate(capsys, test_session):
|
|
|
448
448
|
assert details[i] in normalized_output
|
|
449
449
|
|
|
450
450
|
|
|
451
|
+
@pytest.mark.parametrize("ordered_by", ["letter", "number"])
|
|
452
|
+
def test_show_ordered(capsys, test_session, ordered_by):
|
|
453
|
+
numbers = [6, 2, 3, 1, 5, 7, 4]
|
|
454
|
+
letters = ["u", "y", "x", "z", "v", "t", "w"]
|
|
455
|
+
|
|
456
|
+
DataChain.from_values(
|
|
457
|
+
number=numbers, letter=letters, session=test_session
|
|
458
|
+
).order_by(ordered_by).show()
|
|
459
|
+
|
|
460
|
+
captured = capsys.readouterr()
|
|
461
|
+
normalized_lines = [
|
|
462
|
+
re.sub(r"\s+", " ", line).strip() for line in captured.out.strip().split("\n")
|
|
463
|
+
]
|
|
464
|
+
|
|
465
|
+
ordered_entries = sorted(
|
|
466
|
+
zip(numbers, letters), key=lambda x: x[0 if ordered_by == "number" else 1]
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
assert normalized_lines[0].strip() == "number letter"
|
|
470
|
+
for i, line in enumerate(normalized_lines[1:]):
|
|
471
|
+
number, letter = ordered_entries[i]
|
|
472
|
+
assert line == f"{i} {number} {letter}"
|
|
473
|
+
|
|
474
|
+
|
|
451
475
|
def test_from_storage_dataset_stats(tmp_dir, test_session):
|
|
452
476
|
for i in range(4):
|
|
453
477
|
(tmp_dir / f"file{i}.txt").write_text(f"file{i}")
|
|
@@ -17,7 +17,7 @@ def test_listing_generator(cloud_test_catalog, cloud_type):
|
|
|
17
17
|
entries = sorted(
|
|
18
18
|
[e for e in ENTRIES if e.path.startswith("cats/")], key=lambda e: e.path
|
|
19
19
|
)
|
|
20
|
-
files =
|
|
20
|
+
files = dc.order_by("file.path").collect("file")
|
|
21
21
|
|
|
22
22
|
for cat_file, cat_entry in zip(files, entries):
|
|
23
23
|
assert cat_file.source == ctc.src_uri
|
|
@@ -1824,6 +1824,32 @@ def test_order_by_with_nested_columns(test_session, with_function):
|
|
|
1824
1824
|
]
|
|
1825
1825
|
|
|
1826
1826
|
|
|
1827
|
+
def test_order_by_collect(test_session):
|
|
1828
|
+
numbers = [6, 2, 3, 1, 5, 7, 4]
|
|
1829
|
+
letters = ["u", "y", "x", "z", "v", "t", "w"]
|
|
1830
|
+
|
|
1831
|
+
dc = DataChain.from_values(number=numbers, letter=letters, session=test_session)
|
|
1832
|
+
assert list(dc.order_by("number").collect()) == [
|
|
1833
|
+
(1, "z"),
|
|
1834
|
+
(2, "y"),
|
|
1835
|
+
(3, "x"),
|
|
1836
|
+
(4, "w"),
|
|
1837
|
+
(5, "v"),
|
|
1838
|
+
(6, "u"),
|
|
1839
|
+
(7, "t"),
|
|
1840
|
+
]
|
|
1841
|
+
|
|
1842
|
+
assert list(dc.order_by("letter").collect()) == [
|
|
1843
|
+
(7, "t"),
|
|
1844
|
+
(6, "u"),
|
|
1845
|
+
(5, "v"),
|
|
1846
|
+
(4, "w"),
|
|
1847
|
+
(3, "x"),
|
|
1848
|
+
(2, "y"),
|
|
1849
|
+
(1, "z"),
|
|
1850
|
+
]
|
|
1851
|
+
|
|
1852
|
+
|
|
1827
1853
|
@pytest.mark.parametrize("with_function", [True, False])
|
|
1828
1854
|
def test_order_by_descending(test_session, with_function):
|
|
1829
1855
|
names = ["a.txt", "c.txt", "d.txt", "a.txt", "b.txt"]
|
|
@@ -1852,7 +1878,7 @@ def test_union(test_session):
|
|
|
1852
1878
|
chain2 = DataChain.from_values(value=[3, 4], session=test_session)
|
|
1853
1879
|
chain3 = chain1 | chain2
|
|
1854
1880
|
assert chain3.count() == 4
|
|
1855
|
-
assert
|
|
1881
|
+
assert list(chain3.order_by("value").collect("value")) == [1, 2, 3, 4]
|
|
1856
1882
|
|
|
1857
1883
|
|
|
1858
1884
|
def test_union_different_columns(test_session):
|
|
@@ -1887,7 +1913,7 @@ def test_union_different_column_order(test_session):
|
|
|
1887
1913
|
chain2 = DataChain.from_values(
|
|
1888
1914
|
name=["different", "order"], value=[9, 10], session=test_session
|
|
1889
1915
|
)
|
|
1890
|
-
assert
|
|
1916
|
+
assert list(chain1.union(chain2).order_by("value").collect()) == [
|
|
1891
1917
|
(1, "chain"),
|
|
1892
1918
|
(2, "more"),
|
|
1893
1919
|
(9, "different"),
|
|
@@ -27,6 +27,7 @@ from datachain.sql.types import (
|
|
|
27
27
|
Int32,
|
|
28
28
|
Int64,
|
|
29
29
|
String,
|
|
30
|
+
UInt32,
|
|
30
31
|
UInt64,
|
|
31
32
|
)
|
|
32
33
|
|
|
@@ -721,6 +722,7 @@ def test_mutate_change_type():
|
|
|
721
722
|
[Boolean, bool],
|
|
722
723
|
[Int, int],
|
|
723
724
|
[Int32, int],
|
|
725
|
+
[UInt32, int],
|
|
724
726
|
[Int64, int],
|
|
725
727
|
[UInt64, int],
|
|
726
728
|
[Float, float],
|
|
@@ -18,6 +18,7 @@ from datachain.sql.types import (
|
|
|
18
18
|
Int32,
|
|
19
19
|
Int64,
|
|
20
20
|
String,
|
|
21
|
+
UInt32,
|
|
21
22
|
UInt64,
|
|
22
23
|
)
|
|
23
24
|
from tests.utils import (
|
|
@@ -173,6 +174,7 @@ def test_convert_type(cloud_test_catalog):
|
|
|
173
174
|
[Boolean(), False],
|
|
174
175
|
[Int(), 0],
|
|
175
176
|
[Int32(), 0],
|
|
177
|
+
[UInt32(), 0],
|
|
176
178
|
[Int64(), 0],
|
|
177
179
|
[UInt64(), 0],
|
|
178
180
|
[Float(), lambda val: math.isnan(val)],
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|