datachain 0.6.5__tar.gz → 0.6.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.6.5 → datachain-0.6.7}/.github/workflows/tests-studio.yml +2 -5
- {datachain-0.6.5 → datachain-0.6.7}/.pre-commit-config.yaml +1 -1
- {datachain-0.6.5/src/datachain.egg-info → datachain-0.6.7}/PKG-INFO +1 -1
- {datachain-0.6.5 → datachain-0.6.7}/overrides/main.html +1 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/catalog/catalog.py +2 -6
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/data_storage/schema.py +4 -2
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/dc.py +17 -4
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/hf.py +4 -6
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/query/dataset.py +30 -1
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/types.py +31 -0
- {datachain-0.6.5 → datachain-0.6.7/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.6.5 → datachain-0.6.7}/tests/examples/test_examples.py +5 -1
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_datachain.py +24 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_datasets.py +2 -5
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_listing.py +1 -1
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_pull.py +4 -6
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_datachain.py +28 -2
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_signal_schema.py +2 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_data_storage.py +2 -0
- {datachain-0.6.5 → datachain-0.6.7}/.cruft.json +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/.gitattributes +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/.github/codecov.yaml +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/.github/dependabot.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/.github/workflows/release.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/.github/workflows/tests.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/.gitignore +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/CONTRIBUTING.rst +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/LICENSE +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/README.rst +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/docs/assets/datachain.svg +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/docs/assets/flowchart.png +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/docs/index.md +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/docs/references/datachain.md +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/docs/references/datatype.md +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/docs/references/file.md +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/docs/references/index.md +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/docs/references/sql.md +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/docs/references/torch.md +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/docs/references/udf.md +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/multimodal/wds.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/mkdocs.yml +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/noxfile.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/pyproject.toml +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/setup.cfg +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/__main__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/asyn.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/cache.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/cli.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/cli_utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/client/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/client/azure.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/client/gcs.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/client/hf.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/client/local.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/client/s3.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/config.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/data_storage/id_generator.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/dataset.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/error.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/job.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/clip.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/file.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/func/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/func/aggregate.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/func/func.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/image.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/listing.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/settings.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/tar.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/text.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/udf.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/listing.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/node.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/progress.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/py.typed +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/query/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/query/batch.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/query/metrics.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/query/params.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/query/queue.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/query/schema.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/query/session.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/remote/studio.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/sql/utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/studio.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/telemetry.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain/utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/conftest.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/data.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/examples/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/examples/wds_data.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_catalog.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_client.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_ls.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_metrics.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_pytorch.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/func/test_query.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/scripts/feature_class.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/test_atomicity.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/test_cli_e2e.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/test_cli_studio.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/test_query_e2e.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/test_telemetry.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_asyn.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_cache.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_catalog.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_client.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_config.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_dataset.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_id_generator.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_listing.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_metastore.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_query.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_query_params.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_serializer.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_session.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_utils.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.6.5 → datachain-0.6.7}/tests/utils.py +0 -0
|
@@ -52,14 +52,11 @@ jobs:
|
|
|
52
52
|
STUDIO_READ_ACCESS_TOKEN: ${{ secrets.ITERATIVE_STUDIO_READ_ACCESS_TOKEN }}
|
|
53
53
|
run: |
|
|
54
54
|
echo "DataChain branch: $BRANCH"
|
|
55
|
-
if
|
|
56
|
-
then
|
|
57
|
-
STUDIO_BRANCH=develop
|
|
58
|
-
elif git ls-remote --heads https://"$STUDIO_READ_ACCESS_TOKEN"@github.com/iterative/studio.git "$BRANCH" | grep -F "$BRANCH" 2>&1>/dev/null
|
|
55
|
+
if git ls-remote --heads https://"$STUDIO_READ_ACCESS_TOKEN"@github.com/iterative/studio.git "$BRANCH" | grep -F "$BRANCH" 2>&1>/dev/null
|
|
59
56
|
then
|
|
60
57
|
STUDIO_BRANCH="$BRANCH"
|
|
61
58
|
else
|
|
62
|
-
STUDIO_BRANCH=
|
|
59
|
+
STUDIO_BRANCH=main
|
|
63
60
|
fi
|
|
64
61
|
echo "STUDIO_BRANCH=$STUDIO_BRANCH" >> $GITHUB_ENV
|
|
65
62
|
echo "Studio branch: $STUDIO_BRANCH"
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
{{ super() }}
|
|
6
6
|
|
|
7
|
+
<script data-domain="docs.datachain.ai" src="https://plausible.io/js/script.outbound-links.js"></script>
|
|
7
8
|
<script type="text/javascript">
|
|
8
9
|
!function () { var e, t, n; e = "14ffd92a6cbf5f2", t = function () { Reo.init({ clientID: "14ffd92a6cbf5f2" }) }, (n = document.createElement("script")).src = "https://static.reo.dev/" + e + "/reo.js", n.async = !0, n.onload = t, document.head.appendChild(n) }();
|
|
9
10
|
</script>
|
|
@@ -58,7 +58,7 @@ from datachain.listing import Listing
|
|
|
58
58
|
from datachain.node import DirType, Node, NodeWithPath
|
|
59
59
|
from datachain.nodes_thread_pool import NodesThreadPool
|
|
60
60
|
from datachain.remote.studio import StudioClient
|
|
61
|
-
from datachain.sql.types import DateTime, SQLType, String
|
|
61
|
+
from datachain.sql.types import DateTime, SQLType
|
|
62
62
|
from datachain.utils import (
|
|
63
63
|
DataChainDir,
|
|
64
64
|
batched,
|
|
@@ -196,11 +196,6 @@ class DatasetRowsFetcher(NodesThreadPool):
|
|
|
196
196
|
for c in [c for c, t in self.schema.items() if t == DateTime]:
|
|
197
197
|
df[c] = pd.to_datetime(df[c], unit="s")
|
|
198
198
|
|
|
199
|
-
# strings are represented as binaries in parquet export so need to
|
|
200
|
-
# decode it back to strings
|
|
201
|
-
for c in [c for c, t in self.schema.items() if t == String]:
|
|
202
|
-
df[c] = df[c].str.decode("utf-8")
|
|
203
|
-
|
|
204
199
|
def do_task(self, urls):
|
|
205
200
|
import lz4.frame
|
|
206
201
|
import pandas as pd
|
|
@@ -1403,6 +1398,7 @@ class Catalog:
|
|
|
1403
1398
|
query_script=remote_dataset_version.query_script,
|
|
1404
1399
|
create_rows=True,
|
|
1405
1400
|
columns=columns,
|
|
1401
|
+
feature_schema=remote_dataset_version.feature_schema,
|
|
1406
1402
|
validate_version=False,
|
|
1407
1403
|
)
|
|
1408
1404
|
|
|
@@ -145,6 +145,8 @@ class DirExpansion:
|
|
|
145
145
|
|
|
146
146
|
|
|
147
147
|
class DataTable:
|
|
148
|
+
MAX_RANDOM = 2**63 - 1
|
|
149
|
+
|
|
148
150
|
def __init__(
|
|
149
151
|
self,
|
|
150
152
|
name: str,
|
|
@@ -269,8 +271,8 @@ class DataTable:
|
|
|
269
271
|
def delete(self):
|
|
270
272
|
return self.apply_conditions(self.table.delete())
|
|
271
273
|
|
|
272
|
-
@staticmethod
|
|
273
|
-
def sys_columns():
|
|
274
|
+
@classmethod
|
|
275
|
+
def sys_columns(cls):
|
|
274
276
|
return [
|
|
275
277
|
sa.Column("sys__id", Int, primary_key=True),
|
|
276
278
|
sa.Column(
|
|
@@ -981,10 +981,23 @@ class DataChain:
|
|
|
981
981
|
|
|
982
982
|
@resolve_columns
|
|
983
983
|
def order_by(self, *args, descending: bool = False) -> "Self":
|
|
984
|
-
"""Orders by specified set of
|
|
984
|
+
"""Orders by specified set of columns.
|
|
985
985
|
|
|
986
986
|
Parameters:
|
|
987
987
|
descending (bool): Whether to sort in descending order or not.
|
|
988
|
+
|
|
989
|
+
Example:
|
|
990
|
+
```py
|
|
991
|
+
dc.order_by("similarity_score", descending=True).limit(10)
|
|
992
|
+
```
|
|
993
|
+
|
|
994
|
+
Note:
|
|
995
|
+
Order is not guaranteed when steps are added after an `order_by` statement.
|
|
996
|
+
I.e. when using `from_dataset` an `order_by` statement should be used if
|
|
997
|
+
the order of the records in the chain is important.
|
|
998
|
+
Using `order_by` directly before `limit`, `collect` and `collect_flatten`
|
|
999
|
+
will give expected results.
|
|
1000
|
+
See https://github.com/iterative/datachain/issues/477 for further details.
|
|
988
1001
|
"""
|
|
989
1002
|
if descending:
|
|
990
1003
|
args = tuple(sqlalchemy.desc(a) for a in args)
|
|
@@ -1179,7 +1192,7 @@ class DataChain:
|
|
|
1179
1192
|
a tuple of row values.
|
|
1180
1193
|
"""
|
|
1181
1194
|
db_signals = self._effective_signals_schema.db_signals()
|
|
1182
|
-
with self._query.
|
|
1195
|
+
with self._query.ordered_select(*db_signals).as_iterable() as rows:
|
|
1183
1196
|
if row_factory:
|
|
1184
1197
|
rows = (row_factory(db_signals, r) for r in rows)
|
|
1185
1198
|
yield from rows
|
|
@@ -1270,7 +1283,7 @@ class DataChain:
|
|
|
1270
1283
|
chain = self.select(*cols) if cols else self
|
|
1271
1284
|
signals_schema = chain._effective_signals_schema
|
|
1272
1285
|
db_signals = signals_schema.db_signals()
|
|
1273
|
-
with self._query.
|
|
1286
|
+
with self._query.ordered_select(*db_signals).as_iterable() as rows:
|
|
1274
1287
|
for row in rows:
|
|
1275
1288
|
ret = signals_schema.row_to_features(
|
|
1276
1289
|
row, catalog=chain.session.catalog, cache=chain._settings.cache
|
|
@@ -1678,7 +1691,7 @@ class DataChain:
|
|
|
1678
1691
|
|
|
1679
1692
|
model_name = model_name or object_name or ""
|
|
1680
1693
|
hf_features = next(iter(ds_dict.values())).features
|
|
1681
|
-
output = output | get_output_schema(hf_features
|
|
1694
|
+
output = output | get_output_schema(hf_features)
|
|
1682
1695
|
model = dict_to_data_model(model_name, output)
|
|
1683
1696
|
if object_name:
|
|
1684
1697
|
output = {object_name: model}
|
|
@@ -138,17 +138,15 @@ def convert_feature(val: Any, feat: Any, anno: Any) -> Any: # noqa: PLR0911
|
|
|
138
138
|
return HFAudio(**val)
|
|
139
139
|
|
|
140
140
|
|
|
141
|
-
def get_output_schema(
|
|
142
|
-
features: Features, model_name: str = "", stream: bool = True
|
|
143
|
-
) -> dict[str, DataType]:
|
|
141
|
+
def get_output_schema(features: Features) -> dict[str, DataType]:
|
|
144
142
|
"""Generate UDF output schema from huggingface datasets features."""
|
|
145
143
|
fields_dict = {}
|
|
146
144
|
for name, val in features.items():
|
|
147
|
-
fields_dict[name] = _feature_to_chain_type(name, val)
|
|
148
|
-
return fields_dict
|
|
145
|
+
fields_dict[name] = _feature_to_chain_type(name, val)
|
|
146
|
+
return fields_dict
|
|
149
147
|
|
|
150
148
|
|
|
151
|
-
def _feature_to_chain_type(name: str, val: Any) ->
|
|
149
|
+
def _feature_to_chain_type(name: str, val: Any) -> DataType: # noqa: PLR0911
|
|
152
150
|
if isinstance(val, Value):
|
|
153
151
|
return arrow_type_mapper(val.pa_type)
|
|
154
152
|
if isinstance(val, ClassLabel):
|
|
@@ -1276,6 +1276,27 @@ class DatasetQuery:
|
|
|
1276
1276
|
query.steps.append(SQLSelect((*args, *named_args)))
|
|
1277
1277
|
return query
|
|
1278
1278
|
|
|
1279
|
+
@detach
|
|
1280
|
+
def ordered_select(self, *args, **kwargs) -> "Self":
|
|
1281
|
+
"""
|
|
1282
|
+
Select the given columns or expressions using a subquery whilst
|
|
1283
|
+
maintaining query ordering (only applicable if last step was order_by).
|
|
1284
|
+
|
|
1285
|
+
If used with no arguments, this simply creates a subquery and
|
|
1286
|
+
select all columns from it.
|
|
1287
|
+
|
|
1288
|
+
Example:
|
|
1289
|
+
>>> ds.ordered_select(C.name, C.size * 10)
|
|
1290
|
+
>>> ds.ordered_select(C.name, size10x=C.size * 10)
|
|
1291
|
+
"""
|
|
1292
|
+
named_args = [v.label(k) for k, v in kwargs.items()]
|
|
1293
|
+
query = self.clone()
|
|
1294
|
+
order_by = query.last_step if query.is_ordered else None
|
|
1295
|
+
query.steps.append(SQLSelect((*args, *named_args)))
|
|
1296
|
+
if order_by:
|
|
1297
|
+
query.steps.append(order_by)
|
|
1298
|
+
return query
|
|
1299
|
+
|
|
1279
1300
|
@detach
|
|
1280
1301
|
def select_except(self, *args) -> "Self":
|
|
1281
1302
|
"""
|
|
@@ -1338,7 +1359,7 @@ class DatasetQuery:
|
|
|
1338
1359
|
query = self.clone(new_table=False)
|
|
1339
1360
|
if (
|
|
1340
1361
|
query.steps
|
|
1341
|
-
and (last_step := query.
|
|
1362
|
+
and (last_step := query.last_step)
|
|
1342
1363
|
and isinstance(last_step, SQLLimit)
|
|
1343
1364
|
):
|
|
1344
1365
|
query.steps[-1] = SQLLimit(min(n, last_step.n))
|
|
@@ -1591,3 +1612,11 @@ class DatasetQuery:
|
|
|
1591
1612
|
finally:
|
|
1592
1613
|
self.cleanup()
|
|
1593
1614
|
return self.__class__(name=name, version=version, catalog=self.catalog)
|
|
1615
|
+
|
|
1616
|
+
@property
|
|
1617
|
+
def is_ordered(self) -> bool:
|
|
1618
|
+
return isinstance(self.last_step, SQLOrderBy)
|
|
1619
|
+
|
|
1620
|
+
@property
|
|
1621
|
+
def last_step(self) -> Optional[Step]:
|
|
1622
|
+
return self.steps[-1] if self.steps else None
|
|
@@ -187,6 +187,22 @@ class Int32(Int):
|
|
|
187
187
|
return read_converter(dialect).int32(value)
|
|
188
188
|
|
|
189
189
|
|
|
190
|
+
class UInt32(Int):
|
|
191
|
+
def load_dialect_impl(self, dialect):
|
|
192
|
+
return converter(dialect).uint32()
|
|
193
|
+
|
|
194
|
+
@staticmethod
|
|
195
|
+
def default_value(dialect):
|
|
196
|
+
return type_defaults(dialect).uint32()
|
|
197
|
+
|
|
198
|
+
@staticmethod
|
|
199
|
+
def db_default_value(dialect):
|
|
200
|
+
return db_defaults(dialect).uint32()
|
|
201
|
+
|
|
202
|
+
def on_read_convert(self, value, dialect):
|
|
203
|
+
return read_converter(dialect).uint32(value)
|
|
204
|
+
|
|
205
|
+
|
|
190
206
|
class Int64(Int):
|
|
191
207
|
def load_dialect_impl(self, dialect):
|
|
192
208
|
return converter(dialect).int64()
|
|
@@ -395,6 +411,9 @@ class TypeReadConverter:
|
|
|
395
411
|
def int32(self, value):
|
|
396
412
|
return value
|
|
397
413
|
|
|
414
|
+
def uint32(self, value):
|
|
415
|
+
return value
|
|
416
|
+
|
|
398
417
|
def int64(self, value):
|
|
399
418
|
return value
|
|
400
419
|
|
|
@@ -421,6 +440,8 @@ class TypeReadConverter:
|
|
|
421
440
|
|
|
422
441
|
def json(self, value):
|
|
423
442
|
if isinstance(value, str):
|
|
443
|
+
if value == "":
|
|
444
|
+
return {}
|
|
424
445
|
return orjson.loads(value)
|
|
425
446
|
return value
|
|
426
447
|
|
|
@@ -446,6 +467,9 @@ class TypeConverter:
|
|
|
446
467
|
def int32(self):
|
|
447
468
|
return self.int()
|
|
448
469
|
|
|
470
|
+
def uint32(self):
|
|
471
|
+
return self.int()
|
|
472
|
+
|
|
449
473
|
def int64(self):
|
|
450
474
|
return self.int()
|
|
451
475
|
|
|
@@ -487,6 +511,9 @@ class TypeDefaults:
|
|
|
487
511
|
def int32(self):
|
|
488
512
|
return None
|
|
489
513
|
|
|
514
|
+
def uint32(self):
|
|
515
|
+
return None
|
|
516
|
+
|
|
490
517
|
def int64(self):
|
|
491
518
|
return None
|
|
492
519
|
|
|
@@ -528,6 +555,9 @@ class DBDefaults:
|
|
|
528
555
|
def int32(self):
|
|
529
556
|
return self.int()
|
|
530
557
|
|
|
558
|
+
def uint32(self):
|
|
559
|
+
return self.int()
|
|
560
|
+
|
|
531
561
|
def int64(self):
|
|
532
562
|
return self.int()
|
|
533
563
|
|
|
@@ -561,6 +591,7 @@ TYPES = [
|
|
|
561
591
|
Boolean,
|
|
562
592
|
Int,
|
|
563
593
|
Int32,
|
|
594
|
+
UInt32,
|
|
564
595
|
Int64,
|
|
565
596
|
UInt64,
|
|
566
597
|
Float,
|
|
@@ -19,8 +19,12 @@ llm_and_nlp_examples = sorted(
|
|
|
19
19
|
[
|
|
20
20
|
filename
|
|
21
21
|
for filename in glob.glob("examples/llm_and_nlp/**/*.py", recursive=True)
|
|
22
|
-
# no anthropic token
|
|
22
|
+
# no anthropic token, HF runs against actual API - thus run it only once
|
|
23
23
|
if "claude" not in filename
|
|
24
|
+
and (
|
|
25
|
+
"hf-" not in filename
|
|
26
|
+
or (sys.platform == "darwin" and sys.version_info >= (3, 12))
|
|
27
|
+
)
|
|
24
28
|
]
|
|
25
29
|
)
|
|
26
30
|
|
|
@@ -448,6 +448,30 @@ def test_show_no_truncate(capsys, test_session):
|
|
|
448
448
|
assert details[i] in normalized_output
|
|
449
449
|
|
|
450
450
|
|
|
451
|
+
@pytest.mark.parametrize("ordered_by", ["letter", "number"])
|
|
452
|
+
def test_show_ordered(capsys, test_session, ordered_by):
|
|
453
|
+
numbers = [6, 2, 3, 1, 5, 7, 4]
|
|
454
|
+
letters = ["u", "y", "x", "z", "v", "t", "w"]
|
|
455
|
+
|
|
456
|
+
DataChain.from_values(
|
|
457
|
+
number=numbers, letter=letters, session=test_session
|
|
458
|
+
).order_by(ordered_by).show()
|
|
459
|
+
|
|
460
|
+
captured = capsys.readouterr()
|
|
461
|
+
normalized_lines = [
|
|
462
|
+
re.sub(r"\s+", " ", line).strip() for line in captured.out.strip().split("\n")
|
|
463
|
+
]
|
|
464
|
+
|
|
465
|
+
ordered_entries = sorted(
|
|
466
|
+
zip(numbers, letters), key=lambda x: x[0 if ordered_by == "number" else 1]
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
assert normalized_lines[0].strip() == "number letter"
|
|
470
|
+
for i, line in enumerate(normalized_lines[1:]):
|
|
471
|
+
number, letter = ordered_entries[i]
|
|
472
|
+
assert line == f"{i} {number} {letter}"
|
|
473
|
+
|
|
474
|
+
|
|
451
475
|
def test_from_storage_dataset_stats(tmp_dir, test_session):
|
|
452
476
|
for i in range(4):
|
|
453
477
|
(tmp_dir / f"file{i}.txt").write_text(f"file{i}")
|
|
@@ -5,7 +5,7 @@ from unittest.mock import ANY
|
|
|
5
5
|
import pytest
|
|
6
6
|
import sqlalchemy as sa
|
|
7
7
|
|
|
8
|
-
from datachain.data_storage.
|
|
8
|
+
from datachain.data_storage.schema import DataTable
|
|
9
9
|
from datachain.dataset import DatasetDependencyType, DatasetStatus
|
|
10
10
|
from datachain.error import (
|
|
11
11
|
DatasetInvalidVersionError,
|
|
@@ -827,10 +827,7 @@ def test_row_random(cloud_test_catalog):
|
|
|
827
827
|
# Random values are unique
|
|
828
828
|
assert len(set(random_values)) == len(random_values)
|
|
829
829
|
|
|
830
|
-
|
|
831
|
-
RAND_MAX = 2**63 # noqa: N806
|
|
832
|
-
else:
|
|
833
|
-
RAND_MAX = 2**64 # noqa: N806
|
|
830
|
+
RAND_MAX = DataTable.MAX_RANDOM # noqa: N806
|
|
834
831
|
|
|
835
832
|
# Values are drawn uniformly from range(2**63)
|
|
836
833
|
assert 0 <= min(random_values) < 0.4 * RAND_MAX
|
|
@@ -17,7 +17,7 @@ def test_listing_generator(cloud_test_catalog, cloud_type):
|
|
|
17
17
|
entries = sorted(
|
|
18
18
|
[e for e in ENTRIES if e.path.startswith("cats/")], key=lambda e: e.path
|
|
19
19
|
)
|
|
20
|
-
files =
|
|
20
|
+
files = dc.order_by("file.path").collect("file")
|
|
21
21
|
|
|
22
22
|
for cat_file, cat_entry in zip(files, entries):
|
|
23
23
|
assert cat_file.source == ctc.src_uri
|
|
@@ -49,19 +49,17 @@ def dog_entries_parquet_lz4(dog_entries) -> bytes:
|
|
|
49
49
|
"""
|
|
50
50
|
adapted = {}
|
|
51
51
|
for k, v in row.items():
|
|
52
|
-
if isinstance(v,
|
|
53
|
-
adapted[k] = v.encode("utf-8")
|
|
54
|
-
elif isinstance(v, datetime):
|
|
52
|
+
if isinstance(v, datetime):
|
|
55
53
|
adapted[k] = v.timestamp()
|
|
56
54
|
elif v is None:
|
|
57
|
-
adapted[k] =
|
|
55
|
+
adapted[k] = ""
|
|
58
56
|
else:
|
|
59
57
|
adapted[k] = v
|
|
60
58
|
|
|
61
59
|
adapted["sys__id"] = 1
|
|
62
60
|
adapted["sys__rand"] = 1
|
|
63
|
-
adapted["file__location"] =
|
|
64
|
-
adapted["file__source"] =
|
|
61
|
+
adapted["file__location"] = ""
|
|
62
|
+
adapted["file__source"] = "s3://dogs"
|
|
65
63
|
return adapted
|
|
66
64
|
|
|
67
65
|
dog_entries = [_adapt_row(e) for e in dog_entries]
|
|
@@ -1824,6 +1824,32 @@ def test_order_by_with_nested_columns(test_session, with_function):
|
|
|
1824
1824
|
]
|
|
1825
1825
|
|
|
1826
1826
|
|
|
1827
|
+
def test_order_by_collect(test_session):
|
|
1828
|
+
numbers = [6, 2, 3, 1, 5, 7, 4]
|
|
1829
|
+
letters = ["u", "y", "x", "z", "v", "t", "w"]
|
|
1830
|
+
|
|
1831
|
+
dc = DataChain.from_values(number=numbers, letter=letters, session=test_session)
|
|
1832
|
+
assert list(dc.order_by("number").collect()) == [
|
|
1833
|
+
(1, "z"),
|
|
1834
|
+
(2, "y"),
|
|
1835
|
+
(3, "x"),
|
|
1836
|
+
(4, "w"),
|
|
1837
|
+
(5, "v"),
|
|
1838
|
+
(6, "u"),
|
|
1839
|
+
(7, "t"),
|
|
1840
|
+
]
|
|
1841
|
+
|
|
1842
|
+
assert list(dc.order_by("letter").collect()) == [
|
|
1843
|
+
(7, "t"),
|
|
1844
|
+
(6, "u"),
|
|
1845
|
+
(5, "v"),
|
|
1846
|
+
(4, "w"),
|
|
1847
|
+
(3, "x"),
|
|
1848
|
+
(2, "y"),
|
|
1849
|
+
(1, "z"),
|
|
1850
|
+
]
|
|
1851
|
+
|
|
1852
|
+
|
|
1827
1853
|
@pytest.mark.parametrize("with_function", [True, False])
|
|
1828
1854
|
def test_order_by_descending(test_session, with_function):
|
|
1829
1855
|
names = ["a.txt", "c.txt", "d.txt", "a.txt", "b.txt"]
|
|
@@ -1852,7 +1878,7 @@ def test_union(test_session):
|
|
|
1852
1878
|
chain2 = DataChain.from_values(value=[3, 4], session=test_session)
|
|
1853
1879
|
chain3 = chain1 | chain2
|
|
1854
1880
|
assert chain3.count() == 4
|
|
1855
|
-
assert
|
|
1881
|
+
assert list(chain3.order_by("value").collect("value")) == [1, 2, 3, 4]
|
|
1856
1882
|
|
|
1857
1883
|
|
|
1858
1884
|
def test_union_different_columns(test_session):
|
|
@@ -1887,7 +1913,7 @@ def test_union_different_column_order(test_session):
|
|
|
1887
1913
|
chain2 = DataChain.from_values(
|
|
1888
1914
|
name=["different", "order"], value=[9, 10], session=test_session
|
|
1889
1915
|
)
|
|
1890
|
-
assert
|
|
1916
|
+
assert list(chain1.union(chain2).order_by("value").collect()) == [
|
|
1891
1917
|
(1, "chain"),
|
|
1892
1918
|
(2, "more"),
|
|
1893
1919
|
(9, "different"),
|
|
@@ -27,6 +27,7 @@ from datachain.sql.types import (
|
|
|
27
27
|
Int32,
|
|
28
28
|
Int64,
|
|
29
29
|
String,
|
|
30
|
+
UInt32,
|
|
30
31
|
UInt64,
|
|
31
32
|
)
|
|
32
33
|
|
|
@@ -721,6 +722,7 @@ def test_mutate_change_type():
|
|
|
721
722
|
[Boolean, bool],
|
|
722
723
|
[Int, int],
|
|
723
724
|
[Int32, int],
|
|
725
|
+
[UInt32, int],
|
|
724
726
|
[Int64, int],
|
|
725
727
|
[UInt64, int],
|
|
726
728
|
[Float, float],
|
|
@@ -18,6 +18,7 @@ from datachain.sql.types import (
|
|
|
18
18
|
Int32,
|
|
19
19
|
Int64,
|
|
20
20
|
String,
|
|
21
|
+
UInt32,
|
|
21
22
|
UInt64,
|
|
22
23
|
)
|
|
23
24
|
from tests.utils import (
|
|
@@ -173,6 +174,7 @@ def test_convert_type(cloud_test_catalog):
|
|
|
173
174
|
[Boolean(), False],
|
|
174
175
|
[Int(), 0],
|
|
175
176
|
[Int32(), 0],
|
|
177
|
+
[UInt32(), 0],
|
|
176
178
|
[Int64(), 0],
|
|
177
179
|
[UInt64(), 0],
|
|
178
180
|
[Float(), lambda val: math.isnan(val)],
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|