datachain 0.8.9__tar.gz → 0.8.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.8.9 → datachain-0.8.10}/PKG-INFO +1 -1
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/__init__.py +2 -3
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/parser/__init__.py +20 -15
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/parser/job.py +1 -1
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/parser/studio.py +29 -29
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/parser/utils.py +1 -1
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/utils.py +1 -1
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/sqlite.py +38 -7
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/warehouse.py +2 -2
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/convert/python_to_sql.py +15 -3
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/dc.py +3 -1
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/dataset.py +8 -4
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/remote/studio.py +2 -2
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/sqlite/base.py +35 -14
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/studio.py +7 -7
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain.egg-info/SOURCES.txt +1 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/test_cli_studio.py +9 -9
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_datachain_merge.py +48 -1
- datachain-0.8.10/tests/unit/lib/test_python_to_sql.py +45 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_utils.py +3 -19
- {datachain-0.8.9 → datachain-0.8.10}/.cruft.json +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.gitattributes +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.github/codecov.yaml +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.github/dependabot.yml +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.github/workflows/release.yml +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.github/workflows/tests.yml +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.gitignore +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/.pre-commit-config.yaml +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/LICENSE +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/README.rst +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/assets/datachain.svg +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/contributing.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/examples.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/index.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/overrides/main.html +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/quick-start.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/references/datachain.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/references/datatype.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/references/file.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/references/index.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/references/sql.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/references/torch.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/references/udf.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/docs/tutorials.md +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/multimodal/wds.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/mkdocs.yml +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/noxfile.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/pyproject.toml +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/setup.cfg +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/__main__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/asyn.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cache.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/azure.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/gcs.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/hf.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/local.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/client/s3.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/config.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/dataset.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/error.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/array.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/base.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/conditional.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/func.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/numeric.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/path.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/random.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/string.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/func/window.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/job.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/clip.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/file.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/hf.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/image.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/listing.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/settings.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/tar.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/text.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/udf.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/utils.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/listing.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/bbox.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/pose.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/segment.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/node.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/progress.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/py.typed +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/batch.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/metrics.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/params.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/queue.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/schema.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/session.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/udf.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/query/utils.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/types.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/sql/utils.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/telemetry.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain/utils.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/conftest.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/data.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/examples/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/examples/test_examples.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/examples/wds_data.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_catalog.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_client.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_datachain.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_datasets.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_listing.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_ls.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_metrics.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_pull.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_pytorch.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_query.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_session.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/func/test_toolkit.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/scripts/feature_class.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/test_atomicity.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/test_cli_e2e.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/test_query_e2e.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/test_telemetry.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_models.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_asyn.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_cache.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_catalog.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_client.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_config.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_dataset.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_diff.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_func.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_listing.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_metastore.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_query.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_query_params.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_serializer.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_session.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_utils.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.8.9 → datachain-0.8.10}/tests/utils.py +0 -0
|
@@ -48,7 +48,6 @@ def main(argv: Optional[list[str]] = None) -> int:
|
|
|
48
48
|
logger.setLevel(logging_level)
|
|
49
49
|
|
|
50
50
|
client_config = {
|
|
51
|
-
"aws_endpoint_url": args.aws_endpoint_url,
|
|
52
51
|
"anon": args.anon,
|
|
53
52
|
}
|
|
54
53
|
|
|
@@ -73,7 +72,7 @@ def main(argv: Optional[list[str]] = None) -> int:
|
|
|
73
72
|
|
|
74
73
|
def handle_command(args, catalog, client_config) -> int:
|
|
75
74
|
"""Handle the different CLI commands."""
|
|
76
|
-
from datachain.studio import
|
|
75
|
+
from datachain.studio import process_auth_cli_args, process_jobs_args
|
|
77
76
|
|
|
78
77
|
command_handlers = {
|
|
79
78
|
"cp": lambda: handle_cp_command(args, catalog),
|
|
@@ -89,7 +88,7 @@ def handle_command(args, catalog, client_config) -> int:
|
|
|
89
88
|
"query": lambda: handle_query_command(args, catalog),
|
|
90
89
|
"clear-cache": lambda: clear_cache(catalog),
|
|
91
90
|
"gc": lambda: garbage_collect(catalog),
|
|
92
|
-
"
|
|
91
|
+
"auth": lambda: process_auth_cli_args(args),
|
|
93
92
|
"job": lambda: process_jobs_args(args),
|
|
94
93
|
}
|
|
95
94
|
|
|
@@ -7,7 +7,7 @@ import shtab
|
|
|
7
7
|
from datachain.cli.utils import BooleanOptionalAction, KeyValueArgs
|
|
8
8
|
|
|
9
9
|
from .job import add_jobs_parser
|
|
10
|
-
from .studio import
|
|
10
|
+
from .studio import add_auth_parser
|
|
11
11
|
from .utils import FIND_COLUMNS, add_show_args, add_sources_arg, find_columns_type
|
|
12
12
|
|
|
13
13
|
|
|
@@ -26,24 +26,25 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
26
26
|
|
|
27
27
|
parent_parser = ArgumentParser(add_help=False)
|
|
28
28
|
parent_parser.add_argument(
|
|
29
|
-
"
|
|
30
|
-
type=str,
|
|
31
|
-
help="AWS endpoint URL",
|
|
32
|
-
)
|
|
33
|
-
parent_parser.add_argument(
|
|
34
|
-
"--anon",
|
|
35
|
-
action="store_true",
|
|
36
|
-
help="anon flag for remote storage (like awscli's --no-sign-request)",
|
|
29
|
+
"-v", "--verbose", action="count", default=0, help="Be verbose"
|
|
37
30
|
)
|
|
38
31
|
parent_parser.add_argument(
|
|
39
|
-
"-
|
|
32
|
+
"-q", "--quiet", action="count", default=0, help="Be quiet"
|
|
40
33
|
)
|
|
34
|
+
|
|
41
35
|
parent_parser.add_argument(
|
|
42
|
-
"
|
|
36
|
+
"--anon",
|
|
37
|
+
action="store_true",
|
|
38
|
+
help="Use anonymous access to storage",
|
|
43
39
|
)
|
|
44
40
|
parent_parser.add_argument(
|
|
45
|
-
"-
|
|
41
|
+
"-u",
|
|
42
|
+
"--update",
|
|
43
|
+
action="count",
|
|
44
|
+
default=0,
|
|
45
|
+
help="Update cached list of files for the sources",
|
|
46
46
|
)
|
|
47
|
+
|
|
47
48
|
parent_parser.add_argument(
|
|
48
49
|
"--debug-sql",
|
|
49
50
|
action="store_true",
|
|
@@ -67,7 +68,9 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
67
68
|
"cp", parents=[parent_parser], description="Copy data files from the cloud."
|
|
68
69
|
)
|
|
69
70
|
add_sources_arg(parse_cp).complete = shtab.DIR # type: ignore[attr-defined]
|
|
70
|
-
parse_cp.add_argument(
|
|
71
|
+
parse_cp.add_argument(
|
|
72
|
+
"output", type=str, help="Path to a directory or file to put data to"
|
|
73
|
+
)
|
|
71
74
|
parse_cp.add_argument(
|
|
72
75
|
"-f",
|
|
73
76
|
"--force",
|
|
@@ -94,7 +97,9 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
94
97
|
"clone", parents=[parent_parser], description="Copy data files from the cloud."
|
|
95
98
|
)
|
|
96
99
|
add_sources_arg(parse_clone).complete = shtab.DIR # type: ignore[attr-defined]
|
|
97
|
-
parse_clone.add_argument(
|
|
100
|
+
parse_clone.add_argument(
|
|
101
|
+
"output", type=str, help="Path to a directory or file to put data to"
|
|
102
|
+
)
|
|
98
103
|
parse_clone.add_argument(
|
|
99
104
|
"-f",
|
|
100
105
|
"--force",
|
|
@@ -123,7 +128,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
123
128
|
help="Do not copy files, just create a dataset",
|
|
124
129
|
)
|
|
125
130
|
|
|
126
|
-
|
|
131
|
+
add_auth_parser(subp, parent_parser)
|
|
127
132
|
add_jobs_parser(subp, parent_parser)
|
|
128
133
|
|
|
129
134
|
datasets_parser = subp.add_parser(
|
|
@@ -6,7 +6,7 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
|
|
|
6
6
|
)
|
|
7
7
|
jobs_subparser = jobs_parser.add_subparsers(
|
|
8
8
|
dest="cmd",
|
|
9
|
-
help="Use `datachain
|
|
9
|
+
help="Use `datachain auth CMD --help` to display command-specific help",
|
|
10
10
|
)
|
|
11
11
|
|
|
12
12
|
studio_run_help = "Run a job in Studio"
|
|
@@ -1,31 +1,31 @@
|
|
|
1
|
-
def
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
def add_auth_parser(subparsers, parent_parser) -> None:
|
|
2
|
+
auth_help = "Manage Studio authentication"
|
|
3
|
+
auth_description = (
|
|
4
4
|
"Manage authentication and settings for Studio. "
|
|
5
5
|
"Configure tokens for sharing datasets and using Studio features."
|
|
6
6
|
)
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
"
|
|
8
|
+
auth_parser = subparsers.add_parser(
|
|
9
|
+
"auth",
|
|
10
10
|
parents=[parent_parser],
|
|
11
|
-
description=
|
|
12
|
-
help=
|
|
11
|
+
description=auth_description,
|
|
12
|
+
help=auth_help,
|
|
13
13
|
)
|
|
14
|
-
|
|
14
|
+
auth_subparser = auth_parser.add_subparsers(
|
|
15
15
|
dest="cmd",
|
|
16
|
-
help="Use `datachain
|
|
16
|
+
help="Use `datachain auth CMD --help` to display command-specific help",
|
|
17
17
|
)
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
auth_login_help = "Authenticate with Studio"
|
|
20
|
+
auth_login_description = (
|
|
21
21
|
"Authenticate with Studio using default scopes. "
|
|
22
22
|
"A random name will be assigned as the token name if not specified."
|
|
23
23
|
)
|
|
24
|
-
login_parser =
|
|
24
|
+
login_parser = auth_subparser.add_parser(
|
|
25
25
|
"login",
|
|
26
26
|
parents=[parent_parser],
|
|
27
|
-
description=
|
|
28
|
-
help=
|
|
27
|
+
description=auth_login_description,
|
|
28
|
+
help=auth_login_help,
|
|
29
29
|
)
|
|
30
30
|
|
|
31
31
|
login_parser.add_argument(
|
|
@@ -58,26 +58,26 @@ def add_studio_parser(subparsers, parent_parser) -> None:
|
|
|
58
58
|
help="Use code-based authentication without browser",
|
|
59
59
|
)
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
|
|
61
|
+
auth_logout_help = "Log out from Studio"
|
|
62
|
+
auth_logout_description = (
|
|
63
63
|
"Remove the Studio authentication token from global config."
|
|
64
64
|
)
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
auth_subparser.add_parser(
|
|
67
67
|
"logout",
|
|
68
68
|
parents=[parent_parser],
|
|
69
|
-
description=
|
|
70
|
-
help=
|
|
69
|
+
description=auth_logout_description,
|
|
70
|
+
help=auth_logout_help,
|
|
71
71
|
)
|
|
72
72
|
|
|
73
|
-
|
|
74
|
-
|
|
73
|
+
auth_team_help = "Set default team for Studio operations"
|
|
74
|
+
auth_team_description = "Set the default team for Studio operations."
|
|
75
75
|
|
|
76
|
-
team_parser =
|
|
76
|
+
team_parser = auth_subparser.add_parser(
|
|
77
77
|
"team",
|
|
78
78
|
parents=[parent_parser],
|
|
79
|
-
description=
|
|
80
|
-
help=
|
|
79
|
+
description=auth_team_description,
|
|
80
|
+
help=auth_team_help,
|
|
81
81
|
)
|
|
82
82
|
team_parser.add_argument(
|
|
83
83
|
"team_name",
|
|
@@ -91,12 +91,12 @@ def add_studio_parser(subparsers, parent_parser) -> None:
|
|
|
91
91
|
help="Set team globally for all projects",
|
|
92
92
|
)
|
|
93
93
|
|
|
94
|
-
|
|
95
|
-
|
|
94
|
+
auth_token_help = "View Studio authentication token" # noqa: S105
|
|
95
|
+
auth_token_description = "Display the current authentication token for Studio." # noqa: S105
|
|
96
96
|
|
|
97
|
-
|
|
97
|
+
auth_subparser.add_parser(
|
|
98
98
|
"token",
|
|
99
99
|
parents=[parent_parser],
|
|
100
|
-
description=
|
|
101
|
-
help=
|
|
100
|
+
description=auth_token_description,
|
|
101
|
+
help=auth_token_help,
|
|
102
102
|
)
|
|
@@ -30,7 +30,7 @@ def add_sources_arg(parser: ArgumentParser, nargs: Union[str, int] = "+") -> Act
|
|
|
30
30
|
"sources",
|
|
31
31
|
type=str,
|
|
32
32
|
nargs=nargs,
|
|
33
|
-
help="Data sources - paths to
|
|
33
|
+
help="Data sources - paths to source storage directories or files",
|
|
34
34
|
)
|
|
35
35
|
|
|
36
36
|
|
|
@@ -87,7 +87,7 @@ def get_logging_level(args: Namespace) -> int:
|
|
|
87
87
|
def determine_flavors(studio: bool, local: bool, all: bool, token: Optional[str]):
|
|
88
88
|
if studio and not token:
|
|
89
89
|
raise DataChainError(
|
|
90
|
-
"Not logged in to Studio. Log in with 'datachain
|
|
90
|
+
"Not logged in to Studio. Log in with 'datachain auth login'."
|
|
91
91
|
)
|
|
92
92
|
|
|
93
93
|
if local or studio:
|
|
@@ -19,6 +19,7 @@ from sqlalchemy import MetaData, Table, UniqueConstraint, exists, select
|
|
|
19
19
|
from sqlalchemy.dialects import sqlite
|
|
20
20
|
from sqlalchemy.schema import CreateIndex, CreateTable, DropTable
|
|
21
21
|
from sqlalchemy.sql import func
|
|
22
|
+
from sqlalchemy.sql.elements import BinaryExpression, BooleanClauseList
|
|
22
23
|
from sqlalchemy.sql.expression import bindparam, cast
|
|
23
24
|
from sqlalchemy.sql.selectable import Select
|
|
24
25
|
from tqdm.auto import tqdm
|
|
@@ -40,7 +41,6 @@ if TYPE_CHECKING:
|
|
|
40
41
|
from sqlalchemy.schema import SchemaItem
|
|
41
42
|
from sqlalchemy.sql._typing import _FromClauseArgument, _OnClauseArgument
|
|
42
43
|
from sqlalchemy.sql.elements import ColumnElement
|
|
43
|
-
from sqlalchemy.sql.selectable import Join
|
|
44
44
|
from sqlalchemy.types import TypeEngine
|
|
45
45
|
|
|
46
46
|
from datachain.lib.file import File
|
|
@@ -654,16 +654,47 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
654
654
|
right: "_FromClauseArgument",
|
|
655
655
|
onclause: "_OnClauseArgument",
|
|
656
656
|
inner: bool = True,
|
|
657
|
-
|
|
657
|
+
full: bool = False,
|
|
658
|
+
columns=None,
|
|
659
|
+
) -> "Select":
|
|
658
660
|
"""
|
|
659
661
|
Join two tables together.
|
|
660
662
|
"""
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
663
|
+
if not full:
|
|
664
|
+
join_query = sqlalchemy.join(
|
|
665
|
+
left,
|
|
666
|
+
right,
|
|
667
|
+
onclause,
|
|
668
|
+
isouter=not inner,
|
|
669
|
+
)
|
|
670
|
+
return sqlalchemy.select(*columns).select_from(join_query)
|
|
671
|
+
|
|
672
|
+
left_right_join = sqlalchemy.select(*columns).select_from(
|
|
673
|
+
sqlalchemy.join(left, right, onclause, isouter=True)
|
|
666
674
|
)
|
|
675
|
+
right_left_join = sqlalchemy.select(*columns).select_from(
|
|
676
|
+
sqlalchemy.join(right, left, onclause, isouter=True)
|
|
677
|
+
)
|
|
678
|
+
|
|
679
|
+
def add_left_rows_filter(exp: BinaryExpression):
|
|
680
|
+
"""
|
|
681
|
+
Adds filter to right_left_join to remove unmatched left table rows by
|
|
682
|
+
getting column names that need to be NULL from BinaryExpressions in onclause
|
|
683
|
+
"""
|
|
684
|
+
return right_left_join.where(
|
|
685
|
+
getattr(left.c, exp.left.name) == None # type: ignore[union-attr] # noqa: E711
|
|
686
|
+
)
|
|
687
|
+
|
|
688
|
+
if isinstance(onclause, BinaryExpression):
|
|
689
|
+
right_left_join = add_left_rows_filter(onclause)
|
|
690
|
+
|
|
691
|
+
if isinstance(onclause, BooleanClauseList):
|
|
692
|
+
for c in onclause.get_children():
|
|
693
|
+
if isinstance(c, BinaryExpression):
|
|
694
|
+
right_left_join = add_left_rows_filter(c)
|
|
695
|
+
|
|
696
|
+
union = sqlalchemy.union(left_right_join, right_left_join).subquery()
|
|
697
|
+
return sqlalchemy.select(*union.c).select_from(union)
|
|
667
698
|
|
|
668
699
|
def create_pre_udf_table(self, query: "Select") -> "Table":
|
|
669
700
|
"""
|
|
@@ -31,7 +31,7 @@ if TYPE_CHECKING:
|
|
|
31
31
|
_FromClauseArgument,
|
|
32
32
|
_OnClauseArgument,
|
|
33
33
|
)
|
|
34
|
-
from sqlalchemy.sql.selectable import
|
|
34
|
+
from sqlalchemy.sql.selectable import Select
|
|
35
35
|
from sqlalchemy.types import TypeEngine
|
|
36
36
|
|
|
37
37
|
from datachain.data_storage import schema
|
|
@@ -873,7 +873,7 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
873
873
|
right: "_FromClauseArgument",
|
|
874
874
|
onclause: "_OnClauseArgument",
|
|
875
875
|
inner: bool = True,
|
|
876
|
-
) -> "
|
|
876
|
+
) -> "Select":
|
|
877
877
|
"""
|
|
878
878
|
Join two tables together.
|
|
879
879
|
"""
|
|
@@ -52,15 +52,15 @@ def python_to_sql(typ): # noqa: PLR0911
|
|
|
52
52
|
|
|
53
53
|
args = get_args(typ)
|
|
54
54
|
if inspect.isclass(orig) and (issubclass(list, orig) or issubclass(tuple, orig)):
|
|
55
|
-
if args is None
|
|
55
|
+
if args is None:
|
|
56
56
|
raise TypeError(f"Cannot resolve type '{typ}' for flattening features")
|
|
57
57
|
|
|
58
58
|
args0 = args[0]
|
|
59
59
|
if ModelStore.is_pydantic(args0):
|
|
60
60
|
return Array(JSON())
|
|
61
61
|
|
|
62
|
-
|
|
63
|
-
return Array(
|
|
62
|
+
list_type = list_of_args_to_type(args)
|
|
63
|
+
return Array(list_type)
|
|
64
64
|
|
|
65
65
|
if orig is Annotated:
|
|
66
66
|
# Ignoring annotations
|
|
@@ -82,6 +82,18 @@ def python_to_sql(typ): # noqa: PLR0911
|
|
|
82
82
|
raise TypeError(f"Cannot recognize type {typ}")
|
|
83
83
|
|
|
84
84
|
|
|
85
|
+
def list_of_args_to_type(args) -> SQLType:
|
|
86
|
+
first_type = python_to_sql(args[0])
|
|
87
|
+
for next_arg in args[1:]:
|
|
88
|
+
try:
|
|
89
|
+
next_type = python_to_sql(next_arg)
|
|
90
|
+
if next_type != first_type:
|
|
91
|
+
return JSON()
|
|
92
|
+
except TypeError:
|
|
93
|
+
return JSON()
|
|
94
|
+
return first_type
|
|
95
|
+
|
|
96
|
+
|
|
85
97
|
def _is_json_inside_union(orig, args) -> bool:
|
|
86
98
|
if orig == Union and len(args) >= 2:
|
|
87
99
|
# List in JSON: Union[dict, list[dict]]
|
|
@@ -1332,6 +1332,7 @@ class DataChain:
|
|
|
1332
1332
|
on: Union[MergeColType, Sequence[MergeColType]],
|
|
1333
1333
|
right_on: Optional[Union[MergeColType, Sequence[MergeColType]]] = None,
|
|
1334
1334
|
inner=False,
|
|
1335
|
+
full=False,
|
|
1335
1336
|
rname="right_",
|
|
1336
1337
|
) -> "Self":
|
|
1337
1338
|
"""Merge two chains based on the specified criteria.
|
|
@@ -1345,6 +1346,7 @@ class DataChain:
|
|
|
1345
1346
|
right_on: Optional predicate or list of Predicates for the `right_ds`
|
|
1346
1347
|
to join.
|
|
1347
1348
|
inner (bool): Whether to run inner join or outer join.
|
|
1349
|
+
full (bool): Whether to run full outer join.
|
|
1348
1350
|
rname (str): Name prefix for conflicting signal names.
|
|
1349
1351
|
|
|
1350
1352
|
Examples:
|
|
@@ -1419,7 +1421,7 @@ class DataChain:
|
|
|
1419
1421
|
)
|
|
1420
1422
|
|
|
1421
1423
|
query = self._query.join(
|
|
1422
|
-
right_ds._query, sqlalchemy.and_(*ops), inner, rname + "{name}"
|
|
1424
|
+
right_ds._query, sqlalchemy.and_(*ops), inner, full, rname + "{name}"
|
|
1423
1425
|
)
|
|
1424
1426
|
query.feature_schema = None
|
|
1425
1427
|
ds = self._evolve(query=query)
|
|
@@ -875,6 +875,7 @@ class SQLJoin(Step):
|
|
|
875
875
|
query2: "DatasetQuery"
|
|
876
876
|
predicates: Union[JoinPredicateType, tuple[JoinPredicateType, ...]]
|
|
877
877
|
inner: bool
|
|
878
|
+
full: bool
|
|
878
879
|
rname: str
|
|
879
880
|
|
|
880
881
|
def get_query(self, dq: "DatasetQuery", temp_tables: list[str]) -> sa.Subquery:
|
|
@@ -977,14 +978,14 @@ class SQLJoin(Step):
|
|
|
977
978
|
self.validate_expression(join_expression, q1, q2)
|
|
978
979
|
|
|
979
980
|
def q(*columns):
|
|
980
|
-
|
|
981
|
+
return self.catalog.warehouse.join(
|
|
981
982
|
q1,
|
|
982
983
|
q2,
|
|
983
984
|
join_expression,
|
|
984
985
|
inner=self.inner,
|
|
986
|
+
full=self.full,
|
|
987
|
+
columns=columns,
|
|
985
988
|
)
|
|
986
|
-
return sqlalchemy.select(*columns).select_from(join_query)
|
|
987
|
-
# return sqlalchemy.select(*subquery.c).select_from(subquery)
|
|
988
989
|
|
|
989
990
|
return step_result(
|
|
990
991
|
q,
|
|
@@ -1489,6 +1490,7 @@ class DatasetQuery:
|
|
|
1489
1490
|
dataset_query: "DatasetQuery",
|
|
1490
1491
|
predicates: Union[JoinPredicateType, Sequence[JoinPredicateType]],
|
|
1491
1492
|
inner=False,
|
|
1493
|
+
full=False,
|
|
1492
1494
|
rname="{name}_right",
|
|
1493
1495
|
) -> "Self":
|
|
1494
1496
|
left = self.clone(new_table=False)
|
|
@@ -1504,7 +1506,9 @@ class DatasetQuery:
|
|
|
1504
1506
|
if isinstance(predicates, (str, ColumnClause, ColumnElement))
|
|
1505
1507
|
else tuple(predicates)
|
|
1506
1508
|
)
|
|
1507
|
-
new_query.steps = [
|
|
1509
|
+
new_query.steps = [
|
|
1510
|
+
SQLJoin(self.catalog, left, right, predicates, inner, full, rname)
|
|
1511
|
+
]
|
|
1508
1512
|
return new_query
|
|
1509
1513
|
|
|
1510
1514
|
@detach
|
|
@@ -75,7 +75,7 @@ class StudioClient:
|
|
|
75
75
|
|
|
76
76
|
if not token:
|
|
77
77
|
raise DataChainError(
|
|
78
|
-
"Studio token is not set. Use `datachain
|
|
78
|
+
"Studio token is not set. Use `datachain auth login` "
|
|
79
79
|
"or environment variable `DVC_STUDIO_TOKEN` to set it."
|
|
80
80
|
)
|
|
81
81
|
|
|
@@ -105,7 +105,7 @@ class StudioClient:
|
|
|
105
105
|
if not team:
|
|
106
106
|
raise DataChainError(
|
|
107
107
|
"Studio team is not set. "
|
|
108
|
-
"Use `datachain
|
|
108
|
+
"Use `datachain auth team <team_name>` "
|
|
109
109
|
"or environment variable `DVC_STUDIO_TEAM` to set it."
|
|
110
110
|
"You can also set it in the config file as team under studio."
|
|
111
111
|
)
|
|
@@ -4,6 +4,7 @@ import sqlite3
|
|
|
4
4
|
import warnings
|
|
5
5
|
from collections.abc import Iterable
|
|
6
6
|
from datetime import MAXYEAR, MINYEAR, datetime, timezone
|
|
7
|
+
from functools import cache
|
|
7
8
|
from types import MappingProxyType
|
|
8
9
|
from typing import Callable, Optional
|
|
9
10
|
|
|
@@ -526,24 +527,44 @@ def compile_collect(element, compiler, **kwargs):
|
|
|
526
527
|
return compiler.process(func.json_group_array(*element.clauses.clauses), **kwargs)
|
|
527
528
|
|
|
528
529
|
|
|
529
|
-
|
|
530
|
+
@cache
|
|
531
|
+
def usearch_sqlite_path() -> Optional[str]:
|
|
530
532
|
try:
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
533
|
+
import usearch
|
|
534
|
+
except ImportError:
|
|
535
|
+
return None
|
|
534
536
|
|
|
535
|
-
|
|
537
|
+
with warnings.catch_warnings():
|
|
538
|
+
# usearch binary is not available for Windows, see: https://github.com/unum-cloud/usearch/issues/427.
|
|
539
|
+
# and, sometimes fail to download the binary in other platforms
|
|
540
|
+
# triggering UserWarning.
|
|
536
541
|
|
|
537
|
-
|
|
538
|
-
# usearch binary is not available for Windows, see: https://github.com/unum-cloud/usearch/issues/427.
|
|
539
|
-
# and, sometimes fail to download the binary in other platforms
|
|
540
|
-
# triggering UserWarning.
|
|
542
|
+
warnings.filterwarnings("ignore", category=UserWarning, module="usearch")
|
|
541
543
|
|
|
542
|
-
|
|
543
|
-
|
|
544
|
+
try:
|
|
545
|
+
return usearch.sqlite_path()
|
|
546
|
+
except FileNotFoundError:
|
|
547
|
+
return None
|
|
544
548
|
|
|
545
|
-
conn.enable_load_extension(False)
|
|
546
|
-
return True
|
|
547
549
|
|
|
548
|
-
|
|
550
|
+
def load_usearch_extension(conn: sqlite3.Connection) -> bool:
|
|
551
|
+
# usearch is part of the vector optional dependencies
|
|
552
|
+
# we use the extension's cosine and euclidean distance functions
|
|
553
|
+
ext_path = usearch_sqlite_path()
|
|
554
|
+
if ext_path is None:
|
|
555
|
+
return False
|
|
556
|
+
|
|
557
|
+
try:
|
|
558
|
+
conn.enable_load_extension(True)
|
|
559
|
+
except AttributeError:
|
|
560
|
+
# sqlite3 module is not built with loadable extension support by default.
|
|
561
|
+
return False
|
|
562
|
+
|
|
563
|
+
try:
|
|
564
|
+
conn.load_extension(ext_path)
|
|
565
|
+
except sqlite3.OperationalError:
|
|
549
566
|
return False
|
|
567
|
+
else:
|
|
568
|
+
return True
|
|
569
|
+
finally:
|
|
570
|
+
conn.enable_load_extension(False)
|
|
@@ -47,7 +47,7 @@ def process_jobs_args(args: "Namespace"):
|
|
|
47
47
|
raise DataChainError(f"Unknown command '{args.cmd}'.")
|
|
48
48
|
|
|
49
49
|
|
|
50
|
-
def
|
|
50
|
+
def process_auth_cli_args(args: "Namespace"):
|
|
51
51
|
if args.cmd is None:
|
|
52
52
|
print(
|
|
53
53
|
f"Use 'datachain {args.command} --help' to see available options",
|
|
@@ -95,7 +95,7 @@ def login(args: "Namespace"):
|
|
|
95
95
|
raise DataChainError(
|
|
96
96
|
"Token already exists. "
|
|
97
97
|
"To login with a different token, "
|
|
98
|
-
"logout using `datachain
|
|
98
|
+
"logout using `datachain auth logout`."
|
|
99
99
|
)
|
|
100
100
|
|
|
101
101
|
open_browser = not args.no_open
|
|
@@ -121,12 +121,12 @@ def logout():
|
|
|
121
121
|
token = conf.get("studio", {}).get("token")
|
|
122
122
|
if not token:
|
|
123
123
|
raise DataChainError(
|
|
124
|
-
"Not logged in to Studio. Log in with 'datachain
|
|
124
|
+
"Not logged in to Studio. Log in with 'datachain auth login'."
|
|
125
125
|
)
|
|
126
126
|
|
|
127
127
|
del conf["studio"]["token"]
|
|
128
128
|
|
|
129
|
-
print("Logged out from Studio. (you can log back in with 'datachain
|
|
129
|
+
print("Logged out from Studio. (you can log back in with 'datachain auth login')")
|
|
130
130
|
|
|
131
131
|
|
|
132
132
|
def token():
|
|
@@ -134,7 +134,7 @@ def token():
|
|
|
134
134
|
token = config.get("token")
|
|
135
135
|
if not token:
|
|
136
136
|
raise DataChainError(
|
|
137
|
-
"Not logged in to Studio. Log in with 'datachain
|
|
137
|
+
"Not logged in to Studio. Log in with 'datachain auth login'."
|
|
138
138
|
)
|
|
139
139
|
|
|
140
140
|
print(token)
|
|
@@ -299,7 +299,7 @@ def cancel_job(job_id: str, team_name: Optional[str]):
|
|
|
299
299
|
token = Config().read().get("studio", {}).get("token")
|
|
300
300
|
if not token:
|
|
301
301
|
raise DataChainError(
|
|
302
|
-
"Not logged in to Studio. Log in with 'datachain
|
|
302
|
+
"Not logged in to Studio. Log in with 'datachain auth login'."
|
|
303
303
|
)
|
|
304
304
|
|
|
305
305
|
client = StudioClient(team=team_name)
|
|
@@ -314,7 +314,7 @@ def show_job_logs(job_id: str, team_name: Optional[str]):
|
|
|
314
314
|
token = Config().read().get("studio", {}).get("token")
|
|
315
315
|
if not token:
|
|
316
316
|
raise DataChainError(
|
|
317
|
-
"Not logged in to Studio. Log in with 'datachain
|
|
317
|
+
"Not logged in to Studio. Log in with 'datachain auth login'."
|
|
318
318
|
)
|
|
319
319
|
|
|
320
320
|
client = StudioClient(team=team_name)
|
|
@@ -284,6 +284,7 @@ tests/unit/lib/test_hf.py
|
|
|
284
284
|
tests/unit/lib/test_image.py
|
|
285
285
|
tests/unit/lib/test_listing_info.py
|
|
286
286
|
tests/unit/lib/test_models.py
|
|
287
|
+
tests/unit/lib/test_python_to_sql.py
|
|
287
288
|
tests/unit/lib/test_schema.py
|
|
288
289
|
tests/unit/lib/test_signal_schema.py
|
|
289
290
|
tests/unit/lib/test_sql_to_python.py
|