datachain 0.2.17__tar.gz → 0.2.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain (0.2.18) has been flagged as potentially problematic; consult the package's registry page for more details.
- {datachain-0.2.17 → datachain-0.2.18}/.pre-commit-config.yaml +1 -1
- {datachain-0.2.17/src/datachain.egg-info → datachain-0.2.18}/PKG-INFO +1 -1
- datachain-0.2.18/src/datachain/lib/convert/sql_to_python.py +18 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/dc.py +24 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/functions/__init__.py +3 -2
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/functions/array.py +8 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/sqlite/base.py +5 -0
- {datachain-0.2.17 → datachain-0.2.18/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain.egg-info/SOURCES.txt +1 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_datachain.py +94 -6
- datachain-0.2.18/tests/unit/lib/test_sql_to_python.py +28 -0
- datachain-0.2.17/src/datachain/lib/convert/sql_to_python.py +0 -23
- {datachain-0.2.17 → datachain-0.2.18}/.cruft.json +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/.gitattributes +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/.github/codecov.yaml +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/.github/dependabot.yml +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/.github/workflows/release.yml +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/.github/workflows/tests.yml +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/.gitignore +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/CONTRIBUTING.rst +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/LICENSE +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/README.rst +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/docs/assets/datachain.png +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/docs/assets/flowchart.png +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/docs/index.md +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/docs/references/datachain.md +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/docs/references/datatype.md +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/docs/references/file.md +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/docs/references/index.md +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/docs/references/sql.md +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/docs/references/torch.md +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/docs/references/udf.md +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/blip2_image_desc_lib.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/.gitignore +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/1-quick-start.ipynb +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/2-working-with-image-datachains.ipynb +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/3-train-model.ipynb +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/4-inference.ipynb +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/README.md +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/requirements.txt +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/scripts/1-quick-start.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/scripts/2-basic-operations.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/scripts/2-embeddings.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/scripts/3-split-train-test.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/scripts/3-train-model.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/src/clustering.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/src/train.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/basic-operations.png +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/core-concepts.png +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/datachain-logo.png +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/datachain-overview.png +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/dataset-1.png +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/dataset-2.png +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/dataset-3.png +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/static/images/studio.png +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/json-metadata-tutorial.ipynb +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/llm/llm_chatbot_evaluation.ipynb +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/llm_and_nlp/llm-claude-aggregate-query.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/llm_and_nlp/llm-claude-simple-query.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/llm_and_nlp/llm-claude.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/llm_and_nlp/unstructured-text.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/multimodal/clip_fine_tuning.ipynb +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/multimodal/wds.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/mkdocs.yml +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/noxfile.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/pyproject.toml +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/setup.cfg +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/__main__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/asyn.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/cache.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/catalog/subclass.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/cli.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/cli_utils.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/azure.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/gcs.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/local.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/client/s3.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/config.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/id_generator.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/dataset.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/error.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/job.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/clip.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/file.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/image.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/settings.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/text.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/udf.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/utils.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/listing.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/node.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/progress.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/py.typed +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/batch.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/builtins.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/dataset.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/metrics.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/params.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/schema.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/session.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/query/udf.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/remote/studio.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/types.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/sql/utils.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/storage.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain/utils.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/conftest.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/data.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/examples/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/examples/wds_data.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/func/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_catalog.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_client.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_datachain.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_datasets.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_ls.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_pull.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_pytorch.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/func/test_query.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/scripts/feature_class.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/scripts/name_len_normal.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/test_cli_e2e.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/test_query_e2e.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_asyn.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_cache.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_catalog.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_client.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_dataset.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_id_generator.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_listing.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_metastore.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_query_params.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_serializer.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_session.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_storage.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_udf.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_utils.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.2.17 → datachain-0.2.18}/tests/utils.py +0 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from decimal import Decimal
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import ColumnElement
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def sql_to_python(args_map: dict[str, ColumnElement]) -> dict[str, Any]:
|
|
8
|
+
res = {}
|
|
9
|
+
for name, sql_exp in args_map.items():
|
|
10
|
+
try:
|
|
11
|
+
type_ = sql_exp.type.python_type
|
|
12
|
+
if type_ == Decimal:
|
|
13
|
+
type_ = float
|
|
14
|
+
except NotImplementedError:
|
|
15
|
+
type_ = str
|
|
16
|
+
res[name] = type_
|
|
17
|
+
|
|
18
|
+
return res
|
|
@@ -20,8 +20,10 @@ import pandas as pd
|
|
|
20
20
|
import sqlalchemy
|
|
21
21
|
from pydantic import BaseModel, create_model
|
|
22
22
|
from sqlalchemy.sql.functions import GenericFunction
|
|
23
|
+
from sqlalchemy.sql.sqltypes import NullType
|
|
23
24
|
|
|
24
25
|
from datachain import DataModel
|
|
26
|
+
from datachain.lib.convert.python_to_sql import python_to_sql
|
|
25
27
|
from datachain.lib.convert.values_to_tuples import values_to_tuples
|
|
26
28
|
from datachain.lib.data_model import DataType
|
|
27
29
|
from datachain.lib.dataset_info import DatasetInfo
|
|
@@ -110,6 +112,11 @@ class DatasetMergeError(DataChainParamsError): # noqa: D101
|
|
|
110
112
|
super().__init__(f"Merge error on='{on_str}'{right_on_str}: {msg}")
|
|
111
113
|
|
|
112
114
|
|
|
115
|
+
class DataChainColumnError(DataChainParamsError): # noqa: D101
|
|
116
|
+
def __init__(self, col_name, msg): # noqa: D107
|
|
117
|
+
super().__init__(f"Error for column {col_name}: {msg}")
|
|
118
|
+
|
|
119
|
+
|
|
113
120
|
OutputType = Union[None, DataType, Sequence[str], dict[str, DataType]]
|
|
114
121
|
|
|
115
122
|
|
|
@@ -225,6 +232,17 @@ class DataChain(DatasetQuery):
|
|
|
225
232
|
"""Get schema of the chain."""
|
|
226
233
|
return self._effective_signals_schema.values
|
|
227
234
|
|
|
235
|
+
def column(self, name: str) -> Column:
|
|
236
|
+
"""Returns Column instance with a type if name is found in current schema,
|
|
237
|
+
otherwise raises an exception.
|
|
238
|
+
"""
|
|
239
|
+
name_path = name.split(".")
|
|
240
|
+
for path, type_, _, _ in self.signals_schema.get_flat_tree():
|
|
241
|
+
if path == name_path:
|
|
242
|
+
return Column(name, python_to_sql(type_))
|
|
243
|
+
|
|
244
|
+
raise ValueError(f"Column with name {name} not found in the schema")
|
|
245
|
+
|
|
228
246
|
def print_schema(self) -> None:
|
|
229
247
|
"""Print schema of the chain."""
|
|
230
248
|
self._effective_signals_schema.print_tree()
|
|
@@ -829,6 +847,12 @@ class DataChain(DatasetQuery):
|
|
|
829
847
|
)
|
|
830
848
|
```
|
|
831
849
|
"""
|
|
850
|
+
for col_name, expr in kwargs.items():
|
|
851
|
+
if not isinstance(expr, Column) and isinstance(expr.type, NullType):
|
|
852
|
+
raise DataChainColumnError(
|
|
853
|
+
col_name, f"Cannot infer type with expression {expr}"
|
|
854
|
+
)
|
|
855
|
+
|
|
832
856
|
mutated = {}
|
|
833
857
|
schema = self.signals_schema
|
|
834
858
|
for name, value in kwargs.items():
|
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
from sqlalchemy.sql.expression import func
|
|
2
2
|
|
|
3
|
-
from . import path, string
|
|
3
|
+
from . import array, path, string
|
|
4
|
+
from .array import avg
|
|
4
5
|
from .conditional import greatest, least
|
|
5
6
|
from .random import rand
|
|
6
7
|
|
|
7
8
|
count = func.count
|
|
8
9
|
sum = func.sum
|
|
9
|
-
avg = func.avg
|
|
10
10
|
min = func.min
|
|
11
11
|
max = func.max
|
|
12
12
|
|
|
13
13
|
__all__ = [
|
|
14
|
+
"array",
|
|
14
15
|
"avg",
|
|
15
16
|
"count",
|
|
16
17
|
"func",
|
|
@@ -44,7 +44,15 @@ class sip_hash_64(GenericFunction): # noqa: N801
|
|
|
44
44
|
inherit_cache = True
|
|
45
45
|
|
|
46
46
|
|
|
47
|
+
class avg(GenericFunction): # noqa: N801
|
|
48
|
+
type = Float()
|
|
49
|
+
package = "array"
|
|
50
|
+
name = "avg"
|
|
51
|
+
inherit_cache = True
|
|
52
|
+
|
|
53
|
+
|
|
47
54
|
compiler_not_implemented(cosine_distance)
|
|
48
55
|
compiler_not_implemented(euclidean_distance)
|
|
49
56
|
compiler_not_implemented(length)
|
|
50
57
|
compiler_not_implemented(sip_hash_64)
|
|
58
|
+
compiler_not_implemented(avg)
|
|
@@ -78,6 +78,7 @@ def setup():
|
|
|
78
78
|
compiles(conditional.least, "sqlite")(compile_least)
|
|
79
79
|
compiles(Values, "sqlite")(compile_values)
|
|
80
80
|
compiles(random.rand, "sqlite")(compile_rand)
|
|
81
|
+
compiles(array.avg, "sqlite")(compile_avg)
|
|
81
82
|
|
|
82
83
|
if load_usearch_extension(sqlite3.connect(":memory:")):
|
|
83
84
|
compiles(array.cosine_distance, "sqlite")(compile_cosine_distance_ext)
|
|
@@ -349,6 +350,10 @@ def compile_rand(element, compiler, **kwargs):
|
|
|
349
350
|
return compiler.process(func.random(), **kwargs)
|
|
350
351
|
|
|
351
352
|
|
|
353
|
+
def compile_avg(element, compiler, **kwargs):
|
|
354
|
+
return compiler.process(func.avg(*element.clauses.clauses), **kwargs)
|
|
355
|
+
|
|
356
|
+
|
|
352
357
|
def load_usearch_extension(conn) -> bool:
|
|
353
358
|
try:
|
|
354
359
|
# usearch is part of the vector optional dependencies
|
|
@@ -239,6 +239,7 @@ tests/unit/lib/test_feature_utils.py
|
|
|
239
239
|
tests/unit/lib/test_file.py
|
|
240
240
|
tests/unit/lib/test_image.py
|
|
241
241
|
tests/unit/lib/test_signal_schema.py
|
|
242
|
+
tests/unit/lib/test_sql_to_python.py
|
|
242
243
|
tests/unit/lib/test_text.py
|
|
243
244
|
tests/unit/lib/test_udf_signature.py
|
|
244
245
|
tests/unit/lib/test_utils.py
|
|
@@ -10,7 +10,7 @@ from pydantic import BaseModel
|
|
|
10
10
|
|
|
11
11
|
from datachain import Column
|
|
12
12
|
from datachain.lib.data_model import DataModel
|
|
13
|
-
from datachain.lib.dc import C, DataChain, Sys
|
|
13
|
+
from datachain.lib.dc import C, DataChain, DataChainColumnError, Sys
|
|
14
14
|
from datachain.lib.file import File
|
|
15
15
|
from datachain.lib.signal_schema import (
|
|
16
16
|
SignalResolvingError,
|
|
@@ -19,6 +19,8 @@ from datachain.lib.signal_schema import (
|
|
|
19
19
|
)
|
|
20
20
|
from datachain.lib.udf_signature import UdfSignatureError
|
|
21
21
|
from datachain.lib.utils import DataChainParamsError
|
|
22
|
+
from datachain.sql import functions as func
|
|
23
|
+
from datachain.sql.types import Float, Int64, String
|
|
22
24
|
from tests.utils import skip_if_not_sqlite
|
|
23
25
|
|
|
24
26
|
DF_DATA = {
|
|
@@ -1254,14 +1256,20 @@ def test_column_math(test_session):
|
|
|
1254
1256
|
fib = [1, 1, 2, 3, 5, 8]
|
|
1255
1257
|
chain = DataChain.from_values(num=fib, session=test_session)
|
|
1256
1258
|
|
|
1257
|
-
ch = chain.mutate(add2=
|
|
1259
|
+
ch = chain.mutate(add2=chain.column("num") + 2)
|
|
1258
1260
|
assert list(ch.collect("add2")) == [x + 2 for x in fib]
|
|
1259
1261
|
|
|
1260
|
-
|
|
1261
|
-
assert list(
|
|
1262
|
+
ch2 = ch.mutate(x=1 - ch.column("add2"))
|
|
1263
|
+
assert list(ch2.collect("x")) == [1 - (x + 2.0) for x in fib]
|
|
1264
|
+
|
|
1265
|
+
|
|
1266
|
+
def test_column_math_division(test_session):
|
|
1267
|
+
skip_if_not_sqlite()
|
|
1268
|
+
fib = [1, 1, 2, 3, 5, 8]
|
|
1269
|
+
chain = DataChain.from_values(num=fib, session=test_session)
|
|
1262
1270
|
|
|
1263
|
-
|
|
1264
|
-
assert list(
|
|
1271
|
+
ch = chain.mutate(div2=chain.column("num") / 2.0)
|
|
1272
|
+
assert list(ch.collect("div2")) == [x / 2.0 for x in fib]
|
|
1265
1273
|
|
|
1266
1274
|
|
|
1267
1275
|
def test_from_values_array_of_floats(test_session):
|
|
@@ -1409,3 +1417,83 @@ def test_rename_object_name_with_mutate(catalog):
|
|
|
1409
1417
|
assert ds.signals_schema.values.get("ids") is int
|
|
1410
1418
|
assert "file" not in ds.signals_schema.values
|
|
1411
1419
|
assert list(ds.order_by("my_file.name").collect("my_file.name")) == ["a", "b", "c"]
|
|
1420
|
+
|
|
1421
|
+
|
|
1422
|
+
def test_column(catalog):
|
|
1423
|
+
ds = DataChain.from_values(
|
|
1424
|
+
ints=[1, 2], floats=[0.5, 0.5], file=[File(name="a"), File(name="b")]
|
|
1425
|
+
)
|
|
1426
|
+
|
|
1427
|
+
c = ds.column("ints")
|
|
1428
|
+
assert isinstance(c, Column)
|
|
1429
|
+
assert c.name == "ints"
|
|
1430
|
+
assert isinstance(c.type, Int64)
|
|
1431
|
+
|
|
1432
|
+
c = ds.column("floats")
|
|
1433
|
+
assert isinstance(c, Column)
|
|
1434
|
+
assert c.name == "floats"
|
|
1435
|
+
assert isinstance(c.type, Float)
|
|
1436
|
+
|
|
1437
|
+
c = ds.column("file.name")
|
|
1438
|
+
assert isinstance(c, Column)
|
|
1439
|
+
assert c.name == "file__name"
|
|
1440
|
+
assert isinstance(c.type, String)
|
|
1441
|
+
|
|
1442
|
+
with pytest.raises(ValueError):
|
|
1443
|
+
c = ds.column("missing")
|
|
1444
|
+
|
|
1445
|
+
|
|
1446
|
+
def test_mutate_with_subtraction():
|
|
1447
|
+
ds = DataChain.from_values(id=[1, 2])
|
|
1448
|
+
assert ds.mutate(new=ds.column("id") - 1).signals_schema.values["new"] is int
|
|
1449
|
+
|
|
1450
|
+
|
|
1451
|
+
def test_mutate_with_addition():
|
|
1452
|
+
ds = DataChain.from_values(id=[1, 2])
|
|
1453
|
+
assert ds.mutate(new=ds.column("id") + 1).signals_schema.values["new"] is int
|
|
1454
|
+
|
|
1455
|
+
|
|
1456
|
+
def test_mutate_with_division():
|
|
1457
|
+
ds = DataChain.from_values(id=[1, 2])
|
|
1458
|
+
assert ds.mutate(new=ds.column("id") / 10).signals_schema.values["new"] is float
|
|
1459
|
+
|
|
1460
|
+
|
|
1461
|
+
def test_mutate_with_multiplication():
|
|
1462
|
+
ds = DataChain.from_values(id=[1, 2])
|
|
1463
|
+
assert ds.mutate(new=ds.column("id") * 10).signals_schema.values["new"] is int
|
|
1464
|
+
|
|
1465
|
+
|
|
1466
|
+
def test_mutate_with_func():
|
|
1467
|
+
ds = DataChain.from_values(id=[1, 2])
|
|
1468
|
+
assert (
|
|
1469
|
+
ds.mutate(new=func.avg(ds.column("id"))).signals_schema.values["new"] is float
|
|
1470
|
+
)
|
|
1471
|
+
|
|
1472
|
+
|
|
1473
|
+
def test_mutate_with_complex_expression():
|
|
1474
|
+
ds = DataChain.from_values(id=[1, 2], name=["Jim", "Jon"])
|
|
1475
|
+
assert (
|
|
1476
|
+
ds.mutate(
|
|
1477
|
+
new=(func.sum(ds.column("id"))) * (5 - func.min(ds.column("id")))
|
|
1478
|
+
).signals_schema.values["new"]
|
|
1479
|
+
is int
|
|
1480
|
+
)
|
|
1481
|
+
|
|
1482
|
+
|
|
1483
|
+
def test_mutate_with_saving():
|
|
1484
|
+
skip_if_not_sqlite()
|
|
1485
|
+
ds = DataChain.from_values(id=[1, 2])
|
|
1486
|
+
ds = ds.mutate(new=ds.column("id") / 2).save("mutated")
|
|
1487
|
+
|
|
1488
|
+
ds = DataChain(name="mutated")
|
|
1489
|
+
assert ds.signals_schema.values["new"] is float
|
|
1490
|
+
assert list(ds.collect("new")) == [0.5, 1.0]
|
|
1491
|
+
|
|
1492
|
+
|
|
1493
|
+
def test_mutate_with_expression_without_type(catalog):
|
|
1494
|
+
with pytest.raises(DataChainColumnError) as excinfo:
|
|
1495
|
+
DataChain.from_values(id=[1, 2]).mutate(new=(Column("id") - 1)).save()
|
|
1496
|
+
|
|
1497
|
+
assert str(excinfo.value) == (
|
|
1498
|
+
"Error for column new: Cannot infer type with expression id - :id_1"
|
|
1499
|
+
)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from sqlalchemy.sql.sqltypes import NullType
|
|
2
|
+
|
|
3
|
+
from datachain import Column
|
|
4
|
+
from datachain.lib.convert.sql_to_python import sql_to_python
|
|
5
|
+
from datachain.sql import functions as func
|
|
6
|
+
from datachain.sql.types import Float, Int64, String
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_sql_columns_to_python_types():
|
|
10
|
+
assert sql_to_python(
|
|
11
|
+
{
|
|
12
|
+
"name": Column("name", String),
|
|
13
|
+
"age": Column("age", Int64),
|
|
14
|
+
"score": Column("score", Float),
|
|
15
|
+
}
|
|
16
|
+
) == {"name": str, "age": int, "score": float}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_sql_expression_to_python_types():
|
|
20
|
+
assert sql_to_python({"age": Column("age", Int64) - 2}) == {"age": int}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_sql_function_to_python_types():
|
|
24
|
+
assert sql_to_python({"age": func.avg(Column("age", Int64))}) == {"age": float}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_sql_to_python_types_default_type():
|
|
28
|
+
assert sql_to_python({"null": Column("null", NullType)}) == {"null": str}
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
from datetime import datetime
|
|
2
|
-
from typing import Any
|
|
3
|
-
|
|
4
|
-
from sqlalchemy import ARRAY, JSON, Boolean, DateTime, Float, Integer, String
|
|
5
|
-
|
|
6
|
-
from datachain.data_storage.sqlite import Column
|
|
7
|
-
|
|
8
|
-
SQL_TO_PYTHON = {
|
|
9
|
-
String: str,
|
|
10
|
-
Integer: int,
|
|
11
|
-
Float: float,
|
|
12
|
-
Boolean: bool,
|
|
13
|
-
DateTime: datetime,
|
|
14
|
-
ARRAY: list,
|
|
15
|
-
JSON: dict,
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def sql_to_python(args_map: dict[str, Column]) -> dict[str, Any]:
|
|
20
|
-
return {
|
|
21
|
-
k: SQL_TO_PYTHON.get(type(v.type), str) # type: ignore[union-attr]
|
|
22
|
-
for k, v in args_map.items()
|
|
23
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/.gitignore
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/README.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datachain-0.2.17 → datachain-0.2.18}/examples/computer_vision/fashion_product_images/src/train.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|