datachain 0.3.11__tar.gz → 0.3.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.3.11 → datachain-0.3.13}/.pre-commit-config.yaml +1 -1
- {datachain-0.3.11/src/datachain.egg-info → datachain-0.3.13}/PKG-INFO +6 -7
- {datachain-0.3.11 → datachain-0.3.13}/README.rst +5 -5
- datachain-0.3.13/docs/assets/datachain-white.svg +1 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/get_started/udfs/stateful.py +4 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/multimodal/wds.py +1 -1
- {datachain-0.3.11 → datachain-0.3.13}/mkdocs.yml +2 -2
- {datachain-0.3.11 → datachain-0.3.13}/pyproject.toml +4 -2
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/cache.py +0 -1
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/catalog/catalog.py +50 -153
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/cli.py +4 -6
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/fsspec.py +0 -1
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/s3.py +0 -4
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/schema.py +4 -8
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/warehouse.py +6 -17
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/error.py +0 -4
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/clip.py +1 -1
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/dc.py +17 -5
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/file.py +9 -11
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/image.py +1 -1
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/meta_formats.py +4 -8
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/model_store.py +6 -1
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/text.py +1 -1
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/webdataset.py +13 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/webdataset_laion.py +13 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/listing.py +2 -2
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/node.py +4 -26
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/builtins.py +0 -14
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/schema.py +1 -16
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/utils.py +0 -3
- {datachain-0.3.11 → datachain-0.3.13/src/datachain.egg-info}/PKG-INFO +6 -7
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain.egg-info/SOURCES.txt +1 -1
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain.egg-info/requires.txt +0 -1
- {datachain-0.3.11 → datachain-0.3.13}/tests/conftest.py +0 -3
- {datachain-0.3.11 → datachain-0.3.13}/tests/data.py +0 -20
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_catalog.py +21 -43
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_datachain.py +0 -1
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_dataset_query.py +17 -42
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_datasets.py +0 -2
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_ls.py +0 -15
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_meta_formats.py +0 -1
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_pull.py +1 -10
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_pytorch.py +10 -3
- datachain-0.3.13/tests/func/test_query.py +173 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_datachain.py +0 -1
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_file.py +3 -7
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_cache.py +3 -7
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_client_s3.py +0 -1
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_data_storage.py +28 -32
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_dataset.py +0 -6
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_udf.py +0 -3
- {datachain-0.3.11 → datachain-0.3.13}/tests/utils.py +1 -15
- datachain-0.3.11/docs/assets/datachain_logotype.svg +0 -33
- datachain-0.3.11/tests/func/test_query.py +0 -377
- {datachain-0.3.11 → datachain-0.3.13}/.cruft.json +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/.gitattributes +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/.github/codecov.yaml +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/.github/dependabot.yml +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/.github/workflows/release.yml +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/.github/workflows/tests.yml +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/.gitignore +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/CONTRIBUTING.rst +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/LICENSE +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/docs/assets/datachain.svg +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/docs/assets/flowchart.png +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/docs/index.md +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/docs/references/datachain.md +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/docs/references/datatype.md +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/docs/references/file.md +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/docs/references/index.md +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/docs/references/sql.md +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/docs/references/torch.md +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/docs/references/udf.md +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/llm_and_nlp/unstructured-text.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/noxfile.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/setup.cfg +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/__main__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/asyn.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/cli_utils.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/azure.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/gcs.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/hf.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/client/local.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/config.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/id_generator.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/dataset.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/job.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/hf.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/listing.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/settings.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/udf.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/utils.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/progress.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/py.typed +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/batch.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/dataset.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/metrics.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/params.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/queue.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/session.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/query/udf.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/remote/studio.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/types.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/sql/utils.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/storage.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/examples/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/examples/test_examples.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/examples/wds_data.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_client.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_listing.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/func/test_metrics.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/scripts/feature_class.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/test_cli_e2e.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/test_query_e2e.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_asyn.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_catalog.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_client.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_id_generator.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_listing.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_metastore.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_query_params.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_serializer.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_session.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_storage.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_utils.py +0 -0
- {datachain-0.3.11 → datachain-0.3.13}/tests/unit/test_warehouse.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.3.11
|
|
3
|
+
Version: 0.3.13
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -71,7 +71,6 @@ Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
|
|
|
71
71
|
Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
|
|
72
72
|
Requires-Dist: pytest-servers[all]>=0.5.5; extra == "tests"
|
|
73
73
|
Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
|
|
74
|
-
Requires-Dist: pytest-asyncio>=0.23.2; extra == "tests"
|
|
75
74
|
Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
|
|
76
75
|
Requires-Dist: virtualenv; extra == "tests"
|
|
77
76
|
Requires-Dist: dulwich; extra == "tests"
|
|
@@ -96,12 +95,14 @@ Requires-Dist: unstructured[pdf]; extra == "examples"
|
|
|
96
95
|
Requires-Dist: pdfplumber==0.11.4; extra == "examples"
|
|
97
96
|
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
98
97
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
98
|
+
================
|
|
99
|
+
|logo| DataChain
|
|
100
|
+
================
|
|
102
101
|
|
|
103
102
|
|PyPI| |Python Version| |Codecov| |Tests|
|
|
104
103
|
|
|
104
|
+
.. |logo| image:: docs/assets/datachain.svg
|
|
105
|
+
:height: 24
|
|
105
106
|
.. |PyPI| image:: https://img.shields.io/pypi/v/datachain.svg
|
|
106
107
|
:target: https://pypi.org/project/datachain/
|
|
107
108
|
:alt: PyPI
|
|
@@ -115,8 +116,6 @@ Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
|
115
116
|
:target: https://github.com/iterative/datachain/actions/workflows/tests.yml
|
|
116
117
|
:alt: Tests
|
|
117
118
|
|
|
118
|
-
----------------
|
|
119
|
-
|
|
120
119
|
DataChain is a modern Pythonic data-frame library designed for artificial intelligence.
|
|
121
120
|
It is made to organize your unstructured data into datasets and wrangle it at scale on
|
|
122
121
|
your local machine. Datachain does not abstract or hide the AI models and API calls, but helps to integrate them into the postmodern data stack.
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
================
|
|
2
|
+
|logo| DataChain
|
|
3
|
+
================
|
|
4
4
|
|
|
5
5
|
|PyPI| |Python Version| |Codecov| |Tests|
|
|
6
6
|
|
|
7
|
+
.. |logo| image:: docs/assets/datachain.svg
|
|
8
|
+
:height: 24
|
|
7
9
|
.. |PyPI| image:: https://img.shields.io/pypi/v/datachain.svg
|
|
8
10
|
:target: https://pypi.org/project/datachain/
|
|
9
11
|
:alt: PyPI
|
|
@@ -17,8 +19,6 @@
|
|
|
17
19
|
:target: https://github.com/iterative/datachain/actions/workflows/tests.yml
|
|
18
20
|
:alt: Tests
|
|
19
21
|
|
|
20
|
-
----------------
|
|
21
|
-
|
|
22
22
|
DataChain is a modern Pythonic data-frame library designed for artificial intelligence.
|
|
23
23
|
It is made to organize your unstructured data into datasets and wrangle it at scale on
|
|
24
24
|
your local machine. Datachain does not abstract or hide the AI models and API calls, but helps to integrate them into the postmodern data stack.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
<svg width="180" height="33" fill="none" xmlns="http://www.w3.org/2000/svg"><style>.prefix__logo-fill{fill:#fff}</style><path fill-rule="evenodd" clip-rule="evenodd" d="M23.997 24.53l2.34-2.342 3.14 3.135-2.357 2.342a5.533 5.533 0 01-7.822 0l-4.704-4.7a5.536 5.536 0 010-7.823l4.76-4.763 3.124 3.14-4.745 4.747a1.106 1.106 0 000 1.57l4.699 4.694a1.107 1.107 0 001.565 0z" fill="url(#prefix__paint0_linear_449_28)"/><path fill-rule="evenodd" clip-rule="evenodd" d="M37.733 10.65a1.184 1.184 0 01-.234.357L26.253 22.255l3.13 3.135 11.234-11.242a5.536 5.536 0 000-7.824l-4.699-4.705a5.534 5.534 0 00-7.822 0l-3.278 3.263 3.134 3.135 3.268-3.268a1.107 1.107 0 011.564 0l4.694 4.694a1.108 1.108 0 01.244 1.208h.011z" fill="url(#prefix__paint1_linear_449_28)"/><path d="M24.54 14.722L22.2 17.063v.016l-2.405 2.388 3.14 3.134 4.741-4.75a5.534 5.534 0 000-7.822l-4.704-4.704a5.535 5.535 0 00-7.824 0l-5.955 5.954 3.14 3.13 5.944-5.945a1.107 1.107 0 011.565 0l4.7 4.694a1.107 1.107 0 010 1.564z" fill="url(#prefix__paint2_linear_449_28)"/><path d="M4.514 22.335c.054-.133.139-.256.24-.357L7.1 19.632l-.005-.011 3.14-3.129 2.147-2.135-3.135-3.14-7.629 7.638a5.534 5.534 0 000 7.822l4.705 4.704a5.536 5.536 0 007.824 0l3.175-3.18-3.134-3.13-3.165 3.165a1.106 1.106 0 01-1.57 0l-4.7-4.693a1.107 1.107 0 01-.24-1.208z" fill="url(#prefix__paint3_linear_449_28)"/><path d="M55.645 26.613c-.994 0-1.908-.182-2.745-.547a6.407 6.407 0 01-2.169-1.538 7.037 7.037 0 01-1.41-2.294 8.126 8.126 0 01-.497-2.867v-.547c0-1.008.157-1.955.47-2.841a7.478 7.478 0 011.36-2.32 6.201 6.201 0 012.116-1.538c.836-.382 1.76-.573 2.77-.573 1.115 0 2.09.243 2.927.73.854.469 1.533 1.181 2.038 2.137.506.956.784 2.155.837 3.597L60.27 16.76V7.117h3.633v19.027h-2.875v-6.02h.627c-.052 1.441-.348 2.649-.888 3.622-.54.956-1.255 1.677-2.143 2.163-.871.47-1.864.704-2.98.704zm.81-3.05c.714 0 1.367-.156 1.96-.469.592-.33 1.063-.799 1.41-1.407.367-.626.55-1.355.55-2.19v-1.042c0-.834-.183-1.53-.55-2.085a3.572 3.572 0 00-1.436-1.303 4.078 
4.078 0 00-1.934-.47c-.784 0-1.481.192-2.091.574-.592.365-1.063.886-1.411 1.564-.331.678-.497 1.468-.497 2.372 0 .903.174 1.694.523 2.372.348.66.819 1.172 1.411 1.537.61.365 1.298.548 2.065.548zM76.635 26.144v-4.196h-.6v-4.666c0-.817-.201-1.425-.602-1.824-.4-.4-1.019-.6-1.855-.6a68.629 68.629 0 00-3.423.104c-.61.018-1.16.044-1.647.079v-3.076c.4-.035.854-.07 1.359-.104.505-.035 1.02-.052 1.542-.052.54-.018 1.045-.026 1.515-.026 1.464 0 2.675.19 3.633.573.976.382 1.707.982 2.195 1.799.505.816.758 1.885.758 3.205v8.784h-2.875zm-4.573.365c-1.028 0-1.934-.183-2.718-.547a4.274 4.274 0 01-1.803-1.564c-.418-.678-.627-1.495-.627-2.45 0-1.043.252-1.894.758-2.555.522-.66 1.245-1.155 2.169-1.485.94-.33 2.038-.495 3.293-.495h3.292v2.163h-3.345c-.836 0-1.48.208-1.934.625-.435.4-.653.921-.653 1.564s.218 1.164.653 1.564c.453.4 1.098.6 1.934.6.505 0 .967-.087 1.385-.261a2.413 2.413 0 001.072-.938c.296-.452.462-1.06.496-1.825l.889 1.017c-.087.99-.331 1.824-.732 2.502a3.899 3.899 0 01-1.62 1.564c-.68.347-1.516.52-2.509.52zM89.569 26.326c-1.307 0-2.387-.165-3.24-.495a3.635 3.635 0 01-1.882-1.72c-.419-.817-.628-1.911-.628-3.284l.026-12.824h3.398l-.026 13.058c0 .695.183 1.234.548 1.616.384.365.924.548 1.62.548h2.222v3.101h-2.038zM81.572 14.65V11.99h10.035v2.659H81.572zM103.203 26.144v-4.196h-.601v-4.666c0-.817-.201-1.425-.601-1.824-.401-.4-1.02-.6-1.856-.6a68.629 68.629 0 00-3.423.104c-.61.018-1.159.044-1.647.079v-3.076c.4-.035.854-.07 1.36-.104.504-.035 1.018-.052 1.541-.052.54-.018 1.045-.026 1.516-.026 1.463 0 2.674.19 3.632.573.976.382 1.708.982 2.196 1.799.505.816.757 1.885.757 3.205v8.784h-2.874zm-4.574.365c-1.028 0-1.934-.183-2.718-.547a4.274 4.274 0 01-1.803-1.564c-.418-.678-.627-1.495-.627-2.45 0-1.043.253-1.894.758-2.555.523-.66 1.246-1.155 2.169-1.485.94-.33 2.038-.495 3.293-.495h3.293v2.163h-3.345c-.837 0-1.481.208-1.934.625-.436.4-.654.921-.654 1.564s.218 1.164.654 1.564c.453.4 1.097.6 1.934.6.505 0 .966-.087 1.385-.261a2.417 2.417 0 
001.071-.938c.296-.452.462-1.06.497-1.825l.888 1.017c-.087.99-.331 1.824-.732 2.502a3.897 3.897 0 01-1.62 1.564c-.679.347-1.516.52-2.509.52zM116.267 26.64c-1.237 0-2.309-.21-3.215-.626a6.773 6.773 0 01-2.247-1.668A7.117 7.117 0 01109.472 22a8.19 8.19 0 01-.444-2.659v-.495c0-.956.148-1.868.444-2.737a6.905 6.905 0 011.385-2.346 6.488 6.488 0 012.247-1.642c.906-.417 1.952-.625 3.136-.625 1.237 0 2.344.243 3.319.73.976.469 1.751 1.13 2.326 1.98.593.852.924 1.843.993 2.972h-3.528a2.824 2.824 0 00-.941-1.825c-.522-.486-1.245-.73-2.169-.73-.801 0-1.472.192-2.012.574-.523.382-.915.912-1.176 1.59-.261.66-.392 1.425-.392 2.294 0 .834.122 1.59.366 2.267.261.678.653 1.208 1.176 1.59.54.382 1.228.574 2.065.574.627 0 1.167-.114 1.62-.34.453-.225.81-.538 1.071-.938.279-.4.453-.851.523-1.355h3.528c-.07 1.147-.409 2.155-1.019 3.023-.593.852-1.385 1.52-2.378 2.007-.976.487-2.091.73-3.345.73zM125.919 26.144V7.117h3.633v11.104h-.628c0-1.425.183-2.633.549-3.623.366-.99.906-1.747 1.62-2.268.732-.521 1.656-.782 2.771-.782h.156c1.621 0 2.849.556 3.685 1.668.836 1.112 1.255 2.728 1.255 4.848v8.08h-3.633v-8.419c0-.903-.261-1.616-.784-2.137-.505-.521-1.176-.782-2.012-.782-.889 0-1.612.296-2.169.886-.54.574-.81 1.33-.81 2.268v8.184h-3.633zM151.463 26.144v-4.196h-.601v-4.666c0-.817-.201-1.425-.601-1.824-.401-.4-1.02-.6-1.856-.6a68.524 68.524 0 00-3.423.104c-.61.018-1.159.044-1.647.079v-3.076c.401-.035.854-.07 1.359-.104a22.491 22.491 0 011.542-.052c.54-.018 1.045-.026 1.516-.026 1.463 0 2.674.19 3.632.573.976.382 1.708.982 2.196 1.799.505.816.757 1.885.757 3.205v8.784h-2.874zm-4.574.365c-1.027 0-1.933-.183-2.717-.547a4.277 4.277 0 01-1.804-1.564c-.418-.678-.627-1.495-.627-2.45 0-1.043.253-1.894.758-2.555.523-.66 1.246-1.155 2.169-1.485.941-.33 2.038-.495 3.293-.495h3.293v2.163h-3.345c-.837 0-1.481.208-1.934.625-.436.4-.654.921-.654 1.564s.218 1.164.654 1.564c.453.4 1.097.6 1.934.6.505 0 .967-.087 1.385-.261a2.417 2.417 0 001.071-.938c.296-.452.462-1.06.497-1.825l.888 1.017c-.087.99-.331 
1.824-.731 2.502a3.905 3.905 0 01-1.621 1.564c-.679.347-1.516.52-2.509.52zM158.908 26.144V11.99h3.632v14.153h-3.632zm-1.986-11.442v-2.71h5.618v2.71h-5.618zm3.319-4.405c-.715 0-1.246-.183-1.594-.547-.331-.383-.497-.86-.497-1.434 0-.573.166-1.042.497-1.407.348-.365.879-.548 1.594-.548.714 0 1.237.183 1.568.548.331.365.496.834.496 1.407 0 .574-.165 1.051-.496 1.434-.331.364-.854.547-1.568.547zM166.727 26.144V11.99h2.875v6.073h-.262c0-1.442.192-2.641.575-3.597.384-.973.95-1.703 1.699-2.19.766-.486 1.716-.729 2.848-.729h.157c1.69 0 2.971.547 3.842 1.642.871 1.077 1.307 2.693 1.307 4.848v8.106h-3.633v-8.419c0-.869-.253-1.572-.758-2.11-.488-.54-1.167-.809-2.038-.809-.889 0-1.612.278-2.169.834-.54.539-.811 1.269-.811 2.19v8.314h-3.632z" class="prefix__logo-fill"/><defs><linearGradient id="prefix__paint0_linear_449_28" x1="36.032" y1="5.404" x2="18.067" y2="23.054" gradientUnits="userSpaceOnUse"><stop stop-color="#F46837"/><stop offset="1" stop-color="#945DD6"/></linearGradient><linearGradient id="prefix__paint1_linear_449_28" x1="36.045" y1="5.607" x2="18.067" y2="23.363" gradientUnits="userSpaceOnUse"><stop stop-color="#F46837"/><stop offset="1" stop-color="#945DD6"/></linearGradient><linearGradient id="prefix__paint2_linear_449_28" x1="5.924" y1="27.432" x2="23.883" y2="10.239" gradientUnits="userSpaceOnUse"><stop stop-color="#13ADC7"/><stop offset="1" stop-color="#945DD6"/></linearGradient><linearGradient id="prefix__paint3_linear_449_28" x1="5.77" y1="27.586" x2="23.574" y2="9.776" gradientUnits="userSpaceOnUse"><stop stop-color="#13ADC7"/><stop offset="1" stop-color="#945DD6"/></linearGradient></defs></svg>
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
site_name: ''
|
|
2
|
-
site_url: https://datachain.
|
|
2
|
+
site_url: https://docs.datachain.ai
|
|
3
3
|
site_description: Wrangle unstructured AI data at scale
|
|
4
4
|
|
|
5
5
|
repo_url: "https://github.com/iterative/datachain"
|
|
@@ -15,7 +15,7 @@ validation:
|
|
|
15
15
|
|
|
16
16
|
theme:
|
|
17
17
|
name: material
|
|
18
|
-
logo: assets/datachain_logotype.svg
|
|
18
|
+
logo: assets/datachain-white.svg
|
|
19
19
|
favicon: assets/datachain.svg
|
|
20
20
|
icon:
|
|
21
21
|
repo: fontawesome/brands/github
|
|
@@ -82,7 +82,6 @@ tests = [
|
|
|
82
82
|
"pytest-mock>=3.12.0",
|
|
83
83
|
"pytest-servers[all]>=0.5.5",
|
|
84
84
|
"pytest-benchmark[histogram]",
|
|
85
|
-
"pytest-asyncio>=0.23.2",
|
|
86
85
|
"pytest-xdist>=3.3.1",
|
|
87
86
|
"virtualenv",
|
|
88
87
|
"dulwich",
|
|
@@ -136,13 +135,16 @@ markers = [
|
|
|
136
135
|
"llm_and_nlp: LLM and NLP examples",
|
|
137
136
|
"multimodal: Multimodal examples"
|
|
138
137
|
]
|
|
139
|
-
asyncio_mode = "auto"
|
|
140
138
|
filterwarnings = [
|
|
141
139
|
"error::pandas.errors.PerformanceWarning",
|
|
142
140
|
"error::pydantic.warnings.PydanticDeprecatedSince20",
|
|
143
141
|
"error::pytest_mock.PytestMockWarning",
|
|
144
142
|
"error::pytest.PytestCollectionWarning",
|
|
145
143
|
"error::sqlalchemy.exc.SADeprecationWarning",
|
|
144
|
+
"ignore::DeprecationWarning:timm.*",
|
|
145
|
+
"ignore::DeprecationWarning:botocore.auth",
|
|
146
|
+
"ignore::DeprecationWarning:datasets.utils._dill",
|
|
147
|
+
"ignore::DeprecationWarning:librosa.core.intervals",
|
|
146
148
|
"ignore:Field name .* shadows an attribute in parent:UserWarning" # datachain.lib.feature
|
|
147
149
|
]
|
|
148
150
|
|
|
@@ -12,7 +12,6 @@ import sys
|
|
|
12
12
|
import time
|
|
13
13
|
import traceback
|
|
14
14
|
from collections.abc import Iterable, Iterator, Mapping, Sequence
|
|
15
|
-
from contextlib import contextmanager, nullcontext
|
|
16
15
|
from copy import copy
|
|
17
16
|
from dataclasses import dataclass
|
|
18
17
|
from functools import cached_property, reduce
|
|
@@ -23,7 +22,6 @@ from typing import (
|
|
|
23
22
|
TYPE_CHECKING,
|
|
24
23
|
Any,
|
|
25
24
|
Callable,
|
|
26
|
-
NamedTuple,
|
|
27
25
|
NoReturn,
|
|
28
26
|
Optional,
|
|
29
27
|
Union,
|
|
@@ -58,14 +56,13 @@ from datachain.error import (
|
|
|
58
56
|
PendingIndexingError,
|
|
59
57
|
QueryScriptCancelError,
|
|
60
58
|
QueryScriptCompileError,
|
|
61
|
-
QueryScriptDatasetNotFound,
|
|
62
59
|
QueryScriptRunError,
|
|
63
60
|
)
|
|
64
61
|
from datachain.listing import Listing
|
|
65
62
|
from datachain.node import DirType, Node, NodeWithPath
|
|
66
63
|
from datachain.nodes_thread_pool import NodesThreadPool
|
|
67
64
|
from datachain.remote.studio import StudioClient
|
|
68
|
-
from datachain.sql.types import JSON, Boolean, DateTime,
|
|
65
|
+
from datachain.sql.types import JSON, Boolean, DateTime, Int64, SQLType, String
|
|
69
66
|
from datachain.storage import Storage, StorageStatus, StorageURI
|
|
70
67
|
from datachain.utils import (
|
|
71
68
|
DataChainDir,
|
|
@@ -115,44 +112,19 @@ def noop(_: str):
|
|
|
115
112
|
pass
|
|
116
113
|
|
|
117
114
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
stream:
|
|
121
|
-
|
|
122
|
-
lines: list[str] = []
|
|
123
|
-
append = lines.append
|
|
115
|
+
def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
|
|
116
|
+
buffer = b""
|
|
117
|
+
while byt := stream.read(1): # Read one byte at a time
|
|
118
|
+
buffer += byt
|
|
124
119
|
|
|
125
|
-
|
|
126
|
-
buffer = b""
|
|
127
|
-
while byt := stream.read(1): # Read one byte at a time
|
|
128
|
-
buffer += byt.encode("utf-8") if isinstance(byt, str) else byt
|
|
129
|
-
|
|
130
|
-
if byt in (b"\n", b"\r"): # Check for newline or carriage return
|
|
131
|
-
line = buffer.decode("utf-8")
|
|
132
|
-
print(line, end="")
|
|
133
|
-
callback(line)
|
|
134
|
-
append(line)
|
|
135
|
-
buffer = b"" # Clear buffer for next line
|
|
136
|
-
|
|
137
|
-
if buffer: # Handle any remaining data in the buffer
|
|
120
|
+
if byt in (b"\n", b"\r"): # Check for newline or carriage return
|
|
138
121
|
line = buffer.decode("utf-8")
|
|
139
|
-
print(line, end="")
|
|
140
122
|
callback(line)
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
thread = Thread(target=loop, daemon=True)
|
|
144
|
-
thread.start()
|
|
145
|
-
|
|
146
|
-
try:
|
|
147
|
-
yield lines
|
|
148
|
-
finally:
|
|
149
|
-
thread.join()
|
|
150
|
-
|
|
123
|
+
buffer = b"" # Clear buffer for next line
|
|
151
124
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
output: str
|
|
125
|
+
if buffer: # Handle any remaining data in the buffer
|
|
126
|
+
line = buffer.decode("utf-8")
|
|
127
|
+
callback(line)
|
|
156
128
|
|
|
157
129
|
|
|
158
130
|
class DatasetRowsFetcher(NodesThreadPool):
|
|
@@ -541,8 +513,6 @@ def find_column_to_str( # noqa: PLR0911
|
|
|
541
513
|
)
|
|
542
514
|
if column == "name":
|
|
543
515
|
return posixpath.basename(row[field_lookup["path"]]) or ""
|
|
544
|
-
if column == "owner":
|
|
545
|
-
return row[field_lookup["owner_name"]] or ""
|
|
546
516
|
if column == "path":
|
|
547
517
|
is_dir = row[field_lookup["dir_type"]] == DirType.DIR
|
|
548
518
|
path = row[field_lookup["path"]]
|
|
@@ -651,11 +621,6 @@ class Catalog:
|
|
|
651
621
|
code_ast.body[-1:] = new_expressions
|
|
652
622
|
return code_ast
|
|
653
623
|
|
|
654
|
-
def compile_query_script(self, script: str) -> str:
|
|
655
|
-
code_ast = ast.parse(script)
|
|
656
|
-
code_ast = self.attach_query_wrapper(code_ast)
|
|
657
|
-
return ast.unparse(code_ast)
|
|
658
|
-
|
|
659
624
|
def parse_url(self, uri: str, **config: Any) -> tuple[Client, str]:
|
|
660
625
|
config = config or self.client_config
|
|
661
626
|
return Client.parse_url(uri, self.cache, **config)
|
|
@@ -699,16 +664,12 @@ class Catalog:
|
|
|
699
664
|
source_metastore = self.metastore.clone(client.uri)
|
|
700
665
|
|
|
701
666
|
columns = [
|
|
702
|
-
Column("vtype", String),
|
|
703
|
-
Column("dir_type", Int),
|
|
704
667
|
Column("path", String),
|
|
705
668
|
Column("etag", String),
|
|
706
669
|
Column("version", String),
|
|
707
670
|
Column("is_latest", Boolean),
|
|
708
671
|
Column("last_modified", DateTime(timezone=True)),
|
|
709
672
|
Column("size", Int64),
|
|
710
|
-
Column("owner_name", String),
|
|
711
|
-
Column("owner_id", String),
|
|
712
673
|
Column("location", JSON),
|
|
713
674
|
Column("source", String),
|
|
714
675
|
]
|
|
@@ -1549,7 +1510,6 @@ class Catalog:
|
|
|
1549
1510
|
row["etag"],
|
|
1550
1511
|
row["version"],
|
|
1551
1512
|
row["is_latest"],
|
|
1552
|
-
row["vtype"],
|
|
1553
1513
|
row["location"],
|
|
1554
1514
|
row["last_modified"],
|
|
1555
1515
|
)
|
|
@@ -1805,14 +1765,15 @@ class Catalog:
|
|
|
1805
1765
|
def query(
|
|
1806
1766
|
self,
|
|
1807
1767
|
query_script: str,
|
|
1808
|
-
|
|
1809
|
-
python_executable:
|
|
1768
|
+
env: Optional[Mapping[str, str]] = None,
|
|
1769
|
+
python_executable: str = sys.executable,
|
|
1810
1770
|
save: bool = False,
|
|
1811
1771
|
capture_output: bool = True,
|
|
1812
1772
|
output_hook: Callable[[str], None] = noop,
|
|
1813
1773
|
params: Optional[dict[str, str]] = None,
|
|
1814
1774
|
job_id: Optional[str] = None,
|
|
1815
|
-
|
|
1775
|
+
_execute_last_expression: bool = False,
|
|
1776
|
+
) -> None:
|
|
1816
1777
|
"""
|
|
1817
1778
|
Method to run custom user Python script to run a query and, as result,
|
|
1818
1779
|
creates new dataset from the results of a query.
|
|
@@ -1835,92 +1796,21 @@ class Catalog:
|
|
|
1835
1796
|
C.size > 1000
|
|
1836
1797
|
)
|
|
1837
1798
|
"""
|
|
1838
|
-
if
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
envs,
|
|
1851
|
-
capture_output,
|
|
1852
|
-
output_hook,
|
|
1853
|
-
params,
|
|
1854
|
-
save,
|
|
1855
|
-
job_id,
|
|
1856
|
-
)
|
|
1857
|
-
output = "".join(lines)
|
|
1858
|
-
|
|
1859
|
-
if proc.returncode:
|
|
1860
|
-
if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
|
|
1861
|
-
raise QueryScriptCancelError(
|
|
1862
|
-
"Query script was canceled by user",
|
|
1863
|
-
return_code=proc.returncode,
|
|
1864
|
-
output=output,
|
|
1865
|
-
)
|
|
1866
|
-
raise QueryScriptRunError(
|
|
1867
|
-
f"Query script exited with error code {proc.returncode}",
|
|
1868
|
-
return_code=proc.returncode,
|
|
1869
|
-
output=output,
|
|
1870
|
-
)
|
|
1871
|
-
|
|
1872
|
-
def _get_dataset_versions_by_job_id():
|
|
1873
|
-
for dr, dv, job in self.list_datasets_versions():
|
|
1874
|
-
if job and str(job.id) == job_id:
|
|
1875
|
-
yield dr, dv
|
|
1876
|
-
|
|
1877
|
-
try:
|
|
1878
|
-
dr, dv = max(
|
|
1879
|
-
_get_dataset_versions_by_job_id(), key=lambda x: x[1].created_at
|
|
1880
|
-
)
|
|
1881
|
-
except ValueError as e:
|
|
1882
|
-
if not save:
|
|
1883
|
-
return QueryResult(dataset=None, version=None, output=output)
|
|
1884
|
-
|
|
1885
|
-
raise QueryScriptDatasetNotFound(
|
|
1886
|
-
"No dataset found after running Query script",
|
|
1887
|
-
output=output,
|
|
1888
|
-
) from e
|
|
1889
|
-
|
|
1890
|
-
dr = self.update_dataset(
|
|
1891
|
-
dr,
|
|
1892
|
-
script_output=output,
|
|
1893
|
-
query_script=query_script,
|
|
1894
|
-
)
|
|
1895
|
-
self.update_dataset_version_with_warehouse_info(
|
|
1896
|
-
dr,
|
|
1897
|
-
dv.version,
|
|
1898
|
-
script_output=output,
|
|
1899
|
-
query_script=query_script,
|
|
1900
|
-
job_id=job_id,
|
|
1901
|
-
is_job_result=True,
|
|
1902
|
-
)
|
|
1903
|
-
return QueryResult(dataset=dr, version=dv.version, output=output)
|
|
1799
|
+
if _execute_last_expression:
|
|
1800
|
+
try:
|
|
1801
|
+
code_ast = ast.parse(query_script)
|
|
1802
|
+
code_ast = self.attach_query_wrapper(code_ast)
|
|
1803
|
+
query_script_compiled = ast.unparse(code_ast)
|
|
1804
|
+
except Exception as exc:
|
|
1805
|
+
raise QueryScriptCompileError(
|
|
1806
|
+
f"Query script failed to compile, reason: {exc}"
|
|
1807
|
+
) from exc
|
|
1808
|
+
else:
|
|
1809
|
+
query_script_compiled = query_script
|
|
1810
|
+
assert not save
|
|
1904
1811
|
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
python_executable: str,
|
|
1908
|
-
query_script: str,
|
|
1909
|
-
envs: Optional[Mapping[str, str]],
|
|
1910
|
-
capture_output: bool,
|
|
1911
|
-
output_hook: Callable[[str], None],
|
|
1912
|
-
params: Optional[dict[str, str]],
|
|
1913
|
-
save: bool,
|
|
1914
|
-
job_id: Optional[str],
|
|
1915
|
-
) -> tuple[list[str], subprocess.Popen]:
|
|
1916
|
-
try:
|
|
1917
|
-
query_script_compiled = self.compile_query_script(query_script)
|
|
1918
|
-
except Exception as exc:
|
|
1919
|
-
raise QueryScriptCompileError(
|
|
1920
|
-
f"Query script failed to compile, reason: {exc}"
|
|
1921
|
-
) from exc
|
|
1922
|
-
envs = dict(envs or os.environ)
|
|
1923
|
-
envs.update(
|
|
1812
|
+
env = dict(env or os.environ)
|
|
1813
|
+
env.update(
|
|
1924
1814
|
{
|
|
1925
1815
|
"DATACHAIN_QUERY_PARAMS": json.dumps(params or {}),
|
|
1926
1816
|
"PYTHONPATH": os.getcwd(), # For local imports
|
|
@@ -1929,19 +1819,28 @@ class Catalog:
|
|
|
1929
1819
|
"DATACHAIN_JOB_ID": job_id or "",
|
|
1930
1820
|
},
|
|
1931
1821
|
)
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1822
|
+
popen_kwargs = {}
|
|
1823
|
+
if capture_output:
|
|
1824
|
+
popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
|
|
1825
|
+
|
|
1826
|
+
cmd = [python_executable, "-c", query_script_compiled]
|
|
1827
|
+
with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # type: ignore[call-overload] # noqa: S603
|
|
1828
|
+
if capture_output:
|
|
1829
|
+
args = (proc.stdout, output_hook)
|
|
1830
|
+
thread = Thread(target=_process_stream, args=args, daemon=True)
|
|
1831
|
+
thread.start()
|
|
1832
|
+
thread.join() # wait for the reader thread
|
|
1833
|
+
|
|
1834
|
+
if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
|
|
1835
|
+
raise QueryScriptCancelError(
|
|
1836
|
+
"Query script was canceled by user",
|
|
1837
|
+
return_code=proc.returncode,
|
|
1838
|
+
)
|
|
1839
|
+
if proc.returncode:
|
|
1840
|
+
raise QueryScriptRunError(
|
|
1841
|
+
f"Query script exited with error code {proc.returncode}",
|
|
1842
|
+
return_code=proc.returncode,
|
|
1843
|
+
)
|
|
1945
1844
|
|
|
1946
1845
|
def cp(
|
|
1947
1846
|
self,
|
|
@@ -2081,8 +1980,6 @@ class Catalog:
|
|
|
2081
1980
|
field_set.add("path")
|
|
2082
1981
|
elif column == "name":
|
|
2083
1982
|
field_set.add("path")
|
|
2084
|
-
elif column == "owner":
|
|
2085
|
-
field_set.add("owner_name")
|
|
2086
1983
|
elif column == "path":
|
|
2087
1984
|
field_set.add("dir_type")
|
|
2088
1985
|
field_set.add("path")
|
|
@@ -24,7 +24,7 @@ logger = logging.getLogger("datachain")
|
|
|
24
24
|
|
|
25
25
|
TTL_HUMAN = "4h"
|
|
26
26
|
TTL_INT = 4 * 60 * 60
|
|
27
|
-
FIND_COLUMNS = ["du", "name", "
|
|
27
|
+
FIND_COLUMNS = ["du", "name", "path", "size", "type"]
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def human_time_type(value_str: str, can_be_none: bool = False) -> Optional[int]:
|
|
@@ -579,9 +579,8 @@ def _node_data_to_ls_values(row, long_format=False):
|
|
|
579
579
|
value = name + ending
|
|
580
580
|
if long_format:
|
|
581
581
|
last_modified = row[2]
|
|
582
|
-
owner_name = row[3]
|
|
583
582
|
timestamp = last_modified if not is_dir else None
|
|
584
|
-
return long_line_str(value, timestamp
|
|
583
|
+
return long_line_str(value, timestamp)
|
|
585
584
|
return value
|
|
586
585
|
|
|
587
586
|
|
|
@@ -599,7 +598,7 @@ def _ls_urls_flat(
|
|
|
599
598
|
if client_cls.is_root_url(source):
|
|
600
599
|
buckets = client_cls.ls_buckets(**catalog.client_config)
|
|
601
600
|
if long:
|
|
602
|
-
values = (long_line_str(b.name, b.created
|
|
601
|
+
values = (long_line_str(b.name, b.created) for b in buckets)
|
|
603
602
|
else:
|
|
604
603
|
values = (b.name for b in buckets)
|
|
605
604
|
yield source, values
|
|
@@ -607,7 +606,7 @@ def _ls_urls_flat(
|
|
|
607
606
|
found = False
|
|
608
607
|
fields = ["name", "dir_type"]
|
|
609
608
|
if long:
|
|
610
|
-
fields.
|
|
609
|
+
fields.append("last_modified")
|
|
611
610
|
for data_source, results in catalog.ls([source], fields=fields, **kwargs):
|
|
612
611
|
values = (_node_data_to_ls_values(r, long) for r in results)
|
|
613
612
|
found = True
|
|
@@ -683,7 +682,6 @@ def ls_remote(
|
|
|
683
682
|
entry = long_line_str(
|
|
684
683
|
row["name"] + ("/" if row["dir_type"] else ""),
|
|
685
684
|
row["last_modified"],
|
|
686
|
-
row["owner_name"],
|
|
687
685
|
)
|
|
688
686
|
print(format_ls_entry(entry))
|
|
689
687
|
else:
|
|
@@ -119,8 +119,6 @@ class ClientS3(Client):
|
|
|
119
119
|
is_latest=v.get("IsLatest", True),
|
|
120
120
|
last_modified=v.get("LastModified", ""),
|
|
121
121
|
size=v["Size"],
|
|
122
|
-
owner_name=v.get("Owner", {}).get("DisplayName", ""),
|
|
123
|
-
owner_id=v.get("Owner", {}).get("ID", ""),
|
|
124
122
|
)
|
|
125
123
|
|
|
126
124
|
async def _fetch_dir(
|
|
@@ -165,8 +163,6 @@ class ClientS3(Client):
|
|
|
165
163
|
is_latest=v.get("IsLatest", True),
|
|
166
164
|
last_modified=v.get("LastModified", ""),
|
|
167
165
|
size=v["size"],
|
|
168
|
-
owner_name=v.get("Owner", {}).get("DisplayName", ""),
|
|
169
|
-
owner_id=v.get("Owner", {}).get("ID", ""),
|
|
170
166
|
)
|
|
171
167
|
|
|
172
168
|
def info_to_file(self, v: dict[str, Any], path: str) -> File:
|
|
@@ -10,9 +10,8 @@ from typing import (
|
|
|
10
10
|
|
|
11
11
|
import sqlalchemy as sa
|
|
12
12
|
from sqlalchemy.sql import func as f
|
|
13
|
-
from sqlalchemy.sql.expression import null, true
|
|
13
|
+
from sqlalchemy.sql.expression import false, null, true
|
|
14
14
|
|
|
15
|
-
from datachain.node import DirType
|
|
16
15
|
from datachain.sql.functions import path
|
|
17
16
|
from datachain.sql.types import Int, SQLType, UInt64
|
|
18
17
|
|
|
@@ -81,8 +80,7 @@ class DirExpansion:
|
|
|
81
80
|
def base_select(q):
|
|
82
81
|
return sa.select(
|
|
83
82
|
q.c.sys__id,
|
|
84
|
-
|
|
85
|
-
(q.c.dir_type == DirType.DIR).label("is_dir"),
|
|
83
|
+
false().label("is_dir"),
|
|
86
84
|
q.c.source,
|
|
87
85
|
q.c.path,
|
|
88
86
|
q.c.version,
|
|
@@ -94,7 +92,6 @@ class DirExpansion:
|
|
|
94
92
|
return (
|
|
95
93
|
sa.select(
|
|
96
94
|
f.min(q.c.sys__id).label("sys__id"),
|
|
97
|
-
q.c.vtype,
|
|
98
95
|
q.c.is_dir,
|
|
99
96
|
q.c.source,
|
|
100
97
|
q.c.path,
|
|
@@ -102,8 +99,8 @@ class DirExpansion:
|
|
|
102
99
|
f.max(q.c.location).label("location"),
|
|
103
100
|
)
|
|
104
101
|
.select_from(q)
|
|
105
|
-
.group_by(q.c.source, q.c.path, q.c.
|
|
106
|
-
.order_by(q.c.source, q.c.path, q.c.
|
|
102
|
+
.group_by(q.c.source, q.c.path, q.c.is_dir, q.c.version)
|
|
103
|
+
.order_by(q.c.source, q.c.path, q.c.is_dir, q.c.version)
|
|
107
104
|
)
|
|
108
105
|
|
|
109
106
|
@classmethod
|
|
@@ -113,7 +110,6 @@ class DirExpansion:
|
|
|
113
110
|
q = q.union_all(
|
|
114
111
|
sa.select(
|
|
115
112
|
sa.literal(-1).label("sys__id"),
|
|
116
|
-
sa.literal("").label("vtype"),
|
|
117
113
|
true().label("is_dir"),
|
|
118
114
|
q.c.source,
|
|
119
115
|
parent.label("path"),
|