datachain 0.8.10__tar.gz → 0.8.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.8.10 → datachain-0.8.12}/.github/workflows/tests.yml +6 -14
- {datachain-0.8.10 → datachain-0.8.12}/.pre-commit-config.yaml +2 -2
- {datachain-0.8.10 → datachain-0.8.12}/PKG-INFO +3 -7
- {datachain-0.8.10 → datachain-0.8.12}/docs/overrides/main.html +10 -0
- datachain-0.8.12/docs/references/func.md +5 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/references/index.md +1 -1
- {datachain-0.8.10 → datachain-0.8.12}/examples/llm_and_nlp/hf-dataset-llm-eval.py +15 -9
- {datachain-0.8.10 → datachain-0.8.12}/mkdocs.yml +1 -1
- {datachain-0.8.10 → datachain-0.8.12}/pyproject.toml +2 -6
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cache.py +4 -4
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/catalog/__init__.py +0 -2
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/catalog/catalog.py +103 -158
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/__init__.py +7 -14
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/__init__.py +0 -2
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/datasets.py +0 -19
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/parser/__init__.py +27 -41
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/parser/studio.py +7 -6
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/parser/utils.py +18 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/fsspec.py +11 -8
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/local.py +4 -4
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/schema.py +1 -1
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/dataset.py +1 -7
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/error.py +12 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/__init__.py +2 -1
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/conditional.py +77 -26
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/func.py +17 -6
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/dc.py +24 -4
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/file.py +16 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/listing.py +30 -12
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/pytorch.py +1 -1
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/udf.py +1 -1
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/listing.py +1 -13
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/node.py +0 -15
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/nodes_fetcher.py +2 -2
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/remote/studio.py +2 -14
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/studio.py +1 -1
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain.egg-info/PKG-INFO +3 -7
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain.egg-info/SOURCES.txt +6 -4
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain.egg-info/requires.txt +2 -6
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_catalog.py +59 -391
- {datachain-0.8.10/tests/unit → datachain-0.8.12/tests/func}/test_client.py +87 -24
- {datachain-0.8.10/tests/unit → datachain-0.8.12/tests/func}/test_data_storage.py +1 -52
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_datachain.py +96 -6
- datachain-0.8.12/tests/func/test_datachain_merge.py +101 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_datasets.py +4 -4
- datachain-0.8.12/tests/func/test_file.py +65 -0
- datachain-0.8.12/tests/func/test_hf.py +50 -0
- datachain-0.8.12/tests/func/test_listing.py +64 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_pull.py +0 -32
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_pytorch.py +14 -11
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_query.py +79 -0
- datachain-0.8.12/tests/func/test_warehouse.py +6 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/scripts/name_len_slow.py +1 -1
- {datachain-0.8.10 → datachain-0.8.12}/tests/test_cli_studio.py +1 -1
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_datachain.py +51 -100
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_datachain_bootstrap.py +0 -30
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_datachain_merge.py +0 -97
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_diff.py +94 -107
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_file.py +0 -42
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_hf.py +1 -44
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/test_conditional.py +31 -1
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_cache.py +4 -4
- datachain-0.8.12/tests/unit/test_client.py +33 -0
- datachain-0.8.12/tests/unit/test_data_storage.py +77 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_func.py +119 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_listing.py +0 -36
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_pytorch.py +3 -3
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_query.py +16 -1
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_warehouse.py +0 -8
- datachain-0.8.10/docs/references/sql.md +0 -18
- datachain-0.8.10/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -78
- datachain-0.8.10/examples/llm_and_nlp/unstructured-summary-map.py +0 -67
- datachain-0.8.10/tests/func/test_client.py +0 -93
- datachain-0.8.10/tests/func/test_listing.py +0 -27
- datachain-0.8.10/tests/unit/test_diff.py +0 -70
- {datachain-0.8.10 → datachain-0.8.12}/.cruft.json +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/.gitattributes +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/.github/codecov.yaml +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/.github/dependabot.yml +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/.github/workflows/release.yml +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/.gitignore +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/LICENSE +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/README.rst +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/assets/datachain.svg +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/contributing.md +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/examples.md +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/index.md +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/quick-start.md +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/references/datachain.md +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/references/datatype.md +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/references/file.md +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/references/torch.md +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/references/udf.md +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/docs/tutorials.md +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/multimodal/wds.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/noxfile.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/setup.cfg +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/__main__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/asyn.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/utils.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/azure.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/gcs.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/hf.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/s3.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/config.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/array.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/base.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/numeric.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/path.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/random.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/string.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/window.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/job.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/clip.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/hf.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/image.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/settings.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/tar.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/text.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/utils.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/bbox.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/pose.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/segment.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/progress.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/py.typed +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/batch.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/dataset.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/metrics.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/params.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/queue.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/schema.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/session.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/udf.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/utils.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/types.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/utils.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/telemetry.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain/utils.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/conftest.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/data.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/examples/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/examples/test_examples.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/examples/wds_data.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_ls.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_metrics.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_session.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_toolkit.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/scripts/feature_class.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/test_atomicity.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/test_cli_e2e.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/test_query_e2e.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/test_telemetry.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_models.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_asyn.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_catalog.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_config.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_dataset.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_metastore.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_query_params.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_serializer.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_session.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_utils.py +0 -0
- {datachain-0.8.10 → datachain-0.8.12}/tests/utils.py +0 -0
|
@@ -3,7 +3,7 @@ name: Tests
|
|
|
3
3
|
on:
|
|
4
4
|
push:
|
|
5
5
|
branches: [main]
|
|
6
|
-
|
|
6
|
+
pull_request:
|
|
7
7
|
workflow_dispatch:
|
|
8
8
|
|
|
9
9
|
env:
|
|
@@ -14,15 +14,7 @@ concurrency:
|
|
|
14
14
|
cancel-in-progress: true
|
|
15
15
|
|
|
16
16
|
jobs:
|
|
17
|
-
authorize:
|
|
18
|
-
environment: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name != github.repository && 'external' || 'internal' }}
|
|
19
|
-
runs-on: ubuntu-latest
|
|
20
|
-
steps:
|
|
21
|
-
- run: true
|
|
22
|
-
|
|
23
17
|
lint:
|
|
24
|
-
needs: authorize
|
|
25
|
-
|
|
26
18
|
runs-on: ubuntu-latest
|
|
27
19
|
steps:
|
|
28
20
|
- name: Check out the repository
|
|
@@ -62,8 +54,6 @@ jobs:
|
|
|
62
54
|
run: nox -s lint
|
|
63
55
|
|
|
64
56
|
datachain:
|
|
65
|
-
needs: authorize
|
|
66
|
-
|
|
67
57
|
timeout-minutes: 40
|
|
68
58
|
runs-on: ${{ matrix.os }}
|
|
69
59
|
strategy:
|
|
@@ -112,7 +102,11 @@ jobs:
|
|
|
112
102
|
run: echo 'DISABLE_REMOTES_ARG=--disable-remotes=azure,gs' >> $env:GITHUB_ENV
|
|
113
103
|
|
|
114
104
|
- name: Run tests
|
|
115
|
-
run: nox -s tests-${{ matrix.pyv }} -- $DISABLE_REMOTES_ARG
|
|
105
|
+
run: nox -s tests-${{ matrix.pyv }} -- -m "not e2e and not examples" $DISABLE_REMOTES_ARG
|
|
106
|
+
shell: bash
|
|
107
|
+
|
|
108
|
+
- name: Run E2E tests
|
|
109
|
+
run: nox -s tests-${{ matrix.pyv }} -- -m "e2e" --cov-append $DISABLE_REMOTES_ARG
|
|
116
110
|
shell: bash
|
|
117
111
|
|
|
118
112
|
- name: Upload coverage report
|
|
@@ -129,8 +123,6 @@ jobs:
|
|
|
129
123
|
run: nox -s docs
|
|
130
124
|
|
|
131
125
|
examples:
|
|
132
|
-
needs: authorize
|
|
133
|
-
|
|
134
126
|
runs-on: ${{ matrix.os }}
|
|
135
127
|
timeout-minutes: 60
|
|
136
128
|
strategy:
|
|
@@ -24,13 +24,13 @@ repos:
|
|
|
24
24
|
- id: trailing-whitespace
|
|
25
25
|
exclude: '^LICENSES/'
|
|
26
26
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
27
|
-
rev: 'v0.9.
|
|
27
|
+
rev: 'v0.9.3'
|
|
28
28
|
hooks:
|
|
29
29
|
- id: ruff
|
|
30
30
|
args: [--fix, --exit-non-zero-on-fix]
|
|
31
31
|
- id: ruff-format
|
|
32
32
|
- repo: https://github.com/codespell-project/codespell
|
|
33
|
-
rev: v2.
|
|
33
|
+
rev: v2.4.0
|
|
34
34
|
hooks:
|
|
35
35
|
- id: codespell
|
|
36
36
|
additional_dependencies: ["tomli"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.8.10
|
|
3
|
+
Version: 0.8.12
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -78,7 +78,6 @@ Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
|
|
|
78
78
|
Requires-Dist: virtualenv; extra == "tests"
|
|
79
79
|
Requires-Dist: dulwich; extra == "tests"
|
|
80
80
|
Requires-Dist: hypothesis; extra == "tests"
|
|
81
|
-
Requires-Dist: open_clip_torch; extra == "tests"
|
|
82
81
|
Requires-Dist: aiotools>=1.7.0; extra == "tests"
|
|
83
82
|
Requires-Dist: requests-mock; extra == "tests"
|
|
84
83
|
Requires-Dist: scipy; extra == "tests"
|
|
@@ -94,12 +93,9 @@ Provides-Extra: examples
|
|
|
94
93
|
Requires-Dist: datachain[tests]; extra == "examples"
|
|
95
94
|
Requires-Dist: defusedxml; extra == "examples"
|
|
96
95
|
Requires-Dist: accelerate; extra == "examples"
|
|
97
|
-
Requires-Dist: unstructured_ingest[embed-huggingface]; extra == "examples"
|
|
98
|
-
Requires-Dist: unstructured[pdf]<0.16.12; extra == "examples"
|
|
99
|
-
Requires-Dist: pdfplumber==0.11.5; extra == "examples"
|
|
100
96
|
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
101
|
-
Requires-Dist:
|
|
102
|
-
Requires-Dist:
|
|
97
|
+
Requires-Dist: ultralytics==8.3.68; extra == "examples"
|
|
98
|
+
Requires-Dist: open_clip_torch; extra == "examples"
|
|
103
99
|
|
|
104
100
|
================
|
|
105
101
|
|logo| DataChain
|
|
@@ -8,6 +8,16 @@
|
|
|
8
8
|
<script type="text/javascript">
|
|
9
9
|
!function () { var e, t, n; e = "14ffd92a6cbf5f2", t = function () { Reo.init({ clientID: "14ffd92a6cbf5f2" }) }, (n = document.createElement("script")).src = "https://static.reo.dev/" + e + "/reo.js", n.async = !0, n.onload = t, document.head.appendChild(n) }();
|
|
10
10
|
</script>
|
|
11
|
+
<script>
|
|
12
|
+
function initApollo() {
|
|
13
|
+
var n = Math.random().toString(36).substring(7), o = document.createElement("script");
|
|
14
|
+
o.src = "https://assets.apollo.io/micro/website-tracker/tracker.iife.js?nocache=" + n, o.async = !0, o.defer = !0,
|
|
15
|
+
o.onload = function () { window.trackingFunctions.onLoad({ appId: "66315101e9aa7501c79140d9" }) },
|
|
16
|
+
document.head.appendChild(o)
|
|
17
|
+
};
|
|
18
|
+
initApollo();
|
|
19
|
+
</script>
|
|
20
|
+
|
|
11
21
|
|
|
12
22
|
|
|
13
23
|
{% endblock %}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
# Functions
|
|
2
|
+
|
|
3
|
+
Use built-in functions for data manipulation and analysis to operate on the underlying database storing the chain data. These functions are useful for operations like [`DataChain.filter`](datachain.md#datachain.lib.dc.DataChain.filter) and [`DataChain.mutate`](datachain.md#datachain.lib.dc.DataChain.mutate). Import these functions from `datachain.func`.
|
|
4
|
+
|
|
5
|
+
::: datachain.func
|
|
@@ -10,5 +10,5 @@ DataChain's API is organized into several modules:
|
|
|
10
10
|
- [DataType](./datatype.md) - Type system and schema definitions
|
|
11
11
|
- [File](./file.md) - File handling and storage operations
|
|
12
12
|
- [UDF](./udf.md) - User-defined functions and transformations
|
|
13
|
-
- [
|
|
13
|
+
- [Functions](./func.md) - Built-in functions for data manipulation and analysis
|
|
14
14
|
- [Torch](./torch.md) - PyTorch data loading utilities
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from huggingface_hub import InferenceClient
|
|
2
|
+
from requests import HTTPError
|
|
2
3
|
|
|
3
4
|
from datachain import C, DataChain, DataModel
|
|
4
5
|
|
|
@@ -20,15 +21,20 @@ def eval_dialog(
|
|
|
20
21
|
user_input: str,
|
|
21
22
|
bot_response: str,
|
|
22
23
|
) -> DialogEval:
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
24
|
+
try:
|
|
25
|
+
completion = client.chat_completion(
|
|
26
|
+
messages=[
|
|
27
|
+
{
|
|
28
|
+
"role": "user",
|
|
29
|
+
"content": f"{PROMPT}\n\nUser: {user_input}\nBot: {bot_response}",
|
|
30
|
+
},
|
|
31
|
+
],
|
|
32
|
+
response_format={"type": "json", "value": DialogEval.model_json_schema()},
|
|
33
|
+
)
|
|
34
|
+
except HTTPError:
|
|
35
|
+
return DialogEval(
|
|
36
|
+
result="Error", reason="Error while interacting with the Hugging Face API."
|
|
37
|
+
)
|
|
32
38
|
|
|
33
39
|
message = completion.choices[0].message
|
|
34
40
|
try:
|
|
@@ -73,7 +73,7 @@ nav:
|
|
|
73
73
|
- File: references/file.md
|
|
74
74
|
- UDF: references/udf.md
|
|
75
75
|
- Torch: references/torch.md
|
|
76
|
-
-
|
|
76
|
+
- Functions: references/func.md
|
|
77
77
|
- 🤝 Contributing: contributing.md
|
|
78
78
|
|
|
79
79
|
- DataChain Website ↗: https://datachain.ai" target="_blank"
|
|
@@ -89,7 +89,6 @@ tests = [
|
|
|
89
89
|
"virtualenv",
|
|
90
90
|
"dulwich",
|
|
91
91
|
"hypothesis",
|
|
92
|
-
"open_clip_torch",
|
|
93
92
|
"aiotools>=1.7.0",
|
|
94
93
|
"requests-mock",
|
|
95
94
|
"scipy"
|
|
@@ -107,12 +106,9 @@ examples = [
|
|
|
107
106
|
"datachain[tests]",
|
|
108
107
|
"defusedxml",
|
|
109
108
|
"accelerate",
|
|
110
|
-
"unstructured_ingest[embed-huggingface]",
|
|
111
|
-
"unstructured[pdf]<0.16.12",
|
|
112
|
-
"pdfplumber==0.11.5",
|
|
113
109
|
"huggingface_hub[hf_transfer]",
|
|
114
|
-
"
|
|
115
|
-
"
|
|
110
|
+
"ultralytics==8.3.68",
|
|
111
|
+
"open_clip_torch"
|
|
116
112
|
]
|
|
117
113
|
|
|
118
114
|
[project.urls]
|
|
@@ -22,15 +22,15 @@ def try_scandir(path):
|
|
|
22
22
|
pass
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
def get_temp_cache(tmp_dir: str, prefix: Optional[str] = None) -> "
|
|
25
|
+
def get_temp_cache(tmp_dir: str, prefix: Optional[str] = None) -> "Cache":
|
|
26
26
|
cache_dir = mkdtemp(prefix=prefix, dir=tmp_dir)
|
|
27
|
-
return
|
|
27
|
+
return Cache(cache_dir, tmp_dir=tmp_dir)
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
@contextmanager
|
|
31
31
|
def temporary_cache(
|
|
32
32
|
tmp_dir: str, prefix: Optional[str] = None, delete: bool = True
|
|
33
|
-
) -> Iterator["
|
|
33
|
+
) -> Iterator["Cache"]:
|
|
34
34
|
cache = get_temp_cache(tmp_dir, prefix=prefix)
|
|
35
35
|
try:
|
|
36
36
|
yield cache
|
|
@@ -39,7 +39,7 @@ def temporary_cache(
|
|
|
39
39
|
cache.destroy()
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
class
|
|
42
|
+
class Cache:
|
|
43
43
|
def __init__(self, cache_dir: str, tmp_dir: str):
|
|
44
44
|
self.odb = LocalHashFileDB(
|
|
45
45
|
LocalFileSystem(),
|
|
@@ -3,7 +3,6 @@ from .catalog import (
|
|
|
3
3
|
QUERY_SCRIPT_CANCELED_EXIT_CODE,
|
|
4
4
|
QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE,
|
|
5
5
|
Catalog,
|
|
6
|
-
parse_edatachain_file,
|
|
7
6
|
)
|
|
8
7
|
from .loader import get_catalog
|
|
9
8
|
|
|
@@ -13,5 +12,4 @@ __all__ = [
|
|
|
13
12
|
"QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE",
|
|
14
13
|
"Catalog",
|
|
15
14
|
"get_catalog",
|
|
16
|
-
"parse_edatachain_file",
|
|
17
15
|
]
|