datachain 0.6.3__tar.gz → 0.6.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.6.3 → datachain-0.6.5}/.github/workflows/tests.yml +2 -0
- {datachain-0.6.3/src/datachain.egg-info → datachain-0.6.5}/PKG-INFO +1 -1
- datachain-0.6.5/examples/llm_and_nlp/hf-dataset-llm-eval.py +59 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/catalog/catalog.py +3 -25
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/cli.py +0 -8
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/fsspec.py +10 -5
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/hf.py +1 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/local.py +7 -3
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/metastore.py +11 -478
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/sqlite.py +9 -41
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/warehouse.py +1 -2
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/dataset.py +12 -10
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/error.py +0 -4
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/arrow.py +1 -1
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/node.py +1 -1
- {datachain-0.6.3 → datachain-0.6.5/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain.egg-info/SOURCES.txt +1 -2
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_catalog.py +0 -5
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_datachain.py +2 -3
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_dataset_query.py +20 -35
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_datasets.py +0 -1
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_catalog_loader.py +3 -8
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_metastore.py +2 -6
- datachain-0.6.3/src/datachain/storage.py +0 -136
- datachain-0.6.3/tests/unit/test_storage.py +0 -188
- {datachain-0.6.3 → datachain-0.6.5}/.cruft.json +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/.gitattributes +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/.github/codecov.yaml +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/.github/dependabot.yml +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/.github/workflows/release.yml +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/.gitignore +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/.pre-commit-config.yaml +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/CONTRIBUTING.rst +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/LICENSE +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/README.rst +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/docs/assets/datachain.svg +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/docs/assets/flowchart.png +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/docs/index.md +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/docs/references/datachain.md +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/docs/references/datatype.md +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/docs/references/file.md +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/docs/references/index.md +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/docs/references/sql.md +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/docs/references/torch.md +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/docs/references/udf.md +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/multimodal/wds.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/mkdocs.yml +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/noxfile.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/overrides/main.html +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/pyproject.toml +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/setup.cfg +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/__main__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/asyn.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/cache.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/cli_utils.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/azure.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/gcs.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/s3.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/config.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/id_generator.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/job.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/clip.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/dc.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/file.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/func/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/func/aggregate.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/func/func.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/hf.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/image.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/listing.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/settings.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/tar.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/text.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/udf.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/utils.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/listing.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/progress.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/py.typed +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/batch.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/dataset.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/metrics.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/params.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/queue.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/schema.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/session.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/remote/studio.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/types.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/utils.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/studio.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/telemetry.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain/utils.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/conftest.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/data.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/examples/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/examples/test_examples.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/examples/wds_data.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_client.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_listing.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_ls.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_metrics.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_pull.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_pytorch.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_query.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/scripts/feature_class.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/test_atomicity.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/test_cli_e2e.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/test_cli_studio.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/test_query_e2e.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/test_telemetry.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_asyn.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_cache.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_catalog.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_client.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_config.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_dataset.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_id_generator.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_listing.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_query.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_query_params.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_serializer.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_session.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_utils.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.6.3 → datachain-0.6.5}/tests/utils.py +0 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from huggingface_hub import InferenceClient
|
|
2
|
+
|
|
3
|
+
from datachain import C, DataChain, DataModel
|
|
4
|
+
|
|
5
|
+
PROMPT = """
|
|
6
|
+
Was this dialog successful? Put result as a single word: Success or Failure.
|
|
7
|
+
Explain the reason in a few words.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DialogEval(DataModel):
|
|
12
|
+
result: str
|
|
13
|
+
reason: str
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# DataChain function to evaluate dialog.
|
|
17
|
+
# DataChain uses the input and result types to infer the schema automatically.
|
|
18
|
+
def eval_dialog(user_input: str, bot_response: str) -> DialogEval:
|
|
19
|
+
client = InferenceClient("meta-llama/Llama-3.1-70B-Instruct")
|
|
20
|
+
|
|
21
|
+
completion = client.chat_completion(
|
|
22
|
+
messages=[
|
|
23
|
+
{
|
|
24
|
+
"role": "user",
|
|
25
|
+
"content": f"{PROMPT}\n\nUser: {user_input}\nBot: {bot_response}",
|
|
26
|
+
},
|
|
27
|
+
],
|
|
28
|
+
response_format={"type": "json", "value": DialogEval.model_json_schema()},
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
message = completion.choices[0].message
|
|
32
|
+
try:
|
|
33
|
+
return DialogEval.model_validate_json(message.content)
|
|
34
|
+
except ValueError:
|
|
35
|
+
return DialogEval(result="Error", reason="Failed to parse response.")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Run HF inference in parallel for each example.
|
|
39
|
+
# Get the result as a Pydantic model that DataChain can understand and serialize.
|
|
40
|
+
# Save to HF as Parquet. Dataset can be previewed here:
|
|
41
|
+
# https://huggingface.co/datasets/dvcorg/test-datachain-llm-eval/viewer
|
|
42
|
+
(
|
|
43
|
+
DataChain.from_csv(
|
|
44
|
+
"hf://datasets/infinite-dataset-hub/MobilePlanAssistant/data.csv"
|
|
45
|
+
)
|
|
46
|
+
.settings(parallel=10)
|
|
47
|
+
.map(response=eval_dialog)
|
|
48
|
+
.to_parquet("hf://datasets/dvcorg/test-datachain-llm-eval/data.parquet")
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
# Read it back to filter and show.
|
|
52
|
+
# It restores the Pydantic model from Parquet under the hood.
|
|
53
|
+
(
|
|
54
|
+
DataChain.from_parquet(
|
|
55
|
+
"hf://datasets/dvcorg/test-datachain-llm-eval/data.parquet", source=False
|
|
56
|
+
)
|
|
57
|
+
.filter(C("response.result") == "Failure")
|
|
58
|
+
.show(3)
|
|
59
|
+
)
|
|
@@ -42,6 +42,7 @@ from datachain.dataset import (
|
|
|
42
42
|
DatasetStats,
|
|
43
43
|
DatasetStatus,
|
|
44
44
|
RowDict,
|
|
45
|
+
StorageURI,
|
|
45
46
|
create_dataset_uri,
|
|
46
47
|
parse_dataset_uri,
|
|
47
48
|
)
|
|
@@ -58,7 +59,6 @@ from datachain.node import DirType, Node, NodeWithPath
|
|
|
58
59
|
from datachain.nodes_thread_pool import NodesThreadPool
|
|
59
60
|
from datachain.remote.studio import StudioClient
|
|
60
61
|
from datachain.sql.types import DateTime, SQLType, String
|
|
61
|
-
from datachain.storage import StorageURI
|
|
62
62
|
from datachain.utils import (
|
|
63
63
|
DataChainDir,
|
|
64
64
|
batched,
|
|
@@ -1702,31 +1702,9 @@ class Catalog:
|
|
|
1702
1702
|
*,
|
|
1703
1703
|
client_config=None,
|
|
1704
1704
|
) -> None:
|
|
1705
|
-
root_sources = [
|
|
1706
|
-
src for src in sources if Client.get_implementation(src).is_root_url(src)
|
|
1707
|
-
]
|
|
1708
|
-
non_root_sources = [
|
|
1709
|
-
src
|
|
1710
|
-
for src in sources
|
|
1711
|
-
if not Client.get_implementation(src).is_root_url(src)
|
|
1712
|
-
]
|
|
1713
|
-
|
|
1714
|
-
client_config = client_config or self.client_config
|
|
1715
|
-
|
|
1716
|
-
# for root sources (e.g s3://) we are just getting all buckets and
|
|
1717
|
-
# saving them as storages, without further indexing in each bucket
|
|
1718
|
-
for source in root_sources:
|
|
1719
|
-
for bucket in Client.get_implementation(source).ls_buckets(**client_config):
|
|
1720
|
-
client = self.get_client(bucket.uri, **client_config)
|
|
1721
|
-
print(f"Registering storage {client.uri}")
|
|
1722
|
-
self.metastore.create_storage_if_not_registered(client.uri)
|
|
1723
|
-
|
|
1724
1705
|
self.enlist_sources(
|
|
1725
|
-
|
|
1706
|
+
sources,
|
|
1726
1707
|
update,
|
|
1727
|
-
client_config=client_config,
|
|
1708
|
+
client_config=client_config or self.client_config,
|
|
1728
1709
|
only_index=True,
|
|
1729
1710
|
)
|
|
1730
|
-
|
|
1731
|
-
def find_stale_storages(self) -> None:
|
|
1732
|
-
self.metastore.find_stale_storages()
|
|
@@ -568,12 +568,6 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
568
568
|
)
|
|
569
569
|
add_sources_arg(parse_index)
|
|
570
570
|
|
|
571
|
-
subp.add_parser(
|
|
572
|
-
"find-stale-storages",
|
|
573
|
-
parents=[parent_parser],
|
|
574
|
-
description="Finds and marks stale storages",
|
|
575
|
-
)
|
|
576
|
-
|
|
577
571
|
show_parser = subp.add_parser(
|
|
578
572
|
"show",
|
|
579
573
|
parents=[parent_parser],
|
|
@@ -1100,8 +1094,6 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
|
|
|
1100
1094
|
)
|
|
1101
1095
|
elif args.command == "completion":
|
|
1102
1096
|
print(completion(args.shell))
|
|
1103
|
-
elif args.command == "find-stale-storages":
|
|
1104
|
-
catalog.find_stale_storages()
|
|
1105
1097
|
elif args.command == "query":
|
|
1106
1098
|
query(
|
|
1107
1099
|
catalog,
|
|
@@ -31,11 +31,12 @@ from datachain.error import ClientError as DataChainClientError
|
|
|
31
31
|
from datachain.lib.file import File
|
|
32
32
|
from datachain.nodes_fetcher import NodesFetcher
|
|
33
33
|
from datachain.nodes_thread_pool import NodeChunk
|
|
34
|
-
from datachain.storage import StorageURI
|
|
35
34
|
|
|
36
35
|
if TYPE_CHECKING:
|
|
37
36
|
from fsspec.spec import AbstractFileSystem
|
|
38
37
|
|
|
38
|
+
from datachain.dataset import StorageURI
|
|
39
|
+
|
|
39
40
|
|
|
40
41
|
logger = logging.getLogger("datachain")
|
|
41
42
|
|
|
@@ -63,7 +64,7 @@ def _is_win_local_path(uri: str) -> bool:
|
|
|
63
64
|
|
|
64
65
|
class Bucket(NamedTuple):
|
|
65
66
|
name: str
|
|
66
|
-
uri: StorageURI
|
|
67
|
+
uri: "StorageURI"
|
|
67
68
|
created: Optional[datetime]
|
|
68
69
|
|
|
69
70
|
|
|
@@ -115,7 +116,7 @@ class Client(ABC):
|
|
|
115
116
|
return DATA_SOURCE_URI_PATTERN.match(name) is not None
|
|
116
117
|
|
|
117
118
|
@staticmethod
|
|
118
|
-
def parse_url(source: str) -> tuple[StorageURI, str]:
|
|
119
|
+
def parse_url(source: str) -> tuple["StorageURI", str]:
|
|
119
120
|
cls = Client.get_implementation(source)
|
|
120
121
|
storage_name, rel_path = cls.split_url(source)
|
|
121
122
|
return cls.get_uri(storage_name), rel_path
|
|
@@ -148,7 +149,7 @@ class Client(ABC):
|
|
|
148
149
|
@classmethod
|
|
149
150
|
def from_source(
|
|
150
151
|
cls,
|
|
151
|
-
uri: StorageURI,
|
|
152
|
+
uri: "StorageURI",
|
|
152
153
|
cache: DataChainCache,
|
|
153
154
|
**kwargs,
|
|
154
155
|
) -> "Client":
|
|
@@ -156,6 +157,8 @@ class Client(ABC):
|
|
|
156
157
|
|
|
157
158
|
@classmethod
|
|
158
159
|
def ls_buckets(cls, **kwargs) -> Iterator[Bucket]:
|
|
160
|
+
from datachain.dataset import StorageURI
|
|
161
|
+
|
|
159
162
|
for entry in cls.create_fs(**kwargs).ls(cls.PREFIX, detail=True):
|
|
160
163
|
name = entry["name"].rstrip("/")
|
|
161
164
|
yield Bucket(
|
|
@@ -169,7 +172,9 @@ class Client(ABC):
|
|
|
169
172
|
return url == cls.PREFIX
|
|
170
173
|
|
|
171
174
|
@classmethod
|
|
172
|
-
def get_uri(cls, name) -> StorageURI:
|
|
175
|
+
def get_uri(cls, name) -> "StorageURI":
|
|
176
|
+
from datachain.dataset import StorageURI
|
|
177
|
+
|
|
173
178
|
return StorageURI(f"{cls.PREFIX}{name}")
|
|
174
179
|
|
|
175
180
|
@classmethod
|
|
@@ -2,16 +2,18 @@ import os
|
|
|
2
2
|
import posixpath
|
|
3
3
|
from datetime import datetime, timezone
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Any
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
6
|
from urllib.parse import urlparse
|
|
7
7
|
|
|
8
8
|
from fsspec.implementations.local import LocalFileSystem
|
|
9
9
|
|
|
10
10
|
from datachain.lib.file import File
|
|
11
|
-
from datachain.storage import StorageURI
|
|
12
11
|
|
|
13
12
|
from .fsspec import Client
|
|
14
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from datachain.dataset import StorageURI
|
|
16
|
+
|
|
15
17
|
|
|
16
18
|
class FileClient(Client):
|
|
17
19
|
FS_CLASS = LocalFileSystem
|
|
@@ -28,7 +30,9 @@ class FileClient(Client):
|
|
|
28
30
|
raise TypeError("Signed urls are not implemented for local file system")
|
|
29
31
|
|
|
30
32
|
@classmethod
|
|
31
|
-
def get_uri(cls, name) -> StorageURI:
|
|
33
|
+
def get_uri(cls, name) -> "StorageURI":
|
|
34
|
+
from datachain.dataset import StorageURI
|
|
35
|
+
|
|
32
36
|
return StorageURI(f'{cls.PREFIX}/{name.removeprefix("/")}')
|
|
33
37
|
|
|
34
38
|
@classmethod
|