datachain 0.8.1__tar.gz → 0.8.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.8.1/src/datachain.egg-info → datachain-0.8.2}/PKG-INFO +83 -1
- {datachain-0.8.1 → datachain-0.8.2}/README.rst +82 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/quick-start.md +6 -6
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/gcs.py +8 -7
- {datachain-0.8.1 → datachain-0.8.2/src/datachain.egg-info}/PKG-INFO +83 -1
- datachain-0.8.2/tests/unit/test_client_gcs.py +6 -0
- datachain-0.8.1/tests/unit/test_client_gcs.py +0 -17
- {datachain-0.8.1 → datachain-0.8.2}/.cruft.json +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.gitattributes +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.github/codecov.yaml +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.github/dependabot.yml +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.github/workflows/release.yml +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.github/workflows/tests.yml +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.gitignore +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/.pre-commit-config.yaml +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/LICENSE +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/assets/datachain.svg +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/contributing.md +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/examples.md +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/index.md +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/overrides/main.html +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/references/datachain.md +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/references/datatype.md +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/references/file.md +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/references/index.md +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/references/sql.md +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/references/torch.md +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/references/udf.md +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/docs/tutorials.md +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/multimodal/wds.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/mkdocs.yml +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/noxfile.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/pyproject.toml +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/setup.cfg +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/__main__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/asyn.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/cache.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/cli.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/cli_utils.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/azure.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/hf.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/local.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/client/s3.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/config.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/dataset.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/error.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/array.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/base.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/conditional.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/func.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/numeric.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/path.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/random.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/string.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/func/window.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/job.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/clip.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/dc.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/diff.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/file.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/hf.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/image.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/listing.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/settings.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/tar.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/text.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/udf.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/utils.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/listing.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/bbox.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/pose.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/segment.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/node.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/progress.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/py.typed +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/batch.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/dataset.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/metrics.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/params.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/queue.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/schema.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/session.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/udf.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/query/utils.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/remote/studio.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/types.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/sql/utils.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/studio.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/telemetry.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain/utils.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/conftest.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/data.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/examples/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/examples/test_examples.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/examples/wds_data.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_catalog.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_client.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_datachain.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_datasets.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_listing.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_ls.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_metrics.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_pull.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_pytorch.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_query.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_session.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/func/test_toolkit.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/scripts/feature_class.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/test_atomicity.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/test_cli_e2e.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/test_cli_studio.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/test_query_e2e.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/test_telemetry.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_models.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_asyn.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_cache.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_catalog.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_client.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_config.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_dataset.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_func.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_listing.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_metastore.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_query.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_query_params.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_serializer.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_session.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_utils.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.8.1 → datachain-0.8.2}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.8.1
|
|
3
|
+
Version: 0.8.2
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -145,6 +145,88 @@ Getting Started
|
|
|
145
145
|
Visit `Quick Start <https://docs.datachain.ai/quick-start>`_ and `Docs <https://docs.datachain.ai/>`_
|
|
146
146
|
to get started with `DataChain` and learn more.
|
|
147
147
|
|
|
148
|
+
.. code:: bash
|
|
149
|
+
|
|
150
|
+
pip install datachain
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
Example: download subset of files based on metadata
|
|
154
|
+
---------------------------------------------------
|
|
155
|
+
|
|
156
|
+
Sometimes users only need to download a specific subset of files from cloud storage,
|
|
157
|
+
rather than the entire dataset.
|
|
158
|
+
For example, you could use a JSON file's metadata to download just cat images with
|
|
159
|
+
high confidence scores.
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
.. code:: py
|
|
163
|
+
|
|
164
|
+
from datachain import Column, DataChain
|
|
165
|
+
|
|
166
|
+
meta = DataChain.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
|
|
167
|
+
images = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
|
|
168
|
+
|
|
169
|
+
images_id = images.map(id=lambda file: file.path.split('.')[-2])
|
|
170
|
+
annotated = images_id.merge(meta, on="id", right_on="meta.id")
|
|
171
|
+
|
|
172
|
+
likely_cats = annotated.filter((Column("meta.inference.confidence") > 0.93) \
|
|
173
|
+
& (Column("meta.inference.class_") == "cat"))
|
|
174
|
+
likely_cats.export_files("high-confidence-cats/", signal="file")
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
Example: LLM based text-file evaluation
|
|
178
|
+
---------------------------------------
|
|
179
|
+
|
|
180
|
+
In this example, we evaluate chatbot conversations stored in text files
|
|
181
|
+
using LLM based evaluation.
|
|
182
|
+
|
|
183
|
+
.. code:: shell
|
|
184
|
+
|
|
185
|
+
$ pip install mistralai # Requires version >=1.0.0
|
|
186
|
+
$ export MISTRAL_API_KEY=_your_key_
|
|
187
|
+
|
|
188
|
+
Python code:
|
|
189
|
+
|
|
190
|
+
.. code:: py
|
|
191
|
+
|
|
192
|
+
from mistralai import Mistral
|
|
193
|
+
from datachain import File, DataChain, Column
|
|
194
|
+
|
|
195
|
+
PROMPT = "Was this dialog successful? Answer in a single word: Success or Failure."
|
|
196
|
+
|
|
197
|
+
def eval_dialogue(file: File) -> bool:
|
|
198
|
+
client = Mistral()
|
|
199
|
+
response = client.chat.complete(
|
|
200
|
+
model="open-mixtral-8x22b",
|
|
201
|
+
messages=[{"role": "system", "content": PROMPT},
|
|
202
|
+
{"role": "user", "content": file.read()}])
|
|
203
|
+
result = response.choices[0].message.content
|
|
204
|
+
return result.lower().startswith("success")
|
|
205
|
+
|
|
206
|
+
chain = (
|
|
207
|
+
DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
|
|
208
|
+
.settings(parallel=4, cache=True)
|
|
209
|
+
.map(is_success=eval_dialogue)
|
|
210
|
+
.save("mistral_files")
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
successful_chain = chain.filter(Column("is_success") == True)
|
|
214
|
+
successful_chain.export_files("./output_mistral")
|
|
215
|
+
|
|
216
|
+
print(f"{successful_chain.count()} files were exported")
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
With the instruction above, the Mistral model considers 31/50 files to hold the successful dialogues:
|
|
221
|
+
|
|
222
|
+
.. code:: shell
|
|
223
|
+
|
|
224
|
+
$ ls output_mistral/datachain-demo/chatbot-KiT/
|
|
225
|
+
1.txt 15.txt 18.txt 2.txt 22.txt 25.txt 28.txt 33.txt 37.txt 4.txt 41.txt ...
|
|
226
|
+
$ ls output_mistral/datachain-demo/chatbot-KiT/ | wc -l
|
|
227
|
+
31
|
|
228
|
+
|
|
229
|
+
|
|
148
230
|
Key Features
|
|
149
231
|
============
|
|
150
232
|
|
|
@@ -42,6 +42,88 @@ Getting Started
|
|
|
42
42
|
Visit `Quick Start <https://docs.datachain.ai/quick-start>`_ and `Docs <https://docs.datachain.ai/>`_
|
|
43
43
|
to get started with `DataChain` and learn more.
|
|
44
44
|
|
|
45
|
+
.. code:: bash
|
|
46
|
+
|
|
47
|
+
pip install datachain
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
Example: download subset of files based on metadata
|
|
51
|
+
---------------------------------------------------
|
|
52
|
+
|
|
53
|
+
Sometimes users only need to download a specific subset of files from cloud storage,
|
|
54
|
+
rather than the entire dataset.
|
|
55
|
+
For example, you could use a JSON file's metadata to download just cat images with
|
|
56
|
+
high confidence scores.
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
.. code:: py
|
|
60
|
+
|
|
61
|
+
from datachain import Column, DataChain
|
|
62
|
+
|
|
63
|
+
meta = DataChain.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
|
|
64
|
+
images = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
|
|
65
|
+
|
|
66
|
+
images_id = images.map(id=lambda file: file.path.split('.')[-2])
|
|
67
|
+
annotated = images_id.merge(meta, on="id", right_on="meta.id")
|
|
68
|
+
|
|
69
|
+
likely_cats = annotated.filter((Column("meta.inference.confidence") > 0.93) \
|
|
70
|
+
& (Column("meta.inference.class_") == "cat"))
|
|
71
|
+
likely_cats.export_files("high-confidence-cats/", signal="file")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
Example: LLM based text-file evaluation
|
|
75
|
+
---------------------------------------
|
|
76
|
+
|
|
77
|
+
In this example, we evaluate chatbot conversations stored in text files
|
|
78
|
+
using LLM based evaluation.
|
|
79
|
+
|
|
80
|
+
.. code:: shell
|
|
81
|
+
|
|
82
|
+
$ pip install mistralai # Requires version >=1.0.0
|
|
83
|
+
$ export MISTRAL_API_KEY=_your_key_
|
|
84
|
+
|
|
85
|
+
Python code:
|
|
86
|
+
|
|
87
|
+
.. code:: py
|
|
88
|
+
|
|
89
|
+
from mistralai import Mistral
|
|
90
|
+
from datachain import File, DataChain, Column
|
|
91
|
+
|
|
92
|
+
PROMPT = "Was this dialog successful? Answer in a single word: Success or Failure."
|
|
93
|
+
|
|
94
|
+
def eval_dialogue(file: File) -> bool:
|
|
95
|
+
client = Mistral()
|
|
96
|
+
response = client.chat.complete(
|
|
97
|
+
model="open-mixtral-8x22b",
|
|
98
|
+
messages=[{"role": "system", "content": PROMPT},
|
|
99
|
+
{"role": "user", "content": file.read()}])
|
|
100
|
+
result = response.choices[0].message.content
|
|
101
|
+
return result.lower().startswith("success")
|
|
102
|
+
|
|
103
|
+
chain = (
|
|
104
|
+
DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
|
|
105
|
+
.settings(parallel=4, cache=True)
|
|
106
|
+
.map(is_success=eval_dialogue)
|
|
107
|
+
.save("mistral_files")
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
successful_chain = chain.filter(Column("is_success") == True)
|
|
111
|
+
successful_chain.export_files("./output_mistral")
|
|
112
|
+
|
|
113
|
+
print(f"{successful_chain.count()} files were exported")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
With the instruction above, the Mistral model considers 31/50 files to hold the successful dialogues:
|
|
118
|
+
|
|
119
|
+
.. code:: shell
|
|
120
|
+
|
|
121
|
+
$ ls output_mistral/datachain-demo/chatbot-KiT/
|
|
122
|
+
1.txt 15.txt 18.txt 2.txt 22.txt 25.txt 28.txt 33.txt 37.txt 4.txt 41.txt ...
|
|
123
|
+
$ ls output_mistral/datachain-demo/chatbot-KiT/ | wc -l
|
|
124
|
+
31
|
|
125
|
+
|
|
126
|
+
|
|
45
127
|
Key Features
|
|
46
128
|
============
|
|
47
129
|
|
|
@@ -39,8 +39,8 @@ using JSON metadata:
|
|
|
39
39
|
``` py
|
|
40
40
|
from datachain import Column, DataChain
|
|
41
41
|
|
|
42
|
-
meta = DataChain.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta")
|
|
43
|
-
images = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/*jpg")
|
|
42
|
+
meta = DataChain.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
|
|
43
|
+
images = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
|
|
44
44
|
|
|
45
45
|
images_id = images.map(id=lambda file: file.path.split('.')[-2])
|
|
46
46
|
annotated = images_id.merge(meta, on="id", right_on="meta.id")
|
|
@@ -78,7 +78,7 @@ def is_positive_dialogue_ending(file) -> bool:
|
|
|
78
78
|
|
|
79
79
|
chain = (
|
|
80
80
|
DataChain.from_storage("gs://datachain-demo/chatbot-KiT/",
|
|
81
|
-
object_name="file", type="text")
|
|
81
|
+
object_name="file", type="text", anon=True)
|
|
82
82
|
.settings(parallel=8, cache=True)
|
|
83
83
|
.map(is_positive=is_positive_dialogue_ending)
|
|
84
84
|
.save("file_response")
|
|
@@ -132,7 +132,7 @@ def eval_dialogue(file: File) -> bool:
|
|
|
132
132
|
return result.lower().startswith("success")
|
|
133
133
|
|
|
134
134
|
chain = (
|
|
135
|
-
DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file")
|
|
135
|
+
DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
|
|
136
136
|
.map(is_success=eval_dialogue)
|
|
137
137
|
.save("mistral_files")
|
|
138
138
|
)
|
|
@@ -177,7 +177,7 @@ def eval_dialog(file: File) -> ChatCompletionResponse:
|
|
|
177
177
|
{"role": "user", "content": file.read()}])
|
|
178
178
|
|
|
179
179
|
chain = (
|
|
180
|
-
DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file")
|
|
180
|
+
DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
|
|
181
181
|
.settings(parallel=4, cache=True)
|
|
182
182
|
.map(response=eval_dialog)
|
|
183
183
|
.map(status=lambda response: response.choices[0].message.content.lower()[:7])
|
|
@@ -273,7 +273,7 @@ from datachain import C, DataChain
|
|
|
273
273
|
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
|
274
274
|
|
|
275
275
|
chain = (
|
|
276
|
-
DataChain.from_storage("gs://datachain-demo/dogs-and-cats/", type="image")
|
|
276
|
+
DataChain.from_storage("gs://datachain-demo/dogs-and-cats/", type="image", anon=True)
|
|
277
277
|
.map(label=lambda name: name.split(".")[0], params=["file.name"])
|
|
278
278
|
.select("file", "label").to_pytorch(
|
|
279
279
|
transform=processor.image_processor,
|
|
@@ -33,13 +33,14 @@ class GCSClient(Client):
|
|
|
33
33
|
return cast(GCSFileSystem, super().create_fs(**kwargs))
|
|
34
34
|
|
|
35
35
|
def url(self, path: str, expires: int = 3600, **kwargs) -> str:
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
36
|
+
"""
|
|
37
|
+
Generate a signed URL for the given path.
|
|
38
|
+
If the client is anonymous, a public URL is returned instead
|
|
39
|
+
(see https://cloud.google.com/storage/docs/access-public-data#api-link).
|
|
40
|
+
"""
|
|
41
|
+
if self.fs.storage_options.get("token") == "anon":
|
|
42
|
+
return f"https://storage.googleapis.com/{self.name}/{path}"
|
|
43
|
+
return self.fs.sign(self.get_full_path(path), expiration=expires, **kwargs)
|
|
43
44
|
|
|
44
45
|
@staticmethod
|
|
45
46
|
def parse_timestamp(timestamp: str) -> datetime:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.8.1
|
|
3
|
+
Version: 0.8.2
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -145,6 +145,88 @@ Getting Started
|
|
|
145
145
|
Visit `Quick Start <https://docs.datachain.ai/quick-start>`_ and `Docs <https://docs.datachain.ai/>`_
|
|
146
146
|
to get started with `DataChain` and learn more.
|
|
147
147
|
|
|
148
|
+
.. code:: bash
|
|
149
|
+
|
|
150
|
+
pip install datachain
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
Example: download subset of files based on metadata
|
|
154
|
+
---------------------------------------------------
|
|
155
|
+
|
|
156
|
+
Sometimes users only need to download a specific subset of files from cloud storage,
|
|
157
|
+
rather than the entire dataset.
|
|
158
|
+
For example, you could use a JSON file's metadata to download just cat images with
|
|
159
|
+
high confidence scores.
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
.. code:: py
|
|
163
|
+
|
|
164
|
+
from datachain import Column, DataChain
|
|
165
|
+
|
|
166
|
+
meta = DataChain.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
|
|
167
|
+
images = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
|
|
168
|
+
|
|
169
|
+
images_id = images.map(id=lambda file: file.path.split('.')[-2])
|
|
170
|
+
annotated = images_id.merge(meta, on="id", right_on="meta.id")
|
|
171
|
+
|
|
172
|
+
likely_cats = annotated.filter((Column("meta.inference.confidence") > 0.93) \
|
|
173
|
+
& (Column("meta.inference.class_") == "cat"))
|
|
174
|
+
likely_cats.export_files("high-confidence-cats/", signal="file")
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
Example: LLM based text-file evaluation
|
|
178
|
+
---------------------------------------
|
|
179
|
+
|
|
180
|
+
In this example, we evaluate chatbot conversations stored in text files
|
|
181
|
+
using LLM based evaluation.
|
|
182
|
+
|
|
183
|
+
.. code:: shell
|
|
184
|
+
|
|
185
|
+
$ pip install mistralai # Requires version >=1.0.0
|
|
186
|
+
$ export MISTRAL_API_KEY=_your_key_
|
|
187
|
+
|
|
188
|
+
Python code:
|
|
189
|
+
|
|
190
|
+
.. code:: py
|
|
191
|
+
|
|
192
|
+
from mistralai import Mistral
|
|
193
|
+
from datachain import File, DataChain, Column
|
|
194
|
+
|
|
195
|
+
PROMPT = "Was this dialog successful? Answer in a single word: Success or Failure."
|
|
196
|
+
|
|
197
|
+
def eval_dialogue(file: File) -> bool:
|
|
198
|
+
client = Mistral()
|
|
199
|
+
response = client.chat.complete(
|
|
200
|
+
model="open-mixtral-8x22b",
|
|
201
|
+
messages=[{"role": "system", "content": PROMPT},
|
|
202
|
+
{"role": "user", "content": file.read()}])
|
|
203
|
+
result = response.choices[0].message.content
|
|
204
|
+
return result.lower().startswith("success")
|
|
205
|
+
|
|
206
|
+
chain = (
|
|
207
|
+
DataChain.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
|
|
208
|
+
.settings(parallel=4, cache=True)
|
|
209
|
+
.map(is_success=eval_dialogue)
|
|
210
|
+
.save("mistral_files")
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
successful_chain = chain.filter(Column("is_success") == True)
|
|
214
|
+
successful_chain.export_files("./output_mistral")
|
|
215
|
+
|
|
216
|
+
print(f"{successful_chain.count()} files were exported")
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
With the instruction above, the Mistral model considers 31/50 files to hold the successful dialogues:
|
|
221
|
+
|
|
222
|
+
.. code:: shell
|
|
223
|
+
|
|
224
|
+
$ ls output_mistral/datachain-demo/chatbot-KiT/
|
|
225
|
+
1.txt 15.txt 18.txt 2.txt 22.txt 25.txt 28.txt 33.txt 37.txt 4.txt 41.txt ...
|
|
226
|
+
$ ls output_mistral/datachain-demo/chatbot-KiT/ | wc -l
|
|
227
|
+
31
|
|
228
|
+
|
|
229
|
+
|
|
148
230
|
Key Features
|
|
149
231
|
============
|
|
150
232
|
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
from datachain.client import Client
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def test_anon_url(mocker):
|
|
5
|
-
def sign(*args, **kwargs):
|
|
6
|
-
raise AttributeError(
|
|
7
|
-
"you need a private key to sign credentials."
|
|
8
|
-
"the credentials you are currently using"
|
|
9
|
-
" <class 'google.oauth2.credentials.Credentials'> just contains a token."
|
|
10
|
-
" see https://googleapis.dev/python/google-api-core/latest/auth.html"
|
|
11
|
-
"#setting-up-a-service-account for more details."
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
mocker.patch("gcsfs.GCSFileSystem.sign", side_effect=sign)
|
|
15
|
-
|
|
16
|
-
client = Client.get_client("gs://foo", None, anon=True)
|
|
17
|
-
assert client.url("bar") == "https://storage.googleapis.com/foo/bar"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|