datachain 0.25.1__tar.gz → 0.26.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.25.1 → datachain-0.26.0}/.gitignore +2 -0
- {datachain-0.25.1 → datachain-0.26.0}/PKG-INFO +6 -2
- {datachain-0.25.1 → datachain-0.26.0}/docs/commands/job/run.md +13 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/datachain.md +4 -0
- {datachain-0.25.1 → datachain-0.26.0}/examples/computer_vision/iptc_exif_xmp_lib.py +19 -23
- datachain-0.26.0/examples/computer_vision/llava2_image_desc_lib.py +71 -0
- {datachain-0.25.1 → datachain-0.26.0}/examples/computer_vision/openimage-detect.py +14 -12
- {datachain-0.25.1 → datachain-0.26.0}/examples/computer_vision/ultralytics-bbox.py +1 -1
- {datachain-0.25.1 → datachain-0.26.0}/examples/computer_vision/ultralytics-pose.py +1 -1
- {datachain-0.25.1 → datachain-0.26.0}/examples/computer_vision/ultralytics-segment.py +1 -1
- {datachain-0.25.1 → datachain-0.26.0}/examples/get_started/common_sql_functions.py +14 -18
- {datachain-0.25.1 → datachain-0.26.0}/examples/get_started/json-csv-reader.py +9 -12
- {datachain-0.25.1 → datachain-0.26.0}/examples/get_started/torch-loader.py +2 -2
- {datachain-0.25.1 → datachain-0.26.0}/examples/get_started/udfs/parallel.py +9 -9
- datachain-0.26.0/examples/get_started/udfs/simple.py +21 -0
- {datachain-0.25.1 → datachain-0.26.0}/examples/get_started/udfs/stateful.py +15 -19
- datachain-0.26.0/examples/llm_and_nlp/claude-query.py +65 -0
- {datachain-0.25.1 → datachain-0.26.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +6 -6
- datachain-0.26.0/examples/multimodal/audio-to-text.py +62 -0
- {datachain-0.25.1 → datachain-0.26.0}/examples/multimodal/clip_inference.py +21 -7
- datachain-0.26.0/examples/multimodal/hf_pipeline.py +119 -0
- datachain-0.26.0/examples/multimodal/openai_image_desc_lib.py +58 -0
- {datachain-0.25.1 → datachain-0.26.0}/examples/multimodal/wds.py +18 -4
- {datachain-0.25.1 → datachain-0.26.0}/examples/multimodal/wds_filtered.py +19 -5
- {datachain-0.25.1 → datachain-0.26.0}/pyproject.toml +7 -2
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/__init__.py +6 -0
- datachain-0.26.0/src/datachain/lib/audio.py +151 -0
- datachain-0.26.0/src/datachain/lib/convert/sql_to_python.py +22 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/datachain.py +125 -23
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/datasets.py +1 -1
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/file.py +190 -1
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/model_store.py +8 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/signal_schema.py +47 -7
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/udf.py +17 -5
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/query/dataset.py +15 -9
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain.egg-info/PKG-INFO +6 -2
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain.egg-info/SOURCES.txt +5 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain.egg-info/requires.txt +6 -1
- datachain-0.26.0/tests/func/test_audio.py +115 -0
- datachain-0.26.0/tests/unit/lib/test_audio.py +265 -0
- datachain-0.26.0/tests/unit/lib/test_partition_by.py +552 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_signal_schema.py +246 -7
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_sql_to_python.py +3 -1
- datachain-0.25.1/examples/computer_vision/llava2_image_desc_lib.py +0 -86
- datachain-0.25.1/examples/get_started/udfs/simple.py +0 -19
- datachain-0.25.1/examples/llm_and_nlp/claude-query.py +0 -78
- datachain-0.25.1/examples/multimodal/hf_pipeline.py +0 -139
- datachain-0.25.1/examples/multimodal/openai_image_desc_lib.py +0 -93
- datachain-0.25.1/src/datachain/lib/convert/sql_to_python.py +0 -14
- {datachain-0.25.1 → datachain-0.26.0}/.cruft.json +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/.gitattributes +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/.github/codecov.yaml +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/.github/dependabot.yml +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/.github/workflows/release.yml +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/.github/workflows/tests.yml +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/.pre-commit-config.yaml +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/LICENSE +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/README.rst +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/assets/datachain.svg +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/commands/auth/login.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/commands/auth/logout.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/commands/auth/team.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/commands/auth/token.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/commands/index.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/commands/job/cancel.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/commands/job/clusters.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/commands/job/logs.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/commands/job/ls.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/contributing.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/examples.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/guide/db_migrations.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/guide/delta.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/guide/env.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/guide/index.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/guide/namespaces.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/guide/processing.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/guide/remotes.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/guide/retry.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/index.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/overrides/main.html +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/quick-start.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/data-types/file.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/data-types/index.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/data-types/pose.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/data-types/segment.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/func.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/index.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/toolkit.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/torch.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/references/udf.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/docs/tutorials.md +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/mkdocs.yml +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/noxfile.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/setup.cfg +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/__main__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/asyn.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cache.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/cli/utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/client/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/client/azure.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/client/gcs.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/client/hf.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/client/local.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/client/s3.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/config.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/dataset.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/delta.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/error.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/fs/reference.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/fs/utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/func/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/func/array.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/func/base.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/func/conditional.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/func/func.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/func/numeric.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/func/path.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/func/random.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/func/string.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/func/window.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/job.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/clip.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/hf.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/image.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/listing.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/projects.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/settings.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/tar.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/text.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/video.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/listing.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/model/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/model/bbox.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/model/pose.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/model/segment.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/model/utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/namespace.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/node.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/progress.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/project.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/py.typed +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/query/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/query/batch.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/query/metrics.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/query/params.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/query/queue.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/query/schema.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/query/session.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/query/udf.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/query/utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/remote/studio.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/script_meta.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/semver.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/types.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/sql/utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/studio.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/telemetry.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain/utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/conftest.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/data.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/examples/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/examples/test_examples.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/examples/wds_data.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/data/lena.jpg +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/functions/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/functions/test_array.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/functions/test_path.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/functions/test_random.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/functions/test_string.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/model/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_batching.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_catalog.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_client.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_data_storage.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_datachain.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_datasets.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_delta.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_file.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_hf.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_image.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_listing.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_ls.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_metastore.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_metrics.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_pull.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_pytorch.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_query.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_read_database.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_retry.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_session.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_toolkit.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_video.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/func/test_warehouse.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/scripts/feature_class.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/test_atomicity.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/test_cli_e2e.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/test_cli_studio.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/test_import_time.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/test_query_e2e.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/test_telemetry.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/model/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_asyn.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_cache.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_catalog.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_client.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_config.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_dataset.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_func.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_listing.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_metastore.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_query.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_query_params.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_semver.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_serializer.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_session.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_utils.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.25.1 → datachain-0.26.0}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.25.1
|
|
3
|
+
Version: 0.26.0
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -63,6 +63,9 @@ Provides-Extra: torch
|
|
|
63
63
|
Requires-Dist: torch>=2.1.0; extra == "torch"
|
|
64
64
|
Requires-Dist: torchvision; extra == "torch"
|
|
65
65
|
Requires-Dist: transformers>=4.36.0; extra == "torch"
|
|
66
|
+
Provides-Extra: audio
|
|
67
|
+
Requires-Dist: torchaudio; extra == "audio"
|
|
68
|
+
Requires-Dist: soundfile; extra == "audio"
|
|
66
69
|
Provides-Extra: remote
|
|
67
70
|
Requires-Dist: lz4; extra == "remote"
|
|
68
71
|
Requires-Dist: requests>=2.22.0; extra == "remote"
|
|
@@ -78,7 +81,7 @@ Requires-Dist: ffmpeg-python; extra == "video"
|
|
|
78
81
|
Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
|
|
79
82
|
Requires-Dist: opencv-python; extra == "video"
|
|
80
83
|
Provides-Extra: tests
|
|
81
|
-
Requires-Dist: datachain[hf,remote,torch,vector,video]; extra == "tests"
|
|
84
|
+
Requires-Dist: datachain[audio,hf,remote,torch,vector,video]; extra == "tests"
|
|
82
85
|
Requires-Dist: pytest<9,>=8; extra == "tests"
|
|
83
86
|
Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
|
|
84
87
|
Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
|
|
@@ -108,6 +111,7 @@ Requires-Dist: accelerate; extra == "examples"
|
|
|
108
111
|
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
109
112
|
Requires-Dist: ultralytics; extra == "examples"
|
|
110
113
|
Requires-Dist: open_clip_torch; extra == "examples"
|
|
114
|
+
Requires-Dist: openai; extra == "examples"
|
|
111
115
|
Dynamic: license-file
|
|
112
116
|
|
|
113
117
|
================
|
|
@@ -31,6 +31,7 @@ This command runs a job in Studio using the specified query file. You can config
|
|
|
31
31
|
* `--req-file REQ_FILE` - Python requirements file
|
|
32
32
|
* `--req REQ` - Python package requirements
|
|
33
33
|
* `--priority PRIORITY` - Priority for the job in range 0-5. Lower value is higher priority (default: 5)
|
|
34
|
+
* `--repository URL` - Repository URL to clone before running the job.
|
|
34
35
|
* `-h`, `--help` - Show the help message and exit.
|
|
35
36
|
* `-v`, `--verbose` - Be verbose.
|
|
36
37
|
* `-q`, `--quiet` - Be quiet.
|
|
@@ -67,6 +68,18 @@ datachain job run --env API_KEY=123 --req pandas numpy query.py
|
|
|
67
68
|
datachain job run --repository https://github.com/iterative/datachain query.py
|
|
68
69
|
```
|
|
69
70
|
|
|
71
|
+
To specify a branch / revision:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
datachain job run --repository https://github.com/iterative/datachain@main query.py
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Git URLs are also supported:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
datachain job run --repository git@github.com:iterative/datachain.git@main query.py
|
|
81
|
+
```
|
|
82
|
+
|
|
70
83
|
7. Run a job with higher priority
|
|
71
84
|
```bash
|
|
72
85
|
datachain job run --priority 2 query.py
|
|
@@ -15,6 +15,10 @@ for examples of how to create a chain.
|
|
|
15
15
|
|
|
16
16
|
::: datachain.lib.dc.datasets.datasets
|
|
17
17
|
|
|
18
|
+
::: datachain.lib.dc.datasets.delete_dataset
|
|
19
|
+
|
|
20
|
+
::: datachain.lib.dc.datasets.move_dataset
|
|
21
|
+
|
|
18
22
|
::: datachain.lib.dc.hf.read_hf
|
|
19
23
|
|
|
20
24
|
::: datachain.lib.dc.json.read_json
|
|
@@ -5,8 +5,6 @@ To install the required dependencies:
|
|
|
5
5
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import json
|
|
9
|
-
|
|
10
8
|
from PIL import (
|
|
11
9
|
ExifTags,
|
|
12
10
|
IptcImagePlugin,
|
|
@@ -14,8 +12,7 @@ from PIL import (
|
|
|
14
12
|
)
|
|
15
13
|
|
|
16
14
|
import datachain as dc
|
|
17
|
-
|
|
18
|
-
source = "gs://datachain-demo/open-images-v6/"
|
|
15
|
+
from datachain import C, DataModel, File
|
|
19
16
|
|
|
20
17
|
|
|
21
18
|
def cast(v): # to JSON serializable types
|
|
@@ -34,16 +31,21 @@ def cast(v): # to JSON serializable types
|
|
|
34
31
|
return v
|
|
35
32
|
|
|
36
33
|
|
|
37
|
-
|
|
38
|
-
|
|
34
|
+
class ImageDescription(DataModel):
|
|
35
|
+
xmp: dict
|
|
36
|
+
exif: dict
|
|
37
|
+
iptc: dict
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def image_description(file: File) -> tuple[ImageDescription, str]:
|
|
41
|
+
xmp, exif, iptc = {}, {}, {}
|
|
39
42
|
try:
|
|
40
43
|
img = file.read()
|
|
41
44
|
xmp = img.getxmp()
|
|
42
45
|
img_exif = img.getexif()
|
|
43
46
|
img_iptc = IptcImagePlugin.getiptcinfo(img)
|
|
44
47
|
except Exception as err: # noqa: BLE001
|
|
45
|
-
|
|
46
|
-
return ({}, {}, {}, error)
|
|
48
|
+
return ImageDescription(xmp={}, exif={}, iptc={}), str(err)
|
|
47
49
|
|
|
48
50
|
if img_iptc:
|
|
49
51
|
for k, v in img_iptc.items():
|
|
@@ -57,26 +59,20 @@ def image_description(file):
|
|
|
57
59
|
if k in ExifTags.GPSTAGS:
|
|
58
60
|
exif[ExifTags.GPSTAGS[k]] = v
|
|
59
61
|
|
|
60
|
-
return (
|
|
61
|
-
json.dumps(xmp),
|
|
62
|
-
json.dumps(exif),
|
|
63
|
-
json.dumps(iptc),
|
|
64
|
-
"",
|
|
65
|
-
)
|
|
62
|
+
return (ImageDescription(xmp=xmp, exif=exif, iptc=iptc), "")
|
|
66
63
|
|
|
67
64
|
|
|
68
65
|
if __name__ == "__main__":
|
|
69
66
|
(
|
|
70
|
-
dc.read_storage(
|
|
71
|
-
.
|
|
72
|
-
.filter(dc.C("file.path").glob("*.jpg"))
|
|
67
|
+
dc.read_storage("gs://datachain-demo/open-images-v6/", type="image", anon=True)
|
|
68
|
+
.filter(C("file.path").glob("*.jpg"))
|
|
73
69
|
.limit(5000)
|
|
74
|
-
.
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
70
|
+
.settings(parallel=True)
|
|
71
|
+
.map(image_description, output=("description", "error"))
|
|
72
|
+
.filter(
|
|
73
|
+
(C("description.xmp") != "{}")
|
|
74
|
+
| (C("description.exif") != "{}")
|
|
75
|
+
| (C("description.iptc") != "{}")
|
|
78
76
|
)
|
|
79
|
-
.select("file.path", "xmp", "exif", "iptc", "error")
|
|
80
|
-
.filter((dc.C("xmp") != "{}") | (dc.C("exif") != "{}") | (dc.C("iptc") != "{}"))
|
|
81
77
|
.show()
|
|
82
78
|
)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""
|
|
2
|
+
To install the required dependencies:
|
|
3
|
+
|
|
4
|
+
pip install datachain[examples]
|
|
5
|
+
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import torch
|
|
9
|
+
from transformers import (
|
|
10
|
+
AutoProcessor,
|
|
11
|
+
LlavaForConditionalGeneration,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
import datachain as dc
|
|
15
|
+
from datachain import C, File
|
|
16
|
+
|
|
17
|
+
model = "llava-hf/llava-1.5-7b-hf"
|
|
18
|
+
|
|
19
|
+
# HuggingFace supports the following base models:
|
|
20
|
+
#
|
|
21
|
+
# "llava-hf/llava-1.5-7b-hf"
|
|
22
|
+
# "llava-hf/llava-1.5-13b-hf"
|
|
23
|
+
# "llava-hf/bakLlava-v1-hf"
|
|
24
|
+
#
|
|
25
|
+
# https://huggingface.co/llava-hf
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Probably this code can be written with HF pipeline
|
|
29
|
+
# but we keep it a bit more low-level for the sake of example.
|
|
30
|
+
class LLaVaProcessor:
|
|
31
|
+
def __init__(self, model_name, max_tokens=300):
|
|
32
|
+
if torch.cuda.is_available():
|
|
33
|
+
self.device = "cuda"
|
|
34
|
+
self.dtype = torch.float16
|
|
35
|
+
else:
|
|
36
|
+
self.device = "cpu"
|
|
37
|
+
self.dtype = torch.float32
|
|
38
|
+
|
|
39
|
+
self.model_name = model_name
|
|
40
|
+
self.max_tokens = max_tokens
|
|
41
|
+
self.prompt = "USER: <image>\nDescribe this picture\nASSISTANT:"
|
|
42
|
+
|
|
43
|
+
self.processor = AutoProcessor.from_pretrained(self.model_name)
|
|
44
|
+
self.model = LlavaForConditionalGeneration.from_pretrained(
|
|
45
|
+
self.model_name, torch_dtype=self.dtype, low_cpu_mem_usage=True
|
|
46
|
+
).to(self.device)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def process(processor: LLaVaProcessor, file: File) -> tuple[str, str]:
|
|
50
|
+
inputs = processor.processor(
|
|
51
|
+
text=processor.prompt, images=file.read(), return_tensors="pt"
|
|
52
|
+
).to(processor.device, processor.dtype)
|
|
53
|
+
|
|
54
|
+
generated_ids = processor.model.generate(
|
|
55
|
+
**inputs, max_new_tokens=processor.max_tokens
|
|
56
|
+
)
|
|
57
|
+
generated_text = processor.processor.batch_decode(
|
|
58
|
+
generated_ids, skip_special_tokens=True
|
|
59
|
+
)
|
|
60
|
+
desc = generated_text[0]
|
|
61
|
+
return desc.split("ASSISTANT:")[-1].strip(), ""
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
if __name__ == "__main__":
|
|
65
|
+
(
|
|
66
|
+
dc.read_storage("gs://datachain-demo/dogs-and-cats/", type="image", anon=True)
|
|
67
|
+
.filter(C("file.path").glob("*/cat*.jpg"))
|
|
68
|
+
.setup(processor=lambda: LLaVaProcessor(model_name=model))
|
|
69
|
+
.map(process, output=("description", "error"))
|
|
70
|
+
.show(5)
|
|
71
|
+
)
|
|
@@ -1,19 +1,23 @@
|
|
|
1
1
|
import json
|
|
2
|
+
from collections.abc import Iterator
|
|
2
3
|
|
|
3
4
|
from PIL import Image
|
|
4
5
|
|
|
5
6
|
import datachain as dc
|
|
6
|
-
from datachain import File, model
|
|
7
|
+
from datachain import C, File, model
|
|
7
8
|
from datachain.func import path
|
|
8
9
|
|
|
9
10
|
|
|
10
|
-
|
|
11
|
-
|
|
11
|
+
# Example showing extraction of bounding boxes from Open Images dataset
|
|
12
|
+
# that comes as pairs of JPG and JSON files.
|
|
13
|
+
def openimage_detect(file: list[File]) -> Iterator[tuple[File, model.BBox]]:
|
|
14
|
+
if len(file) != 2:
|
|
12
15
|
raise ValueError("Group jpg-json mismatch")
|
|
13
16
|
|
|
14
|
-
stream_jpg =
|
|
15
|
-
stream_json =
|
|
16
|
-
|
|
17
|
+
stream_jpg = file[0]
|
|
18
|
+
stream_json = file[1]
|
|
19
|
+
source = stream_jpg.source
|
|
20
|
+
if stream_jpg.get_file_ext() != "jpg":
|
|
17
21
|
stream_jpg, stream_json = stream_json, stream_jpg
|
|
18
22
|
|
|
19
23
|
with stream_jpg.open() as fd:
|
|
@@ -38,16 +42,14 @@ def openimage_detect(args):
|
|
|
38
42
|
yield fstream, bbox
|
|
39
43
|
|
|
40
44
|
|
|
41
|
-
source = "gs://datachain-demo/openimages-v6-test-jsonpairs/"
|
|
42
|
-
|
|
43
45
|
(
|
|
44
|
-
dc.read_storage(
|
|
45
|
-
.filter(
|
|
46
|
+
dc.read_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/", anon=True)
|
|
47
|
+
.filter(C("file.path").glob("*.jpg") | C("file.path").glob("*.json"))
|
|
48
|
+
.settings(cache=True, parallel=True)
|
|
46
49
|
.agg(
|
|
47
50
|
openimage_detect,
|
|
48
51
|
partition_by=path.file_stem("file.path"),
|
|
49
|
-
|
|
50
|
-
output={"file": File, "bbox": model.BBox},
|
|
52
|
+
output=("file", "bbox"),
|
|
51
53
|
)
|
|
52
54
|
.show()
|
|
53
55
|
)
|
|
@@ -10,7 +10,7 @@ def process_bboxes(yolo: YOLO, file: dc.File) -> YoloBBoxes:
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
(
|
|
13
|
-
dc.read_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/")
|
|
13
|
+
dc.read_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/", anon=True)
|
|
14
14
|
.filter(dc.C("file.path").glob("*.jpg"))
|
|
15
15
|
.limit(20)
|
|
16
16
|
.setup(yolo=lambda: YOLO("yolo11n.pt"))
|
|
@@ -10,7 +10,7 @@ def process_poses(yolo: YOLO, file: dc.File) -> YoloPoses:
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
(
|
|
13
|
-
dc.read_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/")
|
|
13
|
+
dc.read_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/", anon=True)
|
|
14
14
|
.filter(dc.C("file.path").glob("*.jpg"))
|
|
15
15
|
.limit(20)
|
|
16
16
|
.setup(yolo=lambda: YOLO("yolo11n-pose.pt"))
|
|
@@ -10,7 +10,7 @@ def process_segments(yolo: YOLO, file: dc.File) -> YoloSegments:
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
(
|
|
13
|
-
dc.read_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/")
|
|
13
|
+
dc.read_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/", anon=True)
|
|
14
14
|
.filter(dc.C("file.path").glob("*.jpg"))
|
|
15
15
|
.limit(20)
|
|
16
16
|
.setup(yolo=lambda: YOLO("yolo11n-seg.pt"))
|
|
@@ -1,23 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Example demonstrating functions (manipulating strings, paths, arrays)
|
|
3
|
+
that are translated directly to SQL (vectorized). They don't require heavy compute,
|
|
4
|
+
fetching objects into the cluster, etc.
|
|
5
|
+
"""
|
|
6
|
+
|
|
1
7
|
import datachain as dc
|
|
8
|
+
from datachain import C
|
|
2
9
|
from datachain.func import array, greatest, least, path, string
|
|
3
10
|
|
|
4
|
-
|
|
5
|
-
def num_chars_udf(file):
|
|
6
|
-
parts = file.name.split(".")
|
|
7
|
-
if len(parts) > 1:
|
|
8
|
-
return (list(parts[1]),)
|
|
9
|
-
return ([],)
|
|
10
|
-
|
|
11
|
-
|
|
12
11
|
chain = dc.read_storage("gs://datachain-demo/dogs-and-cats/", anon=True)
|
|
13
|
-
chain.map(num_chars_udf, params=["file"], output={"num_chars": list[str]}).select(
|
|
14
|
-
"file.path", "num_chars"
|
|
15
|
-
).show(5)
|
|
16
12
|
|
|
17
13
|
(
|
|
18
14
|
chain.mutate(
|
|
19
|
-
length=string.length(path.name(
|
|
20
|
-
parts=string.split(path.name(
|
|
15
|
+
length=string.length(path.name(C("file.path"))),
|
|
16
|
+
parts=string.split(path.name(C("file.path")), "."),
|
|
21
17
|
)
|
|
22
18
|
.select("file.path", "length", "parts")
|
|
23
19
|
.show(5)
|
|
@@ -25,14 +21,14 @@ chain.map(num_chars_udf, params=["file"], output={"num_chars": list[str]}).selec
|
|
|
25
21
|
|
|
26
22
|
(
|
|
27
23
|
chain.mutate(
|
|
28
|
-
stem=path.file_stem(
|
|
29
|
-
ext=path.file_ext(
|
|
24
|
+
stem=path.file_stem(C("file.path")),
|
|
25
|
+
ext=path.file_ext(C("file.path")),
|
|
30
26
|
)
|
|
31
27
|
.select("file.path", "stem", "ext")
|
|
32
28
|
.show(5)
|
|
33
29
|
)
|
|
34
30
|
|
|
35
|
-
parts = string.split(path.name(
|
|
31
|
+
parts = string.split(path.name(C("file.path")), ".")
|
|
36
32
|
chain = chain.mutate(
|
|
37
33
|
isdog=array.contains(parts, "dog"),
|
|
38
34
|
iscat=array.contains(parts, "cat"),
|
|
@@ -46,8 +42,8 @@ chain = chain.mutate(
|
|
|
46
42
|
|
|
47
43
|
(
|
|
48
44
|
chain.mutate(
|
|
49
|
-
greatest=greatest(chain.column("a"),
|
|
50
|
-
least=least(chain.column("a"),
|
|
45
|
+
greatest=greatest(chain.column("a"), C("b")),
|
|
46
|
+
least=least(chain.column("a"), C("b")),
|
|
51
47
|
)
|
|
52
48
|
.select("a", "b", "greatest", "least")
|
|
53
49
|
.show(10)
|
|
@@ -1,33 +1,25 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
|
|
3
|
-
from pydantic import BaseModel
|
|
4
|
-
|
|
5
3
|
import datachain as dc
|
|
6
|
-
from datachain
|
|
4
|
+
from datachain import DataModel
|
|
7
5
|
from datachain.lib.meta_formats import gen_datamodel_code
|
|
8
6
|
|
|
9
7
|
|
|
10
8
|
# Sample model for static JSON model
|
|
11
|
-
class LicenseModel(
|
|
9
|
+
class LicenseModel(DataModel):
|
|
12
10
|
url: str
|
|
13
11
|
id: int
|
|
14
12
|
name: str
|
|
15
13
|
|
|
16
14
|
|
|
17
|
-
LicenseFeature = ModelStore.register(LicenseModel)
|
|
18
|
-
|
|
19
|
-
|
|
20
15
|
# Sample model for static CSV model
|
|
21
|
-
class ChatDialog(
|
|
16
|
+
class ChatDialog(DataModel):
|
|
22
17
|
id: Optional[int] = None
|
|
23
18
|
count: Optional[int] = None
|
|
24
19
|
sender: Optional[str] = None
|
|
25
20
|
text: Optional[str] = None
|
|
26
21
|
|
|
27
22
|
|
|
28
|
-
ChatFeature = ModelStore.register(ChatDialog)
|
|
29
|
-
|
|
30
|
-
|
|
31
23
|
def main():
|
|
32
24
|
# Dynamic JSONl schema from 2 objects
|
|
33
25
|
uri = "gs://datachain-demo/jsonl/object.jsonl"
|
|
@@ -53,7 +45,7 @@ def main():
|
|
|
53
45
|
|
|
54
46
|
# Static JSON schema test parsing 3/7 objects
|
|
55
47
|
static_json_ds = dc.read_json(
|
|
56
|
-
uri, jmespath="licenses", spec=
|
|
48
|
+
uri, jmespath="licenses", spec=LicenseModel, nrows=3, anon="True"
|
|
57
49
|
)
|
|
58
50
|
static_json_ds.show()
|
|
59
51
|
|
|
@@ -73,6 +65,11 @@ def main():
|
|
|
73
65
|
dynamic_csv_ds.print_schema()
|
|
74
66
|
dynamic_csv_ds.show()
|
|
75
67
|
|
|
68
|
+
print(
|
|
69
|
+
"Note: script might hang at the end due to https://github.com/apache/arrow/issues/43497"
|
|
70
|
+
)
|
|
71
|
+
print("Just press Ctrl+C to exit.")
|
|
72
|
+
|
|
76
73
|
|
|
77
74
|
if __name__ == "__main__":
|
|
78
75
|
main()
|
|
@@ -55,8 +55,8 @@ class CNN(nn.Module):
|
|
|
55
55
|
|
|
56
56
|
if __name__ == "__main__":
|
|
57
57
|
ds = (
|
|
58
|
-
dc.read_storage(STORAGE, type="image")
|
|
59
|
-
.settings(prefetch=25)
|
|
58
|
+
dc.read_storage(STORAGE, type="image", anon=True)
|
|
59
|
+
.settings(prefetch=25, cache=True)
|
|
60
60
|
.filter(dc.C("file.path").glob("*.jpg"))
|
|
61
61
|
.map(
|
|
62
62
|
label=lambda path: label_to_int(basename(path)[:3], CLASSES),
|
|
@@ -21,19 +21,19 @@ def fibonacci(n):
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
# Define the UDF:
|
|
24
|
-
def path_len_benchmark(path):
|
|
24
|
+
def path_len_benchmark(path: str) -> int:
|
|
25
25
|
# Run the fibonacci benchmark as an example of a single-threaded CPU-bound UDF
|
|
26
26
|
fibonacci(35)
|
|
27
27
|
if path.endswith(".json"):
|
|
28
|
-
return
|
|
28
|
+
return -1
|
|
29
29
|
return len(path)
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
# Run in chain
|
|
33
|
-
|
|
34
|
-
"gs://datachain-demo/dogs-and-cats/",
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
params=["file.path"]
|
|
38
|
-
|
|
39
|
-
)
|
|
33
|
+
(
|
|
34
|
+
dc.read_storage("gs://datachain-demo/dogs-and-cats/", anon=True)
|
|
35
|
+
# Try disabling this setting to see the difference in performance
|
|
36
|
+
.settings(parallel=-1)
|
|
37
|
+
.map(path_len=path_len_benchmark, params=["file.path"])
|
|
38
|
+
.show()
|
|
39
|
+
)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import datachain as dc
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# Define the UDF:
|
|
5
|
+
# DataChain figures out input and output types automatically
|
|
6
|
+
# based on the function signature and the data provided.
|
|
7
|
+
def path_len(path: str) -> int:
|
|
8
|
+
if path.endswith(".json"):
|
|
9
|
+
return -1
|
|
10
|
+
return len(path)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
if __name__ == "__main__":
|
|
14
|
+
# Process all the files in the storage bucket, using the UDF
|
|
15
|
+
# `read_storage` reads files from the specified path
|
|
16
|
+
# and returns a DataChain object that has `File` objects
|
|
17
|
+
(
|
|
18
|
+
dc.read_storage("gs://datachain-demo/dogs-and-cats/", anon=True)
|
|
19
|
+
.map(path_len=path_len, params=["file.path"])
|
|
20
|
+
.show()
|
|
21
|
+
)
|
|
@@ -7,41 +7,37 @@ To install the required dependencies:
|
|
|
7
7
|
|
|
8
8
|
import os
|
|
9
9
|
|
|
10
|
-
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
|
11
|
-
|
|
12
10
|
import open_clip
|
|
13
11
|
|
|
14
12
|
import datachain as dc
|
|
13
|
+
from datachain import C, File
|
|
14
|
+
|
|
15
|
+
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
|
15
16
|
|
|
16
17
|
|
|
17
|
-
class
|
|
18
|
+
class ClipImageEncoder:
|
|
18
19
|
def __init__(self, model_name: str, pretrained: str):
|
|
19
20
|
self.model_name = model_name
|
|
20
21
|
self.pretrained = pretrained
|
|
21
|
-
|
|
22
|
-
def setup(self):
|
|
23
22
|
self.model, _, self.preprocess = open_clip.create_model_and_transforms(
|
|
24
23
|
self.model_name, self.pretrained
|
|
25
24
|
)
|
|
26
25
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
26
|
+
|
|
27
|
+
def embeddings(file: File, encoder: ClipImageEncoder) -> list[float]:
|
|
28
|
+
img = file.read()
|
|
29
|
+
img = encoder.preprocess(img).unsqueeze(0)
|
|
30
|
+
emb = encoder.model.encode_image(img)
|
|
31
|
+
return emb[0].tolist()
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
if __name__ == "__main__":
|
|
35
|
-
# Run in chain
|
|
36
35
|
(
|
|
37
|
-
dc.read_storage("gs://datachain-demo/dogs-and-cats/", type="image")
|
|
38
|
-
.filter(
|
|
39
|
-
.settings(parallel=2)
|
|
36
|
+
dc.read_storage("gs://datachain-demo/dogs-and-cats/", type="image", anon=True)
|
|
37
|
+
.filter(C("file.path").glob("*cat*.jpg"))
|
|
40
38
|
.limit(5)
|
|
41
|
-
.
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
output={"emb": list[float]},
|
|
45
|
-
)
|
|
39
|
+
.settings(parallel=True)
|
|
40
|
+
.setup(encoder=lambda: ClipImageEncoder("ViT-B-32", "laion2b_s34b_b79k"))
|
|
41
|
+
.map(emb=embeddings)
|
|
46
42
|
.show()
|
|
47
43
|
)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
import anthropic
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
import datachain as dc
|
|
8
|
+
from datachain import C, File
|
|
9
|
+
|
|
10
|
+
DATA = "gs://datachain-demo/chatbot-KiT"
|
|
11
|
+
MODEL = "claude-3-5-haiku-latest"
|
|
12
|
+
TEMPERATURE = 0.9
|
|
13
|
+
DEFAULT_OUTPUT_TOKENS = 1024
|
|
14
|
+
|
|
15
|
+
PROMPT = """Consider the dialogue between the 'user' and the 'bot'. The 'user' is a
|
|
16
|
+
human trying to find the best mobile plan. The 'bot' is a chatbot designed to query
|
|
17
|
+
the user and offer the best solution. The dialog is successful if the 'bot' is able to
|
|
18
|
+
gather the information and offer a plan, or inform the user that such plan does not
|
|
19
|
+
exist. The dialog is not successful if the conversation ends early or the 'user'
|
|
20
|
+
requests additional functions the 'bot' cannot perform. Read the dialogue below and
|
|
21
|
+
rate it 'Success' if it is successful, and 'Failure' if not. After that, provide
|
|
22
|
+
one-sentence explanation of the reasons for this rating. Use only JSON object as output
|
|
23
|
+
with the keys 'status', and 'explanation'.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
API_KEY = os.environ.get("ANTHROPIC_API_KEY")
|
|
27
|
+
|
|
28
|
+
if not API_KEY:
|
|
29
|
+
print("This example requires an Anthropic API key")
|
|
30
|
+
print("Add your key using the ANTHROPIC_API_KEY environment variable.")
|
|
31
|
+
sys.exit(0)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Rating(BaseModel):
|
|
35
|
+
status: str = ""
|
|
36
|
+
explanation: str = ""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def rate(client: anthropic.Anthropic, file: File) -> Rating:
|
|
40
|
+
content = file.read()
|
|
41
|
+
response = client.messages.create(
|
|
42
|
+
model=MODEL,
|
|
43
|
+
max_tokens=DEFAULT_OUTPUT_TOKENS,
|
|
44
|
+
system=PROMPT,
|
|
45
|
+
temperature=TEMPERATURE,
|
|
46
|
+
messages=[
|
|
47
|
+
{"role": "user", "content": f"{content}"},
|
|
48
|
+
],
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
first_block = response.content[0]
|
|
52
|
+
if first_block.type == "text":
|
|
53
|
+
return Rating.model_validate_json(first_block.text)
|
|
54
|
+
raise ValueError(f"Unexpected content block type: {first_block.type}")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
(
|
|
58
|
+
dc.read_storage(DATA, type="text", anon=True)
|
|
59
|
+
.filter(C("file.path").glob("*.txt"))
|
|
60
|
+
.limit(4)
|
|
61
|
+
.settings(parallel=2, cache=True)
|
|
62
|
+
.setup(client=lambda: anthropic.Anthropic(api_key=API_KEY))
|
|
63
|
+
.map(rating=rate)
|
|
64
|
+
.show()
|
|
65
|
+
)
|
|
@@ -25,20 +25,20 @@ def eval_dialog(
|
|
|
25
25
|
) -> DialogEval:
|
|
26
26
|
try:
|
|
27
27
|
completion = client.chat_completion(
|
|
28
|
-
model="
|
|
28
|
+
model="HuggingFaceTB/SmolLM3-3B",
|
|
29
29
|
messages=[
|
|
30
30
|
{
|
|
31
31
|
"role": "user",
|
|
32
32
|
"content": f"{PROMPT}\n\nUser: {user_input}\nBot: {bot_response}",
|
|
33
33
|
},
|
|
34
34
|
],
|
|
35
|
-
response_format={
|
|
35
|
+
response_format={
|
|
36
|
+
"type": "json_schema",
|
|
37
|
+
"json_schema": {"schema": DialogEval.model_json_schema()},
|
|
38
|
+
},
|
|
36
39
|
)
|
|
37
40
|
except HTTPError as e:
|
|
38
|
-
return DialogEval(
|
|
39
|
-
result="Error",
|
|
40
|
-
reason=f"Error while interacting with the Hugging Face API. {e}",
|
|
41
|
-
)
|
|
41
|
+
return DialogEval(result="Error", reason=str(e))
|
|
42
42
|
|
|
43
43
|
message = completion.choices[0].message
|
|
44
44
|
try:
|