datachain 0.10.0__tar.gz → 0.11.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.10.0 → datachain-0.11.0}/.pre-commit-config.yaml +1 -1
- {datachain-0.10.0 → datachain-0.11.0}/PKG-INFO +3 -2
- {datachain-0.10.0 → datachain-0.11.0}/pyproject.toml +3 -2
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/file.py +6 -2
- datachain-0.11.0/src/datachain/script_meta.py +147 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain.egg-info/PKG-INFO +3 -2
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain.egg-info/SOURCES.txt +2 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain.egg-info/requires.txt +4 -1
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_datachain.py +21 -0
- datachain-0.11.0/tests/unit/test_script_meta.py +119 -0
- {datachain-0.10.0 → datachain-0.11.0}/.cruft.json +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/.gitattributes +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/.github/codecov.yaml +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/.github/dependabot.yml +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/.github/workflows/release.yml +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/.github/workflows/tests.yml +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/.gitignore +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/LICENSE +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/README.rst +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/assets/datachain.svg +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/contributing.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/examples.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/index.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/overrides/main.html +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/quick-start.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/file.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/index.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/pose.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/segment.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/datachain.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/func.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/index.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/toolkit.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/torch.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/references/udf.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/docs/tutorials.md +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/multimodal/wds.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/mkdocs.yml +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/noxfile.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/setup.cfg +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/__main__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/asyn.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cache.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/cli/utils.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/azure.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/gcs.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/hf.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/local.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/client/s3.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/config.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/dataset.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/error.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/fs/reference.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/array.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/base.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/conditional.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/func.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/numeric.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/path.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/random.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/string.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/func/window.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/job.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/clip.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/dc.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/hf.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/image.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/listing.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/settings.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/tar.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/text.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/udf.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/utils.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/video.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/listing.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/bbox.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/pose.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/segment.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/node.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/progress.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/py.typed +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/batch.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/dataset.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/metrics.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/params.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/queue.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/schema.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/session.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/udf.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/query/utils.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/remote/studio.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/types.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/sql/utils.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/studio.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/telemetry.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain/utils.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/conftest.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/data.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/examples/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/examples/test_examples.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/examples/wds_data.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_catalog.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_client.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_data_storage.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_datasets.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_file.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_hf.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_listing.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_ls.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_metrics.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_pull.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_pytorch.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_query.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_session.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_toolkit.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/func/test_warehouse.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/scripts/feature_class.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/test_atomicity.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/test_cli_e2e.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/test_cli_studio.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/test_query_e2e.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/test_telemetry.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_models.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_video.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_asyn.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_cache.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_catalog.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_client.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_config.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_dataset.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_func.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_listing.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_metastore.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_query.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_query_params.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_serializer.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_session.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_utils.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.10.0 → datachain-0.11.0}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.10.0
|
|
3
|
+
Version: 0.11.0
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -49,6 +49,7 @@ Requires-Dist: platformdirs
|
|
|
49
49
|
Requires-Dist: dvc-studio-client<1,>=0.21
|
|
50
50
|
Requires-Dist: tabulate
|
|
51
51
|
Requires-Dist: websockets
|
|
52
|
+
Requires-Dist: tomli; python_version < "3.11"
|
|
52
53
|
Provides-Extra: docs
|
|
53
54
|
Requires-Dist: mkdocs>=1.5.2; extra == "docs"
|
|
54
55
|
Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
|
|
@@ -102,7 +103,7 @@ Requires-Dist: datachain[tests]; extra == "examples"
|
|
|
102
103
|
Requires-Dist: defusedxml; extra == "examples"
|
|
103
104
|
Requires-Dist: accelerate; extra == "examples"
|
|
104
105
|
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
105
|
-
Requires-Dist: ultralytics==8.3.
|
|
106
|
+
Requires-Dist: ultralytics==8.3.78; extra == "examples"
|
|
106
107
|
Requires-Dist: open_clip_torch; extra == "examples"
|
|
107
108
|
|
|
108
109
|
================
|
|
@@ -51,7 +51,8 @@ dependencies = [
|
|
|
51
51
|
"platformdirs",
|
|
52
52
|
"dvc-studio-client>=0.21,<1",
|
|
53
53
|
"tabulate",
|
|
54
|
-
"websockets"
|
|
54
|
+
"websockets",
|
|
55
|
+
"tomli;python_version<'3.11'"
|
|
55
56
|
]
|
|
56
57
|
|
|
57
58
|
[project.optional-dependencies]
|
|
@@ -118,7 +119,7 @@ examples = [
|
|
|
118
119
|
"defusedxml",
|
|
119
120
|
"accelerate",
|
|
120
121
|
"huggingface_hub[hf_transfer]",
|
|
121
|
-
"ultralytics==8.3.
|
|
122
|
+
"ultralytics==8.3.78",
|
|
122
123
|
"open_clip_torch"
|
|
123
124
|
]
|
|
124
125
|
|
|
@@ -272,8 +272,12 @@ class File(DataModel):
|
|
|
272
272
|
def save(self, destination: str):
|
|
273
273
|
"""Writes it's content to destination"""
|
|
274
274
|
destination = stringify_path(destination)
|
|
275
|
-
client: Client = self._catalog.get_client(
|
|
276
|
-
|
|
275
|
+
client: Client = self._catalog.get_client(destination)
|
|
276
|
+
|
|
277
|
+
if client.PREFIX == "file://" and not destination.startswith(client.PREFIX):
|
|
278
|
+
destination = Path(destination).absolute().as_uri()
|
|
279
|
+
|
|
280
|
+
client.upload(self.read(), destination)
|
|
277
281
|
|
|
278
282
|
def _symlink_to(self, destination: str):
|
|
279
283
|
if self.location:
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Any, Optional
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
import tomllib
|
|
7
|
+
except ModuleNotFoundError:
|
|
8
|
+
# tomllib is in standard library from python 3.11 so for earlier versions
|
|
9
|
+
# we need tomli
|
|
10
|
+
import tomli as tomllib # type: ignore[no-redef]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ScriptConfigParsingError(Exception):
|
|
14
|
+
def __init__(self, message):
|
|
15
|
+
super().__init__(message)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
|
|
19
|
+
class ScriptConfig:
|
|
20
|
+
"""
|
|
21
|
+
Class that is parsing inline script metadata to get some basic information for
|
|
22
|
+
running datachain script like python version, dependencies, attachments etc.
|
|
23
|
+
Inline script metadata must follow the format described in https://packaging.python.org/en/latest/specifications/inline-script-metadata/#inline-script-metadata.
|
|
24
|
+
Example of script with inline metadata:
|
|
25
|
+
# /// script
|
|
26
|
+
# requires-python = ">=3.12"
|
|
27
|
+
#
|
|
28
|
+
# dependencies = [
|
|
29
|
+
# "pandas < 2.1.0",
|
|
30
|
+
# "numpy == 1.26.4"
|
|
31
|
+
# ]
|
|
32
|
+
#
|
|
33
|
+
# [tools.datachain.workers]
|
|
34
|
+
# num_workers = 3
|
|
35
|
+
#
|
|
36
|
+
# [tools.datachain.attachments]
|
|
37
|
+
# image1 = "s3://ldb-public/image1.jpg"
|
|
38
|
+
# file1 = "s3://ldb-public/file.pdf"
|
|
39
|
+
#
|
|
40
|
+
# [tools.datachain.params]
|
|
41
|
+
# min_length_sec = 1
|
|
42
|
+
# cache = false
|
|
43
|
+
#
|
|
44
|
+
# [tools.datachain.inputs]
|
|
45
|
+
# threshold = 0.5
|
|
46
|
+
# start_ds_name = "ds://start"
|
|
47
|
+
#
|
|
48
|
+
# [tools.datachain.outputs]
|
|
49
|
+
# result_dataset = "ds://res"
|
|
50
|
+
# result_dir = "/temp"
|
|
51
|
+
#
|
|
52
|
+
# ///
|
|
53
|
+
|
|
54
|
+
import sys
|
|
55
|
+
import pandas as pd
|
|
56
|
+
|
|
57
|
+
print(f"Python version: {sys.version_info}")
|
|
58
|
+
print(f"Pandas version: {pd.__version__}")
|
|
59
|
+
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
python_version: Optional[str]
|
|
63
|
+
dependencies: list[str]
|
|
64
|
+
attachments: dict[str, str]
|
|
65
|
+
params: dict[str, Any]
|
|
66
|
+
inputs: dict[str, Any]
|
|
67
|
+
outputs: dict[str, Any]
|
|
68
|
+
num_workers: Optional[int] = None
|
|
69
|
+
|
|
70
|
+
def __init__(
|
|
71
|
+
self,
|
|
72
|
+
python_version: Optional[str] = None,
|
|
73
|
+
dependencies: Optional[list[str]] = None,
|
|
74
|
+
attachments: Optional[dict[str, str]] = None,
|
|
75
|
+
params: Optional[dict[str, Any]] = None,
|
|
76
|
+
inputs: Optional[dict[str, Any]] = None,
|
|
77
|
+
outputs: Optional[dict[str, Any]] = None,
|
|
78
|
+
num_workers: Optional[int] = None,
|
|
79
|
+
):
|
|
80
|
+
self.python_version = python_version
|
|
81
|
+
self.dependencies = dependencies or []
|
|
82
|
+
self.attachments = attachments or {}
|
|
83
|
+
self.params = params or {}
|
|
84
|
+
self.inputs = inputs or {}
|
|
85
|
+
self.outputs = outputs or {}
|
|
86
|
+
self.num_workers = num_workers
|
|
87
|
+
|
|
88
|
+
def get_param(self, name: str, default: Any) -> Any:
|
|
89
|
+
return self.params.get(name, default)
|
|
90
|
+
|
|
91
|
+
def get_input(self, name: str, default: Any) -> Any:
|
|
92
|
+
return self.inputs.get(name, default)
|
|
93
|
+
|
|
94
|
+
def get_output(self, name: str, default: Any) -> Any:
|
|
95
|
+
return self.outputs.get(name, default)
|
|
96
|
+
|
|
97
|
+
def get_attachment(self, name: str, default: Any) -> Any:
|
|
98
|
+
return self.attachments.get(name, default)
|
|
99
|
+
|
|
100
|
+
@staticmethod
|
|
101
|
+
def read(script: str) -> Optional[dict]:
|
|
102
|
+
"""Converts inline script metadata to dict with all found data"""
|
|
103
|
+
regex = (
|
|
104
|
+
r"(?m)^# \/\/\/ (?P<type>[a-zA-Z0-9-]+)[ \t]*$[\r\n|\r|\n]"
|
|
105
|
+
"(?P<content>(?:^#(?:| .*)$[\r\n|\r|\n])+)^# \\/\\/\\/[ \t]*$"
|
|
106
|
+
)
|
|
107
|
+
name = "script"
|
|
108
|
+
matches = list(
|
|
109
|
+
filter(lambda m: m.group("type") == name, re.finditer(regex, script))
|
|
110
|
+
)
|
|
111
|
+
if len(matches) > 1:
|
|
112
|
+
raise ValueError(f"Multiple {name} blocks found")
|
|
113
|
+
if len(matches) == 1:
|
|
114
|
+
content = "".join(
|
|
115
|
+
line[2:] if line.startswith("# ") else line[1:]
|
|
116
|
+
for line in matches[0].group("content").splitlines(keepends=True)
|
|
117
|
+
)
|
|
118
|
+
return tomllib.loads(content)
|
|
119
|
+
return None
|
|
120
|
+
|
|
121
|
+
@staticmethod
|
|
122
|
+
def parse(script: str) -> Optional["ScriptConfig"]:
|
|
123
|
+
"""
|
|
124
|
+
Method that is parsing inline script metadata from datachain script and
|
|
125
|
+
instantiating ScriptConfig class with found data. If no inline metadata is
|
|
126
|
+
found, it returns None
|
|
127
|
+
"""
|
|
128
|
+
try:
|
|
129
|
+
meta = ScriptConfig.read(script)
|
|
130
|
+
if not meta:
|
|
131
|
+
return None
|
|
132
|
+
custom = meta.get("tools", {}).get("datachain", {})
|
|
133
|
+
return ScriptConfig(
|
|
134
|
+
python_version=meta.get("requires-python"),
|
|
135
|
+
dependencies=meta.get("dependencies"),
|
|
136
|
+
num_workers=custom.get("workers", {}).get("num_workers"),
|
|
137
|
+
attachments=custom.get("attachments"),
|
|
138
|
+
params={k: str(v) for k, v in custom.get("params").items()}
|
|
139
|
+
if custom.get("params")
|
|
140
|
+
else None,
|
|
141
|
+
inputs=custom.get("inputs"),
|
|
142
|
+
outputs=custom.get("outputs"),
|
|
143
|
+
)
|
|
144
|
+
except Exception as e:
|
|
145
|
+
raise ScriptConfigParsingError(
|
|
146
|
+
f"Error when parsing script meta: {e}"
|
|
147
|
+
) from e
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.10.0
|
|
3
|
+
Version: 0.11.0
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -49,6 +49,7 @@ Requires-Dist: platformdirs
|
|
|
49
49
|
Requires-Dist: dvc-studio-client<1,>=0.21
|
|
50
50
|
Requires-Dist: tabulate
|
|
51
51
|
Requires-Dist: websockets
|
|
52
|
+
Requires-Dist: tomli; python_version < "3.11"
|
|
52
53
|
Provides-Extra: docs
|
|
53
54
|
Requires-Dist: mkdocs>=1.5.2; extra == "docs"
|
|
54
55
|
Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
|
|
@@ -102,7 +103,7 @@ Requires-Dist: datachain[tests]; extra == "examples"
|
|
|
102
103
|
Requires-Dist: defusedxml; extra == "examples"
|
|
103
104
|
Requires-Dist: accelerate; extra == "examples"
|
|
104
105
|
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
105
|
-
Requires-Dist: ultralytics==8.3.
|
|
106
|
+
Requires-Dist: ultralytics==8.3.78; extra == "examples"
|
|
106
107
|
Requires-Dist: open_clip_torch; extra == "examples"
|
|
107
108
|
|
|
108
109
|
================
|
|
@@ -77,6 +77,7 @@ src/datachain/nodes_fetcher.py
|
|
|
77
77
|
src/datachain/nodes_thread_pool.py
|
|
78
78
|
src/datachain/progress.py
|
|
79
79
|
src/datachain/py.typed
|
|
80
|
+
src/datachain/script_meta.py
|
|
80
81
|
src/datachain/studio.py
|
|
81
82
|
src/datachain/telemetry.py
|
|
82
83
|
src/datachain/utils.py
|
|
@@ -279,6 +280,7 @@ tests/unit/test_pytorch.py
|
|
|
279
280
|
tests/unit/test_query.py
|
|
280
281
|
tests/unit/test_query_metrics.py
|
|
281
282
|
tests/unit/test_query_params.py
|
|
283
|
+
tests/unit/test_script_meta.py
|
|
282
284
|
tests/unit/test_serializer.py
|
|
283
285
|
tests/unit/test_session.py
|
|
284
286
|
tests/unit/test_utils.py
|
|
@@ -32,6 +32,9 @@ dvc-studio-client<1,>=0.21
|
|
|
32
32
|
tabulate
|
|
33
33
|
websockets
|
|
34
34
|
|
|
35
|
+
[:python_version < "3.11"]
|
|
36
|
+
tomli
|
|
37
|
+
|
|
35
38
|
[dev]
|
|
36
39
|
datachain[docs,tests]
|
|
37
40
|
mypy==1.15.0
|
|
@@ -55,7 +58,7 @@ datachain[tests]
|
|
|
55
58
|
defusedxml
|
|
56
59
|
accelerate
|
|
57
60
|
huggingface_hub[hf_transfer]
|
|
58
|
-
ultralytics==8.3.
|
|
61
|
+
ultralytics==8.3.78
|
|
59
62
|
open_clip_torch
|
|
60
63
|
|
|
61
64
|
[hf]
|
|
@@ -358,6 +358,27 @@ def test_export_images_files(test_session, tmp_dir, tmp_path, use_cache):
|
|
|
358
358
|
assert images_equal(img["data"], exported_img)
|
|
359
359
|
|
|
360
360
|
|
|
361
|
+
def test_to_storage_relative_path(test_session, tmp_path):
    """Export image files to a relative output directory and read them back."""
    images = [
        {"name": "img1.jpg", "data": Image.new(mode="RGB", size=(64, 64))},
        {"name": "img2.jpg", "data": Image.new(mode="RGB", size=(128, 128))},
    ]

    # Write the source images where the chain will read them from.
    for img in images:
        img["data"].save(tmp_path / img["name"])

    source = f"file://{tmp_path}"
    files = [ImageFile(path=img["name"], source=source) for img in images]
    chain = DataChain.from_values(file=files, session=test_session)

    # NOTE(review): "output" is resolved against the current working
    # directory; presumably a fixture isolates the cwd per test - confirm.
    chain.to_storage("output", placement="filename")

    output_dir = Path("output")
    for img in images:
        exported_img = Image.open(output_dir / img["name"])
        assert images_equal(img["data"], exported_img)
|
|
380
|
+
|
|
381
|
+
|
|
361
382
|
def test_to_storage_files_filename_placement_not_unique_files(tmp_dir, test_session):
|
|
362
383
|
data = b"some\x00data\x00is\x48\x65\x6c\x57\x6f\x72\x6c\x64\xff\xffheRe"
|
|
363
384
|
bucket_name = "mybucket"
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from datachain.script_meta import ScriptConfig, ScriptConfigParsingError
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_parsing_all_fields():
    """A metadata block that sets every supported field is parsed in full."""
    source = """
# /// script
# requires-python = ">=3.12"
#
# dependencies = [
# "pandas < 2.1.0",
# "numpy == 1.26.4"
# ]
#
# [tools.datachain.workers]
# num_workers = 3
#
# [tools.datachain.attachments]
# image1 = "s3://ldb-public/image1.jpg"
# file1 = "s3://ldb-public/file.pdf"
#
# [tools.datachain.params]
# min_length_sec = 1
# cache = false
#
# [tools.datachain.inputs]
# threshold = 0.5
# start_ds_name = "ds://start"
#
# [tools.datachain.outputs]
# result_dataset = "ds://res"
# result_dir = "/temp"
#
# ///
import sys
import pandas as pd

print(f"Python version: {sys.version_info}")
print(f"Pandas version: {pd.__version__}")
"""
    config = ScriptConfig.parse(source)

    # Param values come back as strings; inputs keep their TOML types.
    expected = ScriptConfig(
        python_version=">=3.12",
        dependencies=["pandas < 2.1.0", "numpy == 1.26.4"],
        attachments={
            "image1": "s3://ldb-public/image1.jpg",
            "file1": "s3://ldb-public/file.pdf",
        },
        params={"min_length_sec": "1", "cache": "False"},
        inputs={"threshold": 0.5, "start_ds_name": "ds://start"},
        outputs={"result_dataset": "ds://res", "result_dir": "/temp"},
        num_workers=3,
    )
    assert config == expected
    assert config.get_param("non_existing", "default") == "default"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_parsing_no_metadata():
    """A script without any inline metadata block parses to None."""
    source = """
import sys
import pandas as pd

print(f"Python version: {sys.version_info}")
print(f"Pandas version: {pd.__version__}")
"""

    parsed = ScriptConfig.parse(source)
    assert parsed is None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_parsing_empty():
    """A metadata block with no content lines parses to None."""
    source = """
# /// script
# ///
import sys
import pandas as pd

print(f"Python version: {sys.version_info}")
print(f"Pandas version: {pd.__version__}")
"""

    parsed = ScriptConfig.parse(source)
    assert parsed is None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def test_parsing_only_python_version():
    """Only ``requires-python`` is set; every other field is empty or None."""
    source = """
# /// script
# requires-python = ">=3.12"
# ///
import sys
import pandas as pd

print(f"Python version: {sys.version_info}")
print(f"Pandas version: {pd.__version__}")
"""
    parsed = ScriptConfig.parse(source)
    expected = ScriptConfig(
        python_version=">=3.12",
        dependencies=[],
        attachments={},
        params={},
        num_workers=None,
    )
    assert parsed == expected
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def test_error_when_parsing():
    """Invalid TOML in the metadata block raises ScriptConfigParsingError."""
    source = """
# /// script
# dependencies = [}
# ///
import sys
import pandas as pd

print(f"Python version: {sys.version_info}")
print(f"Pandas version: {pd.__version__}")
"""
    expected_message = (
        "Error when parsing script meta: Invalid value (at line 1, column 17)"
    )
    with pytest.raises(ScriptConfigParsingError) as excinfo:
        ScriptConfig.parse(source)
    assert str(excinfo.value) == expected_message
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|