datachain 0.2.14__tar.gz → 0.2.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.2.14 → datachain-0.2.16}/.pre-commit-config.yaml +2 -2
- {datachain-0.2.14/src/datachain.egg-info → datachain-0.2.16}/PKG-INFO +1 -1
- {datachain-0.2.14 → datachain-0.2.16}/docs/index.md +2 -3
- {datachain-0.2.14 → datachain-0.2.16}/docs/references/udf.md +2 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/iptc_exif_xmp_lib.py +2 -1
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/openimage-detect.py +1 -1
- {datachain-0.2.14 → datachain-0.2.16}/examples/get_started/json-csv-reader.py +7 -17
- datachain-0.2.16/examples/get_started/json-metadata-tutorial.ipynb +1949 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/get_started/torch-loader.py +1 -1
- datachain-0.2.16/examples/llm/llm_chatbot_evaluation.ipynb +683 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/llm_and_nlp/llm-claude-aggregate-query.py +6 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/llm_and_nlp/llm-claude-simple-query.py +6 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/llm_and_nlp/llm-claude.py +7 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/multimodal/wds.py +20 -11
- {datachain-0.2.14 → datachain-0.2.16}/examples/multimodal/wds_filtered.py +1 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/catalog/catalog.py +5 -7
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/cli.py +1 -1
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/data_storage/metastore.py +2 -2
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/data_storage/sqlite.py +21 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/data_storage/warehouse.py +28 -8
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/arrow.py +27 -8
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/convert/flatten.py +10 -5
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/convert/python_to_sql.py +1 -1
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/data_model.py +6 -1
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/dc.py +102 -32
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/meta_formats.py +6 -6
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/settings.py +1 -17
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/signal_schema.py +4 -1
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/udf.py +18 -10
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/query/dataset.py +10 -46
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/types.py +5 -1
- {datachain-0.2.14 → datachain-0.2.16/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain.egg-info/SOURCES.txt +2 -1
- {datachain-0.2.14 → datachain-0.2.16}/tests/func/test_catalog.py +2 -2
- {datachain-0.2.14 → datachain-0.2.16}/tests/func/test_datachain.py +16 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_arrow.py +32 -5
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_datachain.py +208 -5
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_feature.py +47 -0
- datachain-0.2.14/examples/llm/llm_chatbot_evaluation.ipynb +0 -772
- {datachain-0.2.14 → datachain-0.2.16}/.cruft.json +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/.gitattributes +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/.github/codecov.yaml +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/.github/dependabot.yml +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/.github/workflows/release.yml +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/.github/workflows/tests.yml +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/.gitignore +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/CONTRIBUTING.rst +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/LICENSE +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/README.rst +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/docs/assets/datachain.png +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/docs/assets/flowchart.png +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/docs/references/datachain.md +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/docs/references/datatype.md +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/docs/references/file.md +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/docs/references/index.md +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/docs/references/sql.md +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/docs/references/torch.md +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/blip2_image_desc_lib.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/.gitignore +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/1-quick-start.ipynb +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/2-working-with-image-datachains.ipynb +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/3-train-model.ipynb +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/4-inference.ipynb +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/README.md +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/requirements.txt +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/scripts/1-quick-start.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/scripts/2-basic-operations.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/scripts/2-embeddings.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/scripts/3-split-train-test.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/scripts/3-train-model.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/src/clustering.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/src/train.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/static/images/basic-operations.png +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/static/images/core-concepts.png +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/static/images/datachain-logo.png +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/static/images/datachain-overview.png +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/static/images/dataset-1.png +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/static/images/dataset-2.png +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/static/images/dataset-3.png +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/fashion_product_images/static/images/studio.png +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/llm_and_nlp/unstructured-text.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/multimodal/clip_fine_tuning.ipynb +0 -0
- /datachain-0.2.14/examples/multimodal/clip.py → /datachain-0.2.16/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/mkdocs.yml +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/noxfile.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/pyproject.toml +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/setup.cfg +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/__main__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/asyn.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/cache.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/catalog/subclass.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/cli_utils.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/client/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/client/azure.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/client/gcs.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/client/local.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/client/s3.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/config.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/data_storage/id_generator.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/dataset.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/error.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/job.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/clip.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/file.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/image.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/text.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/utils.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/listing.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/node.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/progress.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/py.typed +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/query/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/query/batch.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/query/builtins.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/query/metrics.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/query/params.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/query/schema.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/query/session.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/query/udf.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/remote/studio.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/sql/utils.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/storage.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain/utils.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/conftest.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/data.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/examples/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/examples/wds_data.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/func/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/func/test_client.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/func/test_datasets.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/func/test_ls.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/func/test_pull.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/func/test_pytorch.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/func/test_query.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/scripts/feature_class.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/scripts/name_len_normal.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/test_cli_e2e.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/test_query_e2e.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_asyn.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_cache.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_catalog.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_client.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_dataset.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_id_generator.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_listing.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_metastore.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_query_params.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_serializer.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_session.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_storage.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_udf.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_utils.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.2.14 → datachain-0.2.16}/tests/utils.py +0 -0
|
@@ -24,7 +24,7 @@ repos:
|
|
|
24
24
|
- id: trailing-whitespace
|
|
25
25
|
exclude: '^LICENSES/'
|
|
26
26
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
27
|
-
rev: 'v0.5.
|
|
27
|
+
rev: 'v0.5.5'
|
|
28
28
|
hooks:
|
|
29
29
|
- id: ruff
|
|
30
30
|
args: [--fix, --exit-non-zero-on-fix]
|
|
@@ -35,7 +35,7 @@ repos:
|
|
|
35
35
|
- id: codespell
|
|
36
36
|
additional_dependencies: ["tomli"]
|
|
37
37
|
- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
|
|
38
|
-
rev: v2.
|
|
38
|
+
rev: v2.14.0
|
|
39
39
|
hooks:
|
|
40
40
|
- id: pretty-format-toml
|
|
41
41
|
args: [--autofix, --no-sort]
|
|
@@ -278,10 +278,9 @@ images_with_dogs.select("annotations", "file.name").show()
|
|
|
278
278
|
|
|
279
279
|
[Limited by 20 rows]
|
|
280
280
|
```
|
|
281
|
+
For in-depth review of working with JSON metadata, please follow this tutorial:
|
|
281
282
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
[https://github.com/iterative/datachain/blob/main/examples/getting_started/datasets.ipynb](https://github.com/iterative/datachain/blob/main/examples/getting_started/datasets.ipynb)
|
|
283
|
+
[https://github.com/iterative/datachain/blob/main/examples/get_started/json-metadata-tutorial.ipynb](https://github.com/iterative/datachain/blob/main/examples/get_started/json-metadata-tutorial.ipynb)
|
|
285
284
|
|
|
286
285
|
### Passing data to training
|
|
287
286
|
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# pip install defusedxml
|
|
1
2
|
import json
|
|
2
3
|
|
|
3
4
|
from PIL import (
|
|
@@ -63,7 +64,7 @@ if __name__ == "__main__":
|
|
|
63
64
|
DataChain.from_storage(source, type="image")
|
|
64
65
|
.settings(parallel=-1)
|
|
65
66
|
.filter(C("file.name").glob("*.jpg"))
|
|
66
|
-
.limit(
|
|
67
|
+
.limit(5000)
|
|
67
68
|
.map(
|
|
68
69
|
image_description,
|
|
69
70
|
params=["file"],
|
|
@@ -35,8 +35,8 @@ def main():
|
|
|
35
35
|
print("Dynamic JSONl schema from 2 objects")
|
|
36
36
|
print("========================================================================")
|
|
37
37
|
uri = "gs://datachain-demo/jsonl/object.jsonl"
|
|
38
|
-
jsonl_ds = DataChain.from_json(uri, meta_type="jsonl",
|
|
39
|
-
|
|
38
|
+
jsonl_ds = DataChain.from_json(uri, meta_type="jsonl", print_schema=True)
|
|
39
|
+
jsonl_ds.show()
|
|
40
40
|
|
|
41
41
|
print()
|
|
42
42
|
print("========================================================================")
|
|
@@ -49,8 +49,7 @@ def main():
|
|
|
49
49
|
json_pairs_ds = DataChain.from_json(
|
|
50
50
|
uri, schema_from=schema_uri, jmespath="@", model_name="OpenImage"
|
|
51
51
|
)
|
|
52
|
-
|
|
53
|
-
# print(list(json_pairs_ds.collect())[0])
|
|
52
|
+
json_pairs_ds.show()
|
|
54
53
|
|
|
55
54
|
uri = "gs://datachain-demo/coco2017/annotations_captions/"
|
|
56
55
|
|
|
@@ -61,7 +60,7 @@ def main():
|
|
|
61
60
|
chain = (
|
|
62
61
|
DataChain.from_storage(uri)
|
|
63
62
|
.filter(C("file.name").glob("*.json"))
|
|
64
|
-
.
|
|
63
|
+
.print_json_schema(jmespath="@", model_name="Coco")
|
|
65
64
|
)
|
|
66
65
|
chain.save()
|
|
67
66
|
|
|
@@ -72,13 +71,13 @@ def main():
|
|
|
72
71
|
static_json_ds = DataChain.from_json(
|
|
73
72
|
uri, jmespath="licenses", spec=LicenseFeature, nrows=3
|
|
74
73
|
)
|
|
75
|
-
|
|
74
|
+
static_json_ds.show()
|
|
76
75
|
|
|
77
76
|
print()
|
|
78
77
|
print("========================================================================")
|
|
79
78
|
print("dynamic JSON schema test parsing 5K objects")
|
|
80
79
|
print("========================================================================")
|
|
81
|
-
dynamic_json_ds = DataChain.from_json(uri, jmespath="images",
|
|
80
|
+
dynamic_json_ds = DataChain.from_json(uri, jmespath="images", print_schema=True)
|
|
82
81
|
print(dynamic_json_ds.to_pandas())
|
|
83
82
|
|
|
84
83
|
uri = "gs://datachain-demo/chatbot-csv/"
|
|
@@ -88,16 +87,7 @@ def main():
|
|
|
88
87
|
print("========================================================================")
|
|
89
88
|
static_csv_ds = DataChain.from_csv(uri, output=ChatDialog, object_name="chat")
|
|
90
89
|
static_csv_ds.print_schema()
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
uri = "gs://datachain-demo/laion-aesthetics-csv"
|
|
94
|
-
print()
|
|
95
|
-
print("========================================================================")
|
|
96
|
-
print("dynamic CSV with header schema test parsing 3/3M objects")
|
|
97
|
-
print("========================================================================")
|
|
98
|
-
dynamic_csv_ds = DataChain.from_csv(uri, object_name="laion", nrows=3)
|
|
99
|
-
dynamic_csv_ds.print_schema()
|
|
100
|
-
print(dynamic_csv_ds.to_pandas())
|
|
90
|
+
static_csv_ds.show()
|
|
101
91
|
|
|
102
92
|
|
|
103
93
|
if __name__ == "__main__":
|