datachain 0.30.4__tar.gz → 0.30.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.30.4 → datachain-0.30.5}/PKG-INFO +3 -3
- datachain-0.30.5/examples/get_started/nested_datamodel.py +70 -0
- {datachain-0.30.4 → datachain-0.30.5}/pyproject.toml +2 -2
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/warehouse.py +2 -2
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/arrow.py +2 -2
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/datachain.py +9 -7
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/model_store.py +12 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/dispatch.py +5 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/sqlite/base.py +12 -11
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/sqlite/types.py +8 -13
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/types.py +3 -3
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/utils.py +1 -1
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain.egg-info/PKG-INFO +3 -3
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain.egg-info/SOURCES.txt +1 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain.egg-info/requires.txt +2 -2
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_array.py +82 -4
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_data_storage.py +2 -2
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_datachain.py +29 -0
- datachain-0.30.5/tests/unit/sql/sqlite/test_types.py +40 -0
- datachain-0.30.4/tests/unit/sql/sqlite/test_types.py +0 -19
- {datachain-0.30.4 → datachain-0.30.5}/.cruft.json +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.gitattributes +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.github/codecov.yaml +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.github/dependabot.yml +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.github/workflows/release.yml +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.github/workflows/tests.yml +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.gitignore +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/.pre-commit-config.yaml +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/LICENSE +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/README.rst +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/assets/datachain.svg +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/commands/auth/login.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/commands/auth/logout.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/commands/auth/team.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/commands/auth/token.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/commands/index.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/commands/job/cancel.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/commands/job/clusters.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/commands/job/logs.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/commands/job/ls.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/commands/job/run.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/contributing.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/examples.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/guide/db_migrations.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/guide/delta.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/guide/env.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/guide/index.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/guide/namespaces.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/guide/processing.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/guide/remotes.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/guide/retry.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/index.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/overrides/main.html +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/quick-start.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/file.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/index.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/pose.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/segment.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/datachain.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/func.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/array.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/conditional.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/numeric.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/path.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/random.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/string.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/functions/window.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/index.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/toolkit.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/torch.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/references/udf.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/docs/tutorials.md +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/multimodal/wds.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/mkdocs.yml +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/noxfile.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/setup.cfg +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/__main__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/asyn.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cache.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/cli/utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/azure.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/gcs.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/hf.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/local.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/client/s3.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/config.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/dataset.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/delta.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/error.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/fs/reference.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/fs/utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/array.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/base.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/conditional.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/func.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/numeric.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/path.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/random.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/string.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/func/window.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/job.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/audio.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/clip.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/file.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/hf.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/image.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/listing.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/projects.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/settings.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/tar.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/text.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/udf.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/video.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/listing.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/bbox.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/pose.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/segment.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/model/utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/namespace.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/node.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/progress.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/project.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/py.typed +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/batch.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/dataset.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/metrics.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/params.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/queue.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/schema.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/session.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/udf.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/query/utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/remote/studio.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/script_meta.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/semver.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/sql/utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/studio.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/telemetry.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/conftest.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/data.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/examples/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/examples/test_examples.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/examples/wds_data.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/data/lena.jpg +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_path.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_random.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/functions/test_string.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/model/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_audio.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_batching.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_catalog.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_client.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_datachain.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_datasets.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_delta.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_file.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_hf.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_image.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_listing.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_ls.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_metastore.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_metrics.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_mutate.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_pull.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_pytorch.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_query.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_read_database.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_retry.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_session.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_to_database.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_toolkit.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_video.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/func/test_warehouse.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/scripts/feature_class.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/test_atomicity.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/test_cli_e2e.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/test_cli_studio.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/test_import_time.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/test_query_e2e.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/test_telemetry.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/model/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_asyn.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_cache.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_catalog.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_client.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_config.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_dataset.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_func.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_listing.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_metastore.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_query.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_query_params.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_semver.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_serializer.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_session.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_utils.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.30.4 → datachain-0.30.5}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.30.4
|
|
3
|
+
Version: 0.30.5
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -22,6 +22,7 @@ Requires-Dist: tomlkit
|
|
|
22
22
|
Requires-Dist: tqdm
|
|
23
23
|
Requires-Dist: numpy<3,>=1
|
|
24
24
|
Requires-Dist: pandas>=2.0.0
|
|
25
|
+
Requires-Dist: ujson>=5.10.0
|
|
25
26
|
Requires-Dist: packaging
|
|
26
27
|
Requires-Dist: pyarrow
|
|
27
28
|
Requires-Dist: typing-extensions
|
|
@@ -38,7 +39,6 @@ Requires-Dist: shtab<2,>=1.3.4
|
|
|
38
39
|
Requires-Dist: sqlalchemy>=2
|
|
39
40
|
Requires-Dist: multiprocess==0.70.16
|
|
40
41
|
Requires-Dist: cloudpickle
|
|
41
|
-
Requires-Dist: orjson>=3.10.5
|
|
42
42
|
Requires-Dist: pydantic
|
|
43
43
|
Requires-Dist: jmespath>=1.0
|
|
44
44
|
Requires-Dist: datamodel-code-generator>=0.25
|
|
@@ -92,7 +92,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
|
|
|
92
92
|
Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
|
|
93
93
|
Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
|
|
94
94
|
Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
|
|
95
|
-
Requires-Dist: pytest-
|
|
95
|
+
Requires-Dist: pytest-dotenv; extra == "tests"
|
|
96
96
|
Requires-Dist: virtualenv; extra == "tests"
|
|
97
97
|
Requires-Dist: dulwich; extra == "tests"
|
|
98
98
|
Requires-Dist: hypothesis; extra == "tests"
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Example: Nested DataModels with parallel execution.
|
|
2
|
+
|
|
3
|
+
Demonstrates mapping a function that returns a nested DataModel (a DataModel
|
|
4
|
+
containing other DataModels).
|
|
5
|
+
|
|
6
|
+
The example keeps things minimal: we persist a tiny dataset, run a parallel map
|
|
7
|
+
that returns a nested DataModel, and display the result.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from pydantic import Field
|
|
13
|
+
|
|
14
|
+
import datachain as dc
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Metric(dc.DataModel):
|
|
18
|
+
"""Represents a single computed metric with quality metadata."""
|
|
19
|
+
|
|
20
|
+
value: Optional[float] = Field(default=None, description="Computed metric value")
|
|
21
|
+
confidence: Optional[float] = Field(
|
|
22
|
+
default=None, description="Confidence / quality score"
|
|
23
|
+
)
|
|
24
|
+
status: Optional[str] = Field(default=None, description="Processing status label")
|
|
25
|
+
metric_error: Optional[str] = Field(
|
|
26
|
+
default=None, description="Error message if metric computation failed"
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SampleMetrics(dc.DataModel):
|
|
31
|
+
"""Container for two illustrative nested metrics.
|
|
32
|
+
|
|
33
|
+
Each sub-field is its own DataModel instance to demonstrate nested schemas
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
metric_primary: Metric = Field(
|
|
37
|
+
default_factory=lambda: Metric(), description="Primary metric"
|
|
38
|
+
)
|
|
39
|
+
metric_secondary: Metric = Field(
|
|
40
|
+
default_factory=lambda: Metric(), description="Secondary metric"
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def generate_sample_metrics() -> SampleMetrics:
|
|
45
|
+
"""Synthesize a pair of metrics.
|
|
46
|
+
|
|
47
|
+
In real scenarios you'd compute these values; here we just return constants
|
|
48
|
+
to keep the example deterministic.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
return SampleMetrics(
|
|
52
|
+
metric_primary=Metric(value=50.0, confidence=0.95, status="ok"),
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def main():
|
|
57
|
+
(
|
|
58
|
+
dc.read_values(record_id=[1, 2])
|
|
59
|
+
.settings(parallel=2) # Keep it parallel to test serialization
|
|
60
|
+
.map(metrics=generate_sample_metrics)
|
|
61
|
+
.save("nested_datamodel")
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
dc.read_dataset("nested_datamodel").show()
|
|
65
|
+
|
|
66
|
+
print(dc.read_dataset("nested_datamodel").to_values("metrics"))
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
if __name__ == "__main__":
|
|
70
|
+
main()
|
|
@@ -26,6 +26,7 @@ dependencies = [
|
|
|
26
26
|
"tqdm",
|
|
27
27
|
"numpy>=1,<3",
|
|
28
28
|
"pandas>=2.0.0",
|
|
29
|
+
"ujson>=5.10.0",
|
|
29
30
|
"packaging",
|
|
30
31
|
"pyarrow",
|
|
31
32
|
"typing-extensions",
|
|
@@ -42,7 +43,6 @@ dependencies = [
|
|
|
42
43
|
"sqlalchemy>=2",
|
|
43
44
|
"multiprocess==0.70.16",
|
|
44
45
|
"cloudpickle",
|
|
45
|
-
"orjson>=3.10.5",
|
|
46
46
|
"pydantic",
|
|
47
47
|
"jmespath>=1.0",
|
|
48
48
|
"datamodel-code-generator>=0.25",
|
|
@@ -108,7 +108,7 @@ tests = [
|
|
|
108
108
|
"pytest-servers[all]>=0.5.9",
|
|
109
109
|
"pytest-benchmark[histogram]",
|
|
110
110
|
"pytest-xdist>=3.3.1",
|
|
111
|
-
"pytest-
|
|
111
|
+
"pytest-dotenv",
|
|
112
112
|
"virtualenv",
|
|
113
113
|
"dulwich",
|
|
114
114
|
"hypothesis",
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import glob
|
|
2
|
-
import json
|
|
3
2
|
import logging
|
|
4
3
|
import posixpath
|
|
5
4
|
import random
|
|
@@ -11,6 +10,7 @@ from urllib.parse import urlparse
|
|
|
11
10
|
|
|
12
11
|
import attrs
|
|
13
12
|
import sqlalchemy as sa
|
|
13
|
+
import ujson as json
|
|
14
14
|
from sqlalchemy.sql.expression import true
|
|
15
15
|
|
|
16
16
|
from datachain.client import Client
|
|
@@ -122,7 +122,7 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
122
122
|
if value_type is str:
|
|
123
123
|
return val
|
|
124
124
|
if value_type in (dict, list):
|
|
125
|
-
return json.dumps(val)
|
|
125
|
+
return json.dumps(val, ensure_ascii=False)
|
|
126
126
|
raise ValueError(
|
|
127
127
|
f"Cannot convert value {val!r} with type {value_type} to JSON"
|
|
128
128
|
)
|
|
@@ -2,8 +2,8 @@ from collections.abc import Sequence
|
|
|
2
2
|
from itertools import islice
|
|
3
3
|
from typing import TYPE_CHECKING, Any, Optional
|
|
4
4
|
|
|
5
|
-
import orjson
|
|
6
5
|
import pyarrow as pa
|
|
6
|
+
import ujson as json
|
|
7
7
|
from pyarrow._csv import ParseOptions
|
|
8
8
|
from pyarrow.dataset import CsvFileFormat, dataset
|
|
9
9
|
from tqdm.auto import tqdm
|
|
@@ -269,7 +269,7 @@ def _get_hf_schema(
|
|
|
269
269
|
def _get_datachain_schema(schema: "pa.Schema") -> Optional[SignalSchema]:
|
|
270
270
|
"""Return a restored SignalSchema from parquet metadata, if any is found."""
|
|
271
271
|
if schema.metadata and DATACHAIN_SIGNAL_SCHEMA_PARQUET_KEY in schema.metadata:
|
|
272
|
-
serialized_signal_schema = orjson.loads(
|
|
272
|
+
serialized_signal_schema = json.loads(
|
|
273
273
|
schema.metadata[DATACHAIN_SIGNAL_SCHEMA_PARQUET_KEY]
|
|
274
274
|
)
|
|
275
275
|
return SignalSchema.deserialize(serialized_signal_schema)
|
|
@@ -19,8 +19,8 @@ from typing import (
|
|
|
19
19
|
overload,
|
|
20
20
|
)
|
|
21
21
|
|
|
22
|
-
import orjson
|
|
23
22
|
import sqlalchemy
|
|
23
|
+
import ujson as json
|
|
24
24
|
from pydantic import BaseModel
|
|
25
25
|
from sqlalchemy.sql.elements import ColumnElement
|
|
26
26
|
from tqdm import tqdm
|
|
@@ -462,8 +462,6 @@ class DataChain:
|
|
|
462
462
|
Returns:
|
|
463
463
|
DataChain: A new DataChain instance with the new set of columns.
|
|
464
464
|
"""
|
|
465
|
-
import json
|
|
466
|
-
|
|
467
465
|
import pyarrow as pa
|
|
468
466
|
|
|
469
467
|
from datachain.lib.arrow import schema_to_output
|
|
@@ -2129,9 +2127,9 @@ class DataChain:
|
|
|
2129
2127
|
fsspec_fs = client.create_fs(**fs_kwargs)
|
|
2130
2128
|
|
|
2131
2129
|
_partition_cols = list(partition_cols) if partition_cols else None
|
|
2132
|
-
signal_schema_metadata = orjson.dumps(
|
|
2133
|
-
self._effective_signals_schema.serialize()
|
|
2134
|
-
)
|
|
2130
|
+
signal_schema_metadata = json.dumps(
|
|
2131
|
+
self._effective_signals_schema.serialize(), ensure_ascii=False
|
|
2132
|
+
).encode("utf-8")
|
|
2135
2133
|
|
|
2136
2134
|
column_names, column_chunks = self.to_columnar_data_with_names(chunk_size)
|
|
2137
2135
|
|
|
@@ -2278,7 +2276,11 @@ class DataChain:
|
|
|
2278
2276
|
f.write(b"\n")
|
|
2279
2277
|
else:
|
|
2280
2278
|
is_first = False
|
|
2281
|
-
f.write(orjson.dumps(row_to_nested_dict(headers, row)))
|
|
2279
|
+
f.write(
|
|
2280
|
+
json.dumps(
|
|
2281
|
+
row_to_nested_dict(headers, row), ensure_ascii=False
|
|
2282
|
+
).encode("utf-8")
|
|
2283
|
+
)
|
|
2282
2284
|
if include_outer_list:
|
|
2283
2285
|
# This makes the file JSON instead of JSON lines.
|
|
2284
2286
|
f.write(b"\n]\n")
|
|
@@ -89,3 +89,15 @@ class ModelStore:
|
|
|
89
89
|
and ModelStore.is_pydantic(parent_type)
|
|
90
90
|
and "@" in ModelStore.get_name(parent_type)
|
|
91
91
|
)
|
|
92
|
+
|
|
93
|
+
@classmethod
|
|
94
|
+
def rebuild_all(cls) -> None:
|
|
95
|
+
"""Ensure pydantic schemas are (re)built for all registered models.
|
|
96
|
+
|
|
97
|
+
Uses ``force=True`` to avoid subtle cases where a deserialized class
|
|
98
|
+
(e.g. from by-value cloudpickle in workers) reports built state but
|
|
99
|
+
nested model field schemas aren't fully resolved yet.
|
|
100
|
+
"""
|
|
101
|
+
for versions in cls.store.values():
|
|
102
|
+
for model in versions.values():
|
|
103
|
+
model.model_rebuild(force=True)
|
|
@@ -13,6 +13,7 @@ from multiprocess import get_context
|
|
|
13
13
|
from datachain.catalog import Catalog
|
|
14
14
|
from datachain.catalog.catalog import clone_catalog_with_cache
|
|
15
15
|
from datachain.catalog.loader import DISTRIBUTED_IMPORT_PATH, get_udf_distributor_class
|
|
16
|
+
from datachain.lib.model_store import ModelStore
|
|
16
17
|
from datachain.lib.udf import _get_cache
|
|
17
18
|
from datachain.query.dataset import (
|
|
18
19
|
get_download_callback,
|
|
@@ -130,6 +131,8 @@ class UDFDispatcher:
|
|
|
130
131
|
|
|
131
132
|
def _create_worker(self) -> "UDFWorker":
|
|
132
133
|
udf: UDFAdapter = loads(self.udf_data)
|
|
134
|
+
# Ensure all registered DataModels have rebuilt schemas in worker processes.
|
|
135
|
+
ModelStore.rebuild_all()
|
|
133
136
|
return UDFWorker(
|
|
134
137
|
self.catalog,
|
|
135
138
|
udf,
|
|
@@ -196,6 +199,8 @@ class UDFDispatcher:
|
|
|
196
199
|
generated_cb: Callback = DEFAULT_CALLBACK,
|
|
197
200
|
) -> None:
|
|
198
201
|
udf: UDFAdapter = loads(self.udf_data)
|
|
202
|
+
# Rebuild schemas in single process too for consistency (cheap, idempotent).
|
|
203
|
+
ModelStore.rebuild_all()
|
|
199
204
|
|
|
200
205
|
if ids_only and not self.is_batching:
|
|
201
206
|
input_rows = flatten(input_rows)
|
|
@@ -8,8 +8,8 @@ from functools import cache
|
|
|
8
8
|
from types import MappingProxyType
|
|
9
9
|
from typing import Callable, Optional
|
|
10
10
|
|
|
11
|
-
import orjson
|
|
12
11
|
import sqlalchemy as sa
|
|
12
|
+
import ujson as json
|
|
13
13
|
from sqlalchemy.dialects import sqlite
|
|
14
14
|
from sqlalchemy.ext.compiler import compiles
|
|
15
15
|
from sqlalchemy.sql.elements import literal
|
|
@@ -182,7 +182,7 @@ def missing_vector_function(name, exc):
|
|
|
182
182
|
|
|
183
183
|
|
|
184
184
|
def sqlite_string_split(string: str, sep: str, maxsplit: int = -1) -> str:
|
|
185
|
-
return orjson.dumps(string.split(sep, maxsplit)).decode("utf-8")
|
|
185
|
+
return json.dumps(string.split(sep, maxsplit), ensure_ascii=False)
|
|
186
186
|
|
|
187
187
|
|
|
188
188
|
def sqlite_int_hash_64(x: int) -> int:
|
|
@@ -453,17 +453,17 @@ def compile_byte_hamming_distance(element, compiler, **kwargs):
|
|
|
453
453
|
|
|
454
454
|
|
|
455
455
|
def py_json_array_length(arr):
|
|
456
|
-
return len(orjson.loads(arr))
|
|
456
|
+
return len(json.loads(arr))
|
|
457
457
|
|
|
458
458
|
|
|
459
459
|
def py_json_array_contains(arr, value, is_json):
|
|
460
460
|
if is_json:
|
|
461
|
-
value = orjson.loads(value)
|
|
462
|
-
return value in orjson.loads(arr)
|
|
461
|
+
value = json.loads(value)
|
|
462
|
+
return value in json.loads(arr)
|
|
463
463
|
|
|
464
464
|
|
|
465
465
|
def py_json_array_get_element(val, idx):
|
|
466
|
-
arr = orjson.loads(val)
|
|
466
|
+
arr = json.loads(val)
|
|
467
467
|
try:
|
|
468
468
|
return arr[idx]
|
|
469
469
|
except IndexError:
|
|
@@ -471,17 +471,18 @@ def py_json_array_get_element(val, idx):
|
|
|
471
471
|
|
|
472
472
|
|
|
473
473
|
def py_json_array_slice(val, offset: int, length: Optional[int] = None):
|
|
474
|
-
arr = orjson.loads(val)
|
|
474
|
+
arr = json.loads(val)
|
|
475
475
|
try:
|
|
476
|
-
return orjson.dumps(
|
|
477
|
-
list(arr[offset : offset + length] if length is not None else arr[offset:])
|
|
478
|
-
|
|
476
|
+
return json.dumps(
|
|
477
|
+
list(arr[offset : offset + length] if length is not None else arr[offset:]),
|
|
478
|
+
ensure_ascii=False,
|
|
479
|
+
)
|
|
479
480
|
except IndexError:
|
|
480
481
|
return None
|
|
481
482
|
|
|
482
483
|
|
|
483
484
|
def py_json_array_join(val, sep: str):
|
|
484
|
-
return sep.join(orjson.loads(val))
|
|
485
|
+
return sep.join(json.loads(val))
|
|
485
486
|
|
|
486
487
|
|
|
487
488
|
def compile_array_get_element(element, compiler, **kwargs):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import sqlite3
|
|
2
2
|
|
|
3
|
-
import orjson
|
|
3
|
+
import ujson as json
|
|
4
4
|
from sqlalchemy import types
|
|
5
5
|
|
|
6
6
|
from datachain.sql.types import TypeConverter, TypeReadConverter
|
|
@@ -28,26 +28,21 @@ class Array(types.UserDefinedType):
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def adapt_array(arr):
|
|
31
|
-
return orjson.dumps(arr).decode("utf-8")
|
|
31
|
+
return json.dumps(arr, ensure_ascii=False)
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
def adapt_dict(dct):
|
|
35
|
-
return orjson.dumps(dct).decode("utf-8")
|
|
35
|
+
return json.dumps(dct, ensure_ascii=False)
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
def convert_array(arr):
|
|
39
|
-
return orjson.loads(arr)
|
|
39
|
+
return json.loads(arr)
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
def adapt_np_array(arr):
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
return obj
|
|
47
|
-
|
|
48
|
-
return orjson.dumps(
|
|
49
|
-
arr, option=orjson.OPT_SERIALIZE_NUMPY, default=_json_serialize
|
|
50
|
-
).decode("utf-8")
|
|
43
|
+
# Primarily needed for UDF numpy results (e.g. WDS)
|
|
44
|
+
# tolist() gives nested Python lists + native scalars; ujson.dumps handles NaN/Inf.
|
|
45
|
+
return json.dumps(arr.tolist(), ensure_ascii=False)
|
|
51
46
|
|
|
52
47
|
|
|
53
48
|
def adapt_np_generic(val):
|
|
@@ -74,5 +69,5 @@ class SQLiteTypeConverter(TypeConverter):
|
|
|
74
69
|
class SQLiteTypeReadConverter(TypeReadConverter):
|
|
75
70
|
def array(self, value, item_type, dialect):
|
|
76
71
|
if isinstance(value, str):
|
|
77
|
-
value = orjson.loads(value)
|
|
72
|
+
value = json.loads(value)
|
|
78
73
|
return super().array(value, item_type, dialect)
|
|
@@ -16,8 +16,8 @@ from datetime import datetime
|
|
|
16
16
|
from types import MappingProxyType
|
|
17
17
|
from typing import Any, Union
|
|
18
18
|
|
|
19
|
-
import orjson
|
|
20
19
|
import sqlalchemy as sa
|
|
20
|
+
import ujson as jsonlib
|
|
21
21
|
from sqlalchemy import TypeDecorator, types
|
|
22
22
|
|
|
23
23
|
from datachain.lib.data_model import StandardType
|
|
@@ -352,7 +352,7 @@ class Array(SQLType):
|
|
|
352
352
|
def on_read_convert(self, value, dialect):
|
|
353
353
|
r = read_converter(dialect).array(value, self.item_type, dialect)
|
|
354
354
|
if isinstance(self.item_type, JSON):
|
|
355
|
-
r = [orjson.loads(item) if isinstance(item, str) else item for item in r]
|
|
355
|
+
r = [jsonlib.loads(item) if isinstance(item, str) else item for item in r]
|
|
356
356
|
return r
|
|
357
357
|
|
|
358
358
|
|
|
@@ -466,7 +466,7 @@ class TypeReadConverter:
|
|
|
466
466
|
if isinstance(value, str):
|
|
467
467
|
if value == "":
|
|
468
468
|
return {}
|
|
469
|
-
return orjson.loads(value)
|
|
469
|
+
return jsonlib.loads(value)
|
|
470
470
|
return value
|
|
471
471
|
|
|
472
472
|
def datetime(self, value):
|
|
@@ -417,7 +417,7 @@ class JSONSerialize(json.JSONEncoder):
|
|
|
417
417
|
|
|
418
418
|
def inside_colab() -> bool:
|
|
419
419
|
try:
|
|
420
|
-
from google import colab # noqa: F401
|
|
420
|
+
from google import colab # type: ignore[attr-defined] # noqa: F401
|
|
421
421
|
except ImportError:
|
|
422
422
|
return False
|
|
423
423
|
return True
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.30.4
|
|
3
|
+
Version: 0.30.5
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -22,6 +22,7 @@ Requires-Dist: tomlkit
|
|
|
22
22
|
Requires-Dist: tqdm
|
|
23
23
|
Requires-Dist: numpy<3,>=1
|
|
24
24
|
Requires-Dist: pandas>=2.0.0
|
|
25
|
+
Requires-Dist: ujson>=5.10.0
|
|
25
26
|
Requires-Dist: packaging
|
|
26
27
|
Requires-Dist: pyarrow
|
|
27
28
|
Requires-Dist: typing-extensions
|
|
@@ -38,7 +39,6 @@ Requires-Dist: shtab<2,>=1.3.4
|
|
|
38
39
|
Requires-Dist: sqlalchemy>=2
|
|
39
40
|
Requires-Dist: multiprocess==0.70.16
|
|
40
41
|
Requires-Dist: cloudpickle
|
|
41
|
-
Requires-Dist: orjson>=3.10.5
|
|
42
42
|
Requires-Dist: pydantic
|
|
43
43
|
Requires-Dist: jmespath>=1.0
|
|
44
44
|
Requires-Dist: datamodel-code-generator>=0.25
|
|
@@ -92,7 +92,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
|
|
|
92
92
|
Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
|
|
93
93
|
Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
|
|
94
94
|
Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
|
|
95
|
-
Requires-Dist: pytest-
|
|
95
|
+
Requires-Dist: pytest-dotenv; extra == "tests"
|
|
96
96
|
Requires-Dist: virtualenv; extra == "tests"
|
|
97
97
|
Requires-Dist: dulwich; extra == "tests"
|
|
98
98
|
Requires-Dist: hypothesis; extra == "tests"
|
|
@@ -78,6 +78,7 @@ examples/computer_vision/ultralytics-pose.py
|
|
|
78
78
|
examples/computer_vision/ultralytics-segment.py
|
|
79
79
|
examples/get_started/common_sql_functions.py
|
|
80
80
|
examples/get_started/json-csv-reader.py
|
|
81
|
+
examples/get_started/nested_datamodel.py
|
|
81
82
|
examples/get_started/torch-loader.py
|
|
82
83
|
examples/get_started/udfs/parallel.py
|
|
83
84
|
examples/get_started/udfs/simple.py
|
|
@@ -3,6 +3,7 @@ tomlkit
|
|
|
3
3
|
tqdm
|
|
4
4
|
numpy<3,>=1
|
|
5
5
|
pandas>=2.0.0
|
|
6
|
+
ujson>=5.10.0
|
|
6
7
|
packaging
|
|
7
8
|
pyarrow
|
|
8
9
|
typing-extensions
|
|
@@ -19,7 +20,6 @@ shtab<2,>=1.3.4
|
|
|
19
20
|
sqlalchemy>=2
|
|
20
21
|
multiprocess==0.70.16
|
|
21
22
|
cloudpickle
|
|
22
|
-
orjson>=3.10.5
|
|
23
23
|
pydantic
|
|
24
24
|
jmespath>=1.0
|
|
25
25
|
datamodel-code-generator>=0.25
|
|
@@ -92,7 +92,7 @@ pytest-mock>=3.12.0
|
|
|
92
92
|
pytest-servers[all]>=0.5.9
|
|
93
93
|
pytest-benchmark[histogram]
|
|
94
94
|
pytest-xdist>=3.3.1
|
|
95
|
-
pytest-
|
|
95
|
+
pytest-dotenv
|
|
96
96
|
virtualenv
|
|
97
97
|
dulwich
|
|
98
98
|
hypothesis
|
|
@@ -356,26 +356,32 @@ def test_array_contains(test_session):
|
|
|
356
356
|
|
|
357
357
|
ds = list(
|
|
358
358
|
dc.read_values(
|
|
359
|
-
id=(1, 2, 3),
|
|
359
|
+
id=(1, 2, 3, 4),
|
|
360
360
|
arr=(
|
|
361
361
|
Arr(i=[10, 20, 30], f=[1.0, 2.0, 3.0], s=["a", "b", "c"]),
|
|
362
362
|
Arr(i=[40, 50, 60], f=[4.0, 5.0, 6.0], s=["d", "e", "f"]),
|
|
363
363
|
Arr(i=[50], f=[5.0], s=["g"]),
|
|
364
|
+
# New row with NaN/Inf values for testing
|
|
365
|
+
Arr(i=[100], f=[float("nan"), float("inf"), float("-inf")], s=["h"]),
|
|
364
366
|
),
|
|
365
367
|
ii=(
|
|
366
368
|
[20, 30, 50, 80],
|
|
367
369
|
[10],
|
|
368
370
|
[],
|
|
371
|
+
[200],
|
|
369
372
|
),
|
|
370
373
|
ff=(
|
|
371
374
|
[2.0, 3.0, 5.0, 7.0],
|
|
372
375
|
[4.0],
|
|
373
376
|
[],
|
|
377
|
+
# Test array with special float values
|
|
378
|
+
[float("inf"), float("-inf"), 1.5],
|
|
374
379
|
),
|
|
375
380
|
ss=(
|
|
376
381
|
["b", "c", "e", "f"],
|
|
377
382
|
["d"],
|
|
378
383
|
[],
|
|
384
|
+
["i"],
|
|
379
385
|
),
|
|
380
386
|
session=test_session,
|
|
381
387
|
)
|
|
@@ -395,6 +401,14 @@ def test_array_contains(test_session):
|
|
|
395
401
|
t13=func.array.contains([1, 2, 3, 4, 5], 3),
|
|
396
402
|
t14=func.array.contains([1, 2, 3, 4, 5], 7),
|
|
397
403
|
t15=func.array.contains([], 1),
|
|
404
|
+
# Test NaN/Inf handling with contains
|
|
405
|
+
t16=func.array.contains("arr.f", float("inf")), # Should find inf in row 4
|
|
406
|
+
# Should find -inf in row 4
|
|
407
|
+
t17=func.array.contains("arr.f", float("-inf")),
|
|
408
|
+
# Should NOT find nan (NaN != NaN)
|
|
409
|
+
t18=func.array.contains("arr.f", float("nan")),
|
|
410
|
+
t19=func.array.contains("ff", float("inf")), # Should find inf in row 4
|
|
411
|
+
t20=func.array.contains("ff", float("-inf")), # Should find -inf in row 4
|
|
398
412
|
)
|
|
399
413
|
.order_by("id")
|
|
400
414
|
.to_list(
|
|
@@ -413,11 +427,75 @@ def test_array_contains(test_session):
|
|
|
413
427
|
"t13",
|
|
414
428
|
"t14",
|
|
415
429
|
"t15",
|
|
430
|
+
"t16",
|
|
431
|
+
"t17",
|
|
432
|
+
"t18",
|
|
433
|
+
"t19",
|
|
434
|
+
"t20",
|
|
416
435
|
)
|
|
417
436
|
)
|
|
418
437
|
|
|
419
438
|
assert ds == [
|
|
420
|
-
|
|
421
|
-
(
|
|
422
|
-
|
|
439
|
+
# Row 1: Regular values
|
|
440
|
+
(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0),
|
|
441
|
+
# Row 2: Regular values
|
|
442
|
+
(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),
|
|
443
|
+
# Row 3: Regular values
|
|
444
|
+
(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),
|
|
445
|
+
# Row 4: Contains NaN/Inf values - inf/-inf should be found, NaN should not
|
|
446
|
+
(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1),
|
|
423
447
|
]
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def test_array_functions_with_nan_inf(test_session):
|
|
451
|
+
class ArrWithSpecial(dc.DataModel):
|
|
452
|
+
f: list[float] # Will contain NaN and Infinity values
|
|
453
|
+
|
|
454
|
+
ds = list(
|
|
455
|
+
dc.read_values(
|
|
456
|
+
id=(1, 2, 3),
|
|
457
|
+
arr=(
|
|
458
|
+
ArrWithSpecial(f=[1.0, float("nan"), 3.0]),
|
|
459
|
+
ArrWithSpecial(f=[float("inf"), 2.0, float("-inf")]),
|
|
460
|
+
ArrWithSpecial(f=[float("nan"), float("inf")]),
|
|
461
|
+
),
|
|
462
|
+
special_floats=(
|
|
463
|
+
[1.0, float("nan"), float("inf")],
|
|
464
|
+
[float("-inf"), 2.0],
|
|
465
|
+
[float("nan")],
|
|
466
|
+
),
|
|
467
|
+
session=test_session,
|
|
468
|
+
)
|
|
469
|
+
.mutate(
|
|
470
|
+
# Test array.length with NaN/INF arrays
|
|
471
|
+
len1=func.array.length("arr.f"),
|
|
472
|
+
len2=func.array.length("special_floats"),
|
|
473
|
+
# Test array.slice with NaN/INF arrays
|
|
474
|
+
slice1=func.array.slice("arr.f", 0, 2),
|
|
475
|
+
slice2=func.array.slice("special_floats", 1),
|
|
476
|
+
# Test array.get_element with NaN/INF arrays
|
|
477
|
+
elem1=func.array.get_element("arr.f", 0),
|
|
478
|
+
elem2=func.array.get_element("special_floats", 0),
|
|
479
|
+
)
|
|
480
|
+
.order_by("id")
|
|
481
|
+
.to_list("len1", "len2", "slice1", "slice2", "elem1", "elem2")
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
# Verify lengths are correct
|
|
485
|
+
assert ds[0][0] == 3 # [1.0, nan, 3.0]
|
|
486
|
+
assert ds[0][1] == 3 # [1.0, nan, inf]
|
|
487
|
+
assert ds[1][0] == 3 # [inf, 2.0, -inf]
|
|
488
|
+
assert ds[1][1] == 2 # [-inf, 2.0]
|
|
489
|
+
assert ds[2][0] == 2 # [nan, inf]
|
|
490
|
+
assert ds[2][1] == 1 # [nan]
|
|
491
|
+
|
|
492
|
+
# Verify slices preserve NaN/INF
|
|
493
|
+
assert len(ds[0][2]) == 2 # slice of [1.0, nan, 3.0]
|
|
494
|
+
assert ds[0][2][0] == 1.0
|
|
495
|
+
assert math.isnan(ds[0][2][1])
|
|
496
|
+
|
|
497
|
+
# Verify get_element preserves NaN/INF
|
|
498
|
+
assert ds[0][4] == 1.0 # arr.f[0] for first row
|
|
499
|
+
# special_floats[0] for second row (-inf)
|
|
500
|
+
assert math.isinf(ds[1][5]) and ds[1][5] < 0
|
|
501
|
+
assert ds[1][4] == float("inf") # arr.f[0] for second row
|
|
@@ -135,8 +135,8 @@ def test_convert_type(cloud_test_catalog):
|
|
|
135
135
|
|
|
136
136
|
# JSON Tests
|
|
137
137
|
assert run_convert_type('{"a": 1}', JSON()) == '{"a": 1}'
|
|
138
|
-
assert run_convert_type({"a": 1}, JSON()) == '{"a": 1}'
|
|
139
|
-
assert run_convert_type([{"a": 1}], JSON()) == '[{"a": 1}]'
|
|
138
|
+
assert run_convert_type({"a": 1}, JSON()) == '{"a":1}'
|
|
139
|
+
assert run_convert_type([{"a": 1}], JSON()) == '[{"a":1}]'
|
|
140
140
|
with pytest.raises(ValueError):
|
|
141
141
|
run_convert_type(0.5, JSON())
|
|
142
142
|
|
|
@@ -3230,6 +3230,35 @@ def test_read_csv_nan_inf(tmp_dir, test_session):
|
|
|
3230
3230
|
assert any(r for r in res if np.isneginf(r))
|
|
3231
3231
|
|
|
3232
3232
|
|
|
3233
|
+
def test_dicts_nan_inf(test_session):
|
|
3234
|
+
metrics_data = [
|
|
3235
|
+
{"accuracy": 0.95, "loss": 0.1, "precision": 0.92},
|
|
3236
|
+
{"accuracy": float("nan"), "loss": float("inf"), "precision": 0.88},
|
|
3237
|
+
{"accuracy": 0.87, "loss": float("-inf"), "precision": float("nan")},
|
|
3238
|
+
]
|
|
3239
|
+
|
|
3240
|
+
dc.read_values(
|
|
3241
|
+
id=[1, 2, 3],
|
|
3242
|
+
metrics=metrics_data,
|
|
3243
|
+
session=test_session,
|
|
3244
|
+
).save("test_dicts_nan_inf")
|
|
3245
|
+
|
|
3246
|
+
res = dc.read_dataset("test_dicts_nan_inf").order_by("id").to_values("metrics")
|
|
3247
|
+
assert len(res) == 3
|
|
3248
|
+
|
|
3249
|
+
assert res[0]["accuracy"] == 0.95
|
|
3250
|
+
assert res[0]["loss"] == 0.1
|
|
3251
|
+
assert res[0]["precision"] == 0.92
|
|
3252
|
+
|
|
3253
|
+
assert math.isnan(res[1]["accuracy"])
|
|
3254
|
+
assert math.isinf(res[1]["loss"]) and res[1]["loss"] > 0
|
|
3255
|
+
assert res[1]["precision"] == 0.88
|
|
3256
|
+
|
|
3257
|
+
assert res[2]["accuracy"] == 0.87
|
|
3258
|
+
assert math.isinf(res[2]["loss"]) and res[2]["loss"] < 0
|
|
3259
|
+
assert math.isnan(res[2]["precision"])
|
|
3260
|
+
|
|
3261
|
+
|
|
3233
3262
|
def test_group_by_int(test_session):
|
|
3234
3263
|
from datachain import func
|
|
3235
3264
|
|