datachain 0.30.0__tar.gz → 0.30.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.30.0 → datachain-0.30.2}/.github/workflows/tests.yml +18 -0
- {datachain-0.30.0 → datachain-0.30.2}/.gitignore +4 -0
- {datachain-0.30.0 → datachain-0.30.2}/PKG-INFO +5 -2
- {datachain-0.30.0 → datachain-0.30.2}/pyproject.toml +9 -2
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/database.py +37 -16
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/datachain.py +5 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/query/dataset.py +1 -1
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/__init__.py +2 -0
- datachain-0.30.2/src/datachain/sql/postgresql_dialect.py +9 -0
- datachain-0.30.2/src/datachain/sql/postgresql_types.py +21 -0
- datachain-0.30.2/src/datachain/sql/sqlite/__init__.py +11 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/sqlite/base.py +6 -1
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/types.py +32 -8
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain.egg-info/PKG-INFO +5 -2
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain.egg-info/SOURCES.txt +2 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain.egg-info/requires.txt +5 -1
- {datachain-0.30.0 → datachain-0.30.2}/tests/examples/test_examples.py +1 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_to_database.py +474 -298
- datachain-0.30.0/src/datachain/sql/sqlite/__init__.py +0 -7
- {datachain-0.30.0 → datachain-0.30.2}/.cruft.json +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/.gitattributes +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/.github/codecov.yaml +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/.github/dependabot.yml +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/.github/workflows/release.yml +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/.pre-commit-config.yaml +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/LICENSE +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/README.rst +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/assets/datachain.svg +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/commands/auth/login.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/commands/auth/logout.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/commands/auth/team.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/commands/auth/token.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/commands/index.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/commands/job/cancel.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/commands/job/clusters.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/commands/job/logs.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/commands/job/ls.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/commands/job/run.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/contributing.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/examples.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/guide/db_migrations.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/guide/delta.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/guide/env.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/guide/index.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/guide/namespaces.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/guide/processing.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/guide/remotes.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/guide/retry.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/index.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/overrides/main.html +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/quick-start.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/data-types/file.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/data-types/index.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/data-types/pose.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/data-types/segment.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/datachain.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/func.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/index.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/toolkit.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/torch.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/references/udf.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/docs/tutorials.md +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/multimodal/wds.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/mkdocs.yml +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/noxfile.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/setup.cfg +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/__main__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/asyn.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cache.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/cli/utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/client/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/client/azure.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/client/gcs.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/client/hf.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/client/local.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/client/s3.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/config.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/dataset.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/delta.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/error.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/fs/reference.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/fs/utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/func/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/func/array.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/func/base.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/func/conditional.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/func/func.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/func/numeric.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/func/path.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/func/random.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/func/string.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/func/window.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/job.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/audio.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/clip.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/file.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/hf.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/image.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/listing.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/projects.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/settings.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/tar.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/text.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/udf.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/video.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/listing.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/model/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/model/bbox.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/model/pose.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/model/segment.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/model/utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/namespace.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/node.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/progress.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/project.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/py.typed +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/query/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/query/batch.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/query/metrics.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/query/params.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/query/queue.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/query/schema.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/query/session.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/query/udf.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/query/utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/remote/studio.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/script_meta.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/semver.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/sql/utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/studio.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/telemetry.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain/utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/conftest.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/data.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/examples/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/examples/wds_data.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/data/lena.jpg +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/functions/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/functions/test_array.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/functions/test_path.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/functions/test_random.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/functions/test_string.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/model/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_audio.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_batching.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_catalog.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_client.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_data_storage.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_datachain.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_datasets.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_delta.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_file.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_hf.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_image.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_listing.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_ls.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_metastore.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_metrics.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_pull.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_pytorch.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_query.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_read_database.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_retry.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_session.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_toolkit.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_video.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/func/test_warehouse.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/scripts/feature_class.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/test_atomicity.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/test_cli_e2e.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/test_cli_studio.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/test_import_time.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/test_query_e2e.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/test_telemetry.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/model/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_asyn.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_cache.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_catalog.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_client.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_config.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_dataset.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_func.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_listing.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_metastore.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_query.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_query_params.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_semver.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_serializer.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_session.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_utils.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.30.0 → datachain-0.30.2}/tests/utils.py +0 -0
|
@@ -78,6 +78,24 @@ jobs:
|
|
|
78
78
|
fetch-depth: 0
|
|
79
79
|
ref: ${{ github.event.pull_request.head.sha || github.ref }}
|
|
80
80
|
|
|
81
|
+
- name: Setup PostgreSQL
|
|
82
|
+
if: runner.os != 'Windows'
|
|
83
|
+
uses: ikalnytskyi/action-setup-postgres@10ab8a56cc77b4823c2bfa57b1d4dd5605ef0481 # v7
|
|
84
|
+
with:
|
|
85
|
+
username: test
|
|
86
|
+
password: test
|
|
87
|
+
database: test_datachain
|
|
88
|
+
port: 5432
|
|
89
|
+
postgres-version: "17"
|
|
90
|
+
id: postgres
|
|
91
|
+
|
|
92
|
+
- name: Set PostgreSQL URI
|
|
93
|
+
if: runner.os != 'Windows'
|
|
94
|
+
run: |
|
|
95
|
+
FULL_URI="${{ steps.postgres.outputs.connection-uri }}"
|
|
96
|
+
echo "TEST_POSTGRES_URI=${FULL_URI%/*}" >> "$GITHUB_ENV"
|
|
97
|
+
shell: bash
|
|
98
|
+
|
|
81
99
|
- name: Set up Python ${{ matrix.pyv }}
|
|
82
100
|
uses: actions/setup-python@v5
|
|
83
101
|
with:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.30.0
|
|
3
|
+
Version: 0.30.2
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -81,8 +81,10 @@ Provides-Extra: video
|
|
|
81
81
|
Requires-Dist: ffmpeg-python; extra == "video"
|
|
82
82
|
Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
|
|
83
83
|
Requires-Dist: opencv-python; extra == "video"
|
|
84
|
+
Provides-Extra: postgres
|
|
85
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
84
86
|
Provides-Extra: tests
|
|
85
|
-
Requires-Dist: datachain[audio,hf,remote,torch,vector,video]; extra == "tests"
|
|
87
|
+
Requires-Dist: datachain[audio,hf,postgres,remote,torch,vector,video]; extra == "tests"
|
|
86
88
|
Requires-Dist: pytest<9,>=8; extra == "tests"
|
|
87
89
|
Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
|
|
88
90
|
Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
|
|
@@ -90,6 +92,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
|
|
|
90
92
|
Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
|
|
91
93
|
Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
|
|
92
94
|
Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
|
|
95
|
+
Requires-Dist: pytest-env>=1.1.0; extra == "tests"
|
|
93
96
|
Requires-Dist: virtualenv; extra == "tests"
|
|
94
97
|
Requires-Dist: dulwich; extra == "tests"
|
|
95
98
|
Requires-Dist: hypothesis; extra == "tests"
|
|
@@ -96,8 +96,11 @@ video = [
|
|
|
96
96
|
"imageio[ffmpeg,pyav]>=2.37.0",
|
|
97
97
|
"opencv-python"
|
|
98
98
|
]
|
|
99
|
+
postgres = [
|
|
100
|
+
"psycopg2-binary>=2.9.0"
|
|
101
|
+
]
|
|
99
102
|
tests = [
|
|
100
|
-
"datachain[torch,audio,remote,vector,hf,video]",
|
|
103
|
+
"datachain[torch,audio,remote,vector,hf,video,postgres]",
|
|
101
104
|
"pytest>=8,<9",
|
|
102
105
|
"pytest-sugar>=0.9.6",
|
|
103
106
|
"pytest-cov>=4.1.0",
|
|
@@ -105,6 +108,7 @@ tests = [
|
|
|
105
108
|
"pytest-servers[all]>=0.5.9",
|
|
106
109
|
"pytest-benchmark[histogram]",
|
|
107
110
|
"pytest-xdist>=3.3.1",
|
|
111
|
+
"pytest-env>=1.1.0",
|
|
108
112
|
"virtualenv",
|
|
109
113
|
"dulwich",
|
|
110
114
|
"hypothesis",
|
|
@@ -150,13 +154,16 @@ namespaces = false
|
|
|
150
154
|
|
|
151
155
|
[tool.pytest.ini_options]
|
|
152
156
|
addopts = "-rfEs -m 'not examples' --benchmark-skip"
|
|
157
|
+
env_override_existing_values = true
|
|
158
|
+
env_files = "local/.env.test"
|
|
153
159
|
markers = [
|
|
154
160
|
"e2e: End-to-end tests",
|
|
155
161
|
"examples: All examples",
|
|
156
162
|
"computer_vision: Computer vision examples",
|
|
157
163
|
"get_started: Get started examples",
|
|
158
164
|
"llm_and_nlp: LLM and NLP examples",
|
|
159
|
-
"multimodal: Multimodal examples"
|
|
165
|
+
"multimodal: Multimodal examples",
|
|
166
|
+
"incremental_processing: Delta and retry examples"
|
|
160
167
|
]
|
|
161
168
|
filterwarnings = [
|
|
162
169
|
"error::pandas.errors.PerformanceWarning",
|
|
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Optional, Union
|
|
|
7
7
|
import sqlalchemy
|
|
8
8
|
|
|
9
9
|
from datachain.query.schema import ColumnMeta
|
|
10
|
+
from datachain.utils import batched
|
|
10
11
|
|
|
11
12
|
DEFAULT_DATABASE_BATCH_SIZE = 10_000
|
|
12
13
|
|
|
@@ -74,6 +75,7 @@ def to_database(
|
|
|
74
75
|
*,
|
|
75
76
|
batch_rows: int = DEFAULT_DATABASE_BATCH_SIZE,
|
|
76
77
|
on_conflict: Optional[str] = None,
|
|
78
|
+
conflict_columns: Optional[list[str]] = None,
|
|
77
79
|
column_mapping: Optional[dict[str, Optional[str]]] = None,
|
|
78
80
|
) -> None:
|
|
79
81
|
"""
|
|
@@ -82,8 +84,6 @@ def to_database(
|
|
|
82
84
|
This is the core implementation that handles the actual database operations.
|
|
83
85
|
For user-facing documentation, see DataChain.to_database() method.
|
|
84
86
|
"""
|
|
85
|
-
from datachain.utils import batched
|
|
86
|
-
|
|
87
87
|
if on_conflict and on_conflict not in ("ignore", "update"):
|
|
88
88
|
raise ValueError(
|
|
89
89
|
f"on_conflict must be 'ignore' or 'update', got: {on_conflict}"
|
|
@@ -105,19 +105,26 @@ def to_database(
|
|
|
105
105
|
metadata = sqlalchemy.MetaData()
|
|
106
106
|
table = sqlalchemy.Table(table_name, metadata, *columns)
|
|
107
107
|
|
|
108
|
-
|
|
109
|
-
inspector = sqlalchemy.inspect(conn)
|
|
110
|
-
assert inspector # to satisfy mypy
|
|
111
|
-
table_existed_before = table_name in inspector.get_table_names()
|
|
112
|
-
|
|
108
|
+
table_existed_before = False
|
|
113
109
|
try:
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
110
|
+
with conn.begin():
|
|
111
|
+
# Check if table exists to determine if we should clean up on error.
|
|
112
|
+
inspector = sqlalchemy.inspect(conn)
|
|
113
|
+
assert inspector # to satisfy mypy
|
|
114
|
+
table_existed_before = table_name in inspector.get_table_names()
|
|
115
|
+
|
|
116
|
+
table.create(conn, checkfirst=True)
|
|
117
|
+
|
|
118
|
+
rows_iter = chain._leaf_values()
|
|
119
|
+
for batch in batched(rows_iter, batch_rows):
|
|
120
|
+
_process_batch(
|
|
121
|
+
conn,
|
|
122
|
+
table,
|
|
123
|
+
batch,
|
|
124
|
+
on_conflict,
|
|
125
|
+
conflict_columns,
|
|
126
|
+
column_indices_and_names,
|
|
127
|
+
)
|
|
121
128
|
except Exception:
|
|
122
129
|
if not table_existed_before:
|
|
123
130
|
try:
|
|
@@ -183,7 +190,9 @@ def _prepare_columns(all_columns, column_mapping):
|
|
|
183
190
|
return column_indices_and_names, columns
|
|
184
191
|
|
|
185
192
|
|
|
186
|
-
def _process_batch(conn, table, batch, on_conflict, column_indices_and_names):
|
|
193
|
+
def _process_batch(
|
|
194
|
+
conn, table, batch, on_conflict, conflict_columns, column_indices_and_names
|
|
195
|
+
):
|
|
187
196
|
"""Process a batch of rows with conflict resolution."""
|
|
188
197
|
|
|
189
198
|
def prepare_row(row_values):
|
|
@@ -217,7 +226,19 @@ def _process_batch(conn, table, batch, on_conflict, column_indices_and_names):
|
|
|
217
226
|
update_values = {
|
|
218
227
|
col.name: insert_stmt.excluded[col.name] for col in table.columns
|
|
219
228
|
}
|
|
220
|
-
|
|
229
|
+
if conn.engine.name == "postgresql":
|
|
230
|
+
if not conflict_columns:
|
|
231
|
+
raise ValueError(
|
|
232
|
+
"conflict_columns parameter is required when "
|
|
233
|
+
"on_conflict='update' with PostgreSQL. Specify the column "
|
|
234
|
+
"names that form a unique constraint."
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
insert_stmt = insert_stmt.on_conflict_do_update(
|
|
238
|
+
index_elements=conflict_columns, set_=update_values
|
|
239
|
+
)
|
|
240
|
+
else:
|
|
241
|
+
insert_stmt = insert_stmt.on_conflict_do_update(set_=update_values)
|
|
221
242
|
elif on_conflict:
|
|
222
243
|
import warnings
|
|
223
244
|
|
|
@@ -2296,6 +2296,7 @@ class DataChain:
|
|
|
2296
2296
|
*,
|
|
2297
2297
|
batch_rows: int = DEFAULT_DATABASE_BATCH_SIZE,
|
|
2298
2298
|
on_conflict: Optional[str] = None,
|
|
2299
|
+
conflict_columns: Optional[list[str]] = None,
|
|
2299
2300
|
column_mapping: Optional[dict[str, Optional[str]]] = None,
|
|
2300
2301
|
) -> None:
|
|
2301
2302
|
"""Save chain to a database table using a given database connection.
|
|
@@ -2319,6 +2320,9 @@ class DataChain:
|
|
|
2319
2320
|
(default)
|
|
2320
2321
|
- "ignore": Skip duplicate rows silently
|
|
2321
2322
|
- "update": Update existing rows with new values
|
|
2323
|
+
conflict_columns: List of column names that form a unique constraint
|
|
2324
|
+
for conflict resolution. Required when on_conflict='update' and
|
|
2325
|
+
using PostgreSQL.
|
|
2322
2326
|
column_mapping: Optional mapping to rename or skip columns:
|
|
2323
2327
|
- Dict mapping DataChain column names to database column names
|
|
2324
2328
|
- Set values to None to skip columns entirely, or use `defaultdict` to
|
|
@@ -2377,6 +2381,7 @@ class DataChain:
|
|
|
2377
2381
|
connection,
|
|
2378
2382
|
batch_rows=batch_rows,
|
|
2379
2383
|
on_conflict=on_conflict,
|
|
2384
|
+
conflict_columns=conflict_columns,
|
|
2380
2385
|
column_mapping=column_mapping,
|
|
2381
2386
|
)
|
|
2382
2387
|
|
|
@@ -665,7 +665,7 @@ class UDFSignal(UDFStep):
|
|
|
665
665
|
original_cols = [c for c in subq.c if c.name not in partition_col_names]
|
|
666
666
|
|
|
667
667
|
# new signal columns that are added to udf_table
|
|
668
|
-
signal_cols = [c for c in udf_table.c if c.name
|
|
668
|
+
signal_cols = [c for c in udf_table.c if not c.name.startswith("sys__")]
|
|
669
669
|
signal_name_cols = {c.name: c for c in signal_cols}
|
|
670
670
|
cols = signal_cols
|
|
671
671
|
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from sqlalchemy.sql.elements import literal
|
|
2
2
|
from sqlalchemy.sql.expression import column
|
|
3
3
|
|
|
4
|
+
# Import PostgreSQL dialect registration (registers PostgreSQL type converter)
|
|
5
|
+
from . import postgresql_dialect # noqa: F401
|
|
4
6
|
from .default import setup as default_setup
|
|
5
7
|
from .selectable import select, values
|
|
6
8
|
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PostgreSQL dialect registration for DataChain.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from datachain.sql.postgresql_types import PostgreSQLTypeConverter
|
|
6
|
+
from datachain.sql.types import register_backend_types
|
|
7
|
+
|
|
8
|
+
# Register PostgreSQL type converter
|
|
9
|
+
register_backend_types("postgresql", PostgreSQLTypeConverter())
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PostgreSQL-specific type converter for DataChain.
|
|
3
|
+
|
|
4
|
+
Handles PostgreSQL-specific type mappings that differ from the default dialect.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from sqlalchemy.dialects import postgresql
|
|
8
|
+
|
|
9
|
+
from datachain.sql.types import TypeConverter
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PostgreSQLTypeConverter(TypeConverter):
|
|
13
|
+
"""PostgreSQL-specific type converter."""
|
|
14
|
+
|
|
15
|
+
def datetime(self):
|
|
16
|
+
"""PostgreSQL uses TIMESTAMP WITH TIME ZONE to preserve timezone information."""
|
|
17
|
+
return postgresql.TIMESTAMP(timezone=True)
|
|
18
|
+
|
|
19
|
+
def json(self):
|
|
20
|
+
"""PostgreSQL uses JSONB for better performance and query capabilities."""
|
|
21
|
+
return postgresql.JSONB()
|
|
@@ -304,7 +304,11 @@ def register_user_defined_sql_functions() -> None:
|
|
|
304
304
|
|
|
305
305
|
|
|
306
306
|
def adapt_datetime(val: datetime) -> str:
|
|
307
|
-
|
|
307
|
+
is_utc_check = val.tzinfo is timezone.utc
|
|
308
|
+
tzname_check = val.tzname() == "UTC"
|
|
309
|
+
combined_check = is_utc_check or tzname_check
|
|
310
|
+
|
|
311
|
+
if not combined_check:
|
|
308
312
|
try:
|
|
309
313
|
val = val.astimezone(timezone.utc)
|
|
310
314
|
except (OverflowError, ValueError, OSError):
|
|
@@ -314,6 +318,7 @@ def adapt_datetime(val: datetime) -> str:
|
|
|
314
318
|
val = datetime.min.replace(tzinfo=timezone.utc)
|
|
315
319
|
else:
|
|
316
320
|
raise
|
|
321
|
+
|
|
317
322
|
return val.replace(tzinfo=None).isoformat(" ")
|
|
318
323
|
|
|
319
324
|
|
|
@@ -58,9 +58,14 @@ def converter(dialect) -> "TypeConverter":
|
|
|
58
58
|
try:
|
|
59
59
|
return registry[name]
|
|
60
60
|
except KeyError:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
61
|
+
# Fall back to default converter if specific dialect not found
|
|
62
|
+
try:
|
|
63
|
+
return registry["default"]
|
|
64
|
+
except KeyError:
|
|
65
|
+
raise ValueError(
|
|
66
|
+
f"No type converter registered for dialect: {dialect.name!r} "
|
|
67
|
+
f"and no default converter available"
|
|
68
|
+
) from None
|
|
64
69
|
|
|
65
70
|
|
|
66
71
|
def read_converter(dialect) -> "TypeReadConverter":
|
|
@@ -68,9 +73,14 @@ def read_converter(dialect) -> "TypeReadConverter":
|
|
|
68
73
|
try:
|
|
69
74
|
return read_converter_registry[name]
|
|
70
75
|
except KeyError:
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
76
|
+
# Fall back to default converter if specific dialect not found
|
|
77
|
+
try:
|
|
78
|
+
return read_converter_registry["default"]
|
|
79
|
+
except KeyError:
|
|
80
|
+
raise ValueError(
|
|
81
|
+
f"No read type converter registered for dialect: {dialect.name!r} "
|
|
82
|
+
f"and no default converter available"
|
|
83
|
+
) from None
|
|
74
84
|
|
|
75
85
|
|
|
76
86
|
def type_defaults(dialect) -> "TypeDefaults":
|
|
@@ -78,7 +88,14 @@ def type_defaults(dialect) -> "TypeDefaults":
|
|
|
78
88
|
try:
|
|
79
89
|
return type_defaults_registry[name]
|
|
80
90
|
except KeyError:
|
|
81
|
-
|
|
91
|
+
# Fall back to default converter if specific dialect not found
|
|
92
|
+
try:
|
|
93
|
+
return type_defaults_registry["default"]
|
|
94
|
+
except KeyError:
|
|
95
|
+
raise ValueError(
|
|
96
|
+
f"No type defaults registered for dialect: {dialect.name!r} "
|
|
97
|
+
f"and no default converter available"
|
|
98
|
+
) from None
|
|
82
99
|
|
|
83
100
|
|
|
84
101
|
def db_defaults(dialect) -> "DBDefaults":
|
|
@@ -86,7 +103,14 @@ def db_defaults(dialect) -> "DBDefaults":
|
|
|
86
103
|
try:
|
|
87
104
|
return db_defaults_registry[name]
|
|
88
105
|
except KeyError:
|
|
89
|
-
|
|
106
|
+
# Fall back to default converter if specific dialect not found
|
|
107
|
+
try:
|
|
108
|
+
return db_defaults_registry["default"]
|
|
109
|
+
except KeyError:
|
|
110
|
+
raise ValueError(
|
|
111
|
+
f"No DB defaults registered for dialect: {dialect.name!r} "
|
|
112
|
+
f"and no default converter available"
|
|
113
|
+
) from None
|
|
90
114
|
|
|
91
115
|
|
|
92
116
|
class SQLType(TypeDecorator):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.30.0
|
|
3
|
+
Version: 0.30.2
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -81,8 +81,10 @@ Provides-Extra: video
|
|
|
81
81
|
Requires-Dist: ffmpeg-python; extra == "video"
|
|
82
82
|
Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
|
|
83
83
|
Requires-Dist: opencv-python; extra == "video"
|
|
84
|
+
Provides-Extra: postgres
|
|
85
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
84
86
|
Provides-Extra: tests
|
|
85
|
-
Requires-Dist: datachain[audio,hf,remote,torch,vector,video]; extra == "tests"
|
|
87
|
+
Requires-Dist: datachain[audio,hf,postgres,remote,torch,vector,video]; extra == "tests"
|
|
86
88
|
Requires-Dist: pytest<9,>=8; extra == "tests"
|
|
87
89
|
Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
|
|
88
90
|
Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
|
|
@@ -90,6 +92,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
|
|
|
90
92
|
Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
|
|
91
93
|
Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
|
|
92
94
|
Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
|
|
95
|
+
Requires-Dist: pytest-env>=1.1.0; extra == "tests"
|
|
93
96
|
Requires-Dist: virtualenv; extra == "tests"
|
|
94
97
|
Requires-Dist: dulwich; extra == "tests"
|
|
95
98
|
Requires-Dist: hypothesis; extra == "tests"
|
|
@@ -231,6 +231,8 @@ src/datachain/query/utils.py
|
|
|
231
231
|
src/datachain/remote/__init__.py
|
|
232
232
|
src/datachain/remote/studio.py
|
|
233
233
|
src/datachain/sql/__init__.py
|
|
234
|
+
src/datachain/sql/postgresql_dialect.py
|
|
235
|
+
src/datachain/sql/postgresql_types.py
|
|
234
236
|
src/datachain/sql/selectable.py
|
|
235
237
|
src/datachain/sql/types.py
|
|
236
238
|
src/datachain/sql/utils.py
|
|
@@ -76,12 +76,15 @@ fsspec>=2024.12.0
|
|
|
76
76
|
[hf:sys_platform == "linux" or sys_platform == "darwin"]
|
|
77
77
|
datasets[audio]>=4.0.0
|
|
78
78
|
|
|
79
|
+
[postgres]
|
|
80
|
+
psycopg2-binary>=2.9.0
|
|
81
|
+
|
|
79
82
|
[remote]
|
|
80
83
|
lz4
|
|
81
84
|
requests>=2.22.0
|
|
82
85
|
|
|
83
86
|
[tests]
|
|
84
|
-
datachain[audio,hf,remote,torch,vector,video]
|
|
87
|
+
datachain[audio,hf,postgres,remote,torch,vector,video]
|
|
85
88
|
pytest<9,>=8
|
|
86
89
|
pytest-sugar>=0.9.6
|
|
87
90
|
pytest-cov>=4.1.0
|
|
@@ -89,6 +92,7 @@ pytest-mock>=3.12.0
|
|
|
89
92
|
pytest-servers[all]>=0.5.9
|
|
90
93
|
pytest-benchmark[histogram]
|
|
91
94
|
pytest-xdist>=3.3.1
|
|
95
|
+
pytest-env>=1.1.0
|
|
92
96
|
virtualenv
|
|
93
97
|
dulwich
|
|
94
98
|
hypothesis
|