datachain 0.3.20__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.3.20 → datachain-0.5.0}/.pre-commit-config.yaml +1 -1
- {datachain-0.3.20/src/datachain.egg-info → datachain-0.5.0}/PKG-INFO +1 -1
- {datachain-0.3.20 → datachain-0.5.0}/mkdocs.yml +2 -0
- datachain-0.5.0/overrides/main.html +12 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/catalog/catalog.py +0 -3
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/cli.py +3 -2
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/metastore.py +8 -12
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/warehouse.py +1 -3
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/dataset.py +0 -8
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/dc.py +197 -113
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/listing.py +5 -3
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/pytorch.py +5 -1
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/dataset.py +1 -1
- {datachain-0.3.20 → datachain-0.5.0/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain.egg-info/SOURCES.txt +1 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/conftest.py +0 -1
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_catalog.py +5 -2
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_datachain.py +4 -4
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_pull.py +0 -1
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_datachain.py +21 -25
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_datachain_merge.py +1 -1
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_query.py +1 -1
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_warehouse.py +0 -2
- {datachain-0.3.20 → datachain-0.5.0}/.cruft.json +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/.gitattributes +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/.github/codecov.yaml +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/.github/dependabot.yml +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/.github/workflows/release.yml +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/.github/workflows/tests.yml +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/.gitignore +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/CONTRIBUTING.rst +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/LICENSE +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/README.rst +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/docs/assets/datachain.svg +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/docs/assets/flowchart.png +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/docs/index.md +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/docs/references/datachain.md +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/docs/references/datatype.md +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/docs/references/file.md +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/docs/references/index.md +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/docs/references/sql.md +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/docs/references/torch.md +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/docs/references/udf.md +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/multimodal/wds.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/noxfile.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/pyproject.toml +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/setup.cfg +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/__main__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/asyn.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/cache.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/cli_utils.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/azure.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/gcs.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/hf.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/local.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/client/s3.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/config.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/id_generator.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/error.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/job.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/clip.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/file.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/hf.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/image.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/settings.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/tar.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/text.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/udf.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/utils.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/listing.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/node.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/progress.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/py.typed +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/batch.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/metrics.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/params.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/queue.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/schema.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/session.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/query/udf.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/remote/studio.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/types.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/sql/utils.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/storage.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/telemetry.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain/utils.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/data.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/examples/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/examples/test_examples.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/examples/wds_data.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_client.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_datasets.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_listing.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_ls.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_metrics.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_pytorch.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/func/test_query.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/scripts/feature_class.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/test_cli_e2e.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/test_query_e2e.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/test_telemetry.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_asyn.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_cache.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_catalog.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_client.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_dataset.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_id_generator.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_listing.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_metastore.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_query_params.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_serializer.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_session.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_storage.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/unit/test_utils.py +0 -0
- {datachain-0.3.20 → datachain-0.5.0}/tests/utils.py +0 -0
|
@@ -15,6 +15,7 @@ validation:
|
|
|
15
15
|
|
|
16
16
|
theme:
|
|
17
17
|
name: material
|
|
18
|
+
custom_dir: overrides
|
|
18
19
|
logo: assets/datachain-white.svg
|
|
19
20
|
favicon: assets/datachain.svg
|
|
20
21
|
icon:
|
|
@@ -71,6 +72,7 @@ nav:
|
|
|
71
72
|
- references/udf.md
|
|
72
73
|
- references/torch.md
|
|
73
74
|
- references/sql.md
|
|
75
|
+
- DataChain Website: https://datachain.ai" target="_blank"
|
|
74
76
|
|
|
75
77
|
markdown_extensions:
|
|
76
78
|
- abbr
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{% extends "base.html" %}
|
|
2
|
+
|
|
3
|
+
{% block scripts %}
|
|
4
|
+
|
|
5
|
+
{{ super() }}
|
|
6
|
+
|
|
7
|
+
<script type="text/javascript">
|
|
8
|
+
!function () { var e, t, n; e = "14ffd92a6cbf5f2", t = function () { Reo.init({ clientID: "14ffd92a6cbf5f2" }) }, (n = document.createElement("script")).src = "https://static.reo.dev/" + e + "/reo.js", n.async = !0, n.onload = t, document.head.appendChild(n) }();
|
|
9
|
+
</script>
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
{% endblock %}
|
|
@@ -979,7 +979,6 @@ class Catalog:
|
|
|
979
979
|
script_output="",
|
|
980
980
|
create_rows_table=True,
|
|
981
981
|
job_id: Optional[str] = None,
|
|
982
|
-
is_job_result: bool = False,
|
|
983
982
|
) -> DatasetRecord:
|
|
984
983
|
"""
|
|
985
984
|
Creates dataset version if it doesn't exist.
|
|
@@ -1001,7 +1000,6 @@ class Catalog:
|
|
|
1001
1000
|
script_output=script_output,
|
|
1002
1001
|
schema=schema,
|
|
1003
1002
|
job_id=job_id,
|
|
1004
|
-
is_job_result=is_job_result,
|
|
1005
1003
|
ignore_if_exists=True,
|
|
1006
1004
|
)
|
|
1007
1005
|
|
|
@@ -1211,7 +1209,6 @@ class Catalog:
|
|
|
1211
1209
|
size=dataset_version.size,
|
|
1212
1210
|
preview=dataset_version.preview,
|
|
1213
1211
|
job_id=dataset_version.job_id,
|
|
1214
|
-
is_job_result=dataset_version.is_job_result,
|
|
1215
1212
|
)
|
|
1216
1213
|
# to avoid re-creating rows table, we are just renaming it for a new version
|
|
1217
1214
|
# of target dataset
|
|
@@ -12,7 +12,7 @@ from typing import TYPE_CHECKING, Optional, Union
|
|
|
12
12
|
|
|
13
13
|
import shtab
|
|
14
14
|
|
|
15
|
-
from datachain import utils
|
|
15
|
+
from datachain import Session, utils
|
|
16
16
|
from datachain.cli_utils import BooleanOptionalAction, CommaSeparatedArgs, KeyValueArgs
|
|
17
17
|
from datachain.lib.dc import DataChain
|
|
18
18
|
from datachain.telemetry import telemetry
|
|
@@ -770,7 +770,8 @@ def show(
|
|
|
770
770
|
show_records(records, collapse_columns=not no_collapse)
|
|
771
771
|
if schema and dataset_version.feature_schema:
|
|
772
772
|
print("\nSchema:")
|
|
773
|
-
|
|
773
|
+
session = Session.get(catalog=catalog)
|
|
774
|
+
dc = DataChain.from_dataset(name=name, version=version, session=session)
|
|
774
775
|
dc.print_schema()
|
|
775
776
|
|
|
776
777
|
|
|
@@ -15,7 +15,6 @@ from uuid import uuid4
|
|
|
15
15
|
from sqlalchemy import (
|
|
16
16
|
JSON,
|
|
17
17
|
BigInteger,
|
|
18
|
-
Boolean,
|
|
19
18
|
Column,
|
|
20
19
|
DateTime,
|
|
21
20
|
ForeignKey,
|
|
@@ -228,7 +227,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
228
227
|
self,
|
|
229
228
|
dataset: DatasetRecord,
|
|
230
229
|
version: int,
|
|
231
|
-
status: int
|
|
230
|
+
status: int,
|
|
232
231
|
sources: str = "",
|
|
233
232
|
feature_schema: Optional[dict] = None,
|
|
234
233
|
query_script: str = "",
|
|
@@ -243,7 +242,6 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
243
242
|
size: Optional[int] = None,
|
|
244
243
|
preview: Optional[list[dict]] = None,
|
|
245
244
|
job_id: Optional[str] = None,
|
|
246
|
-
is_job_result: bool = False,
|
|
247
245
|
) -> DatasetRecord:
|
|
248
246
|
"""Creates new dataset version."""
|
|
249
247
|
|
|
@@ -449,7 +447,6 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
449
447
|
Column("name", Text, nullable=False),
|
|
450
448
|
Column("description", Text),
|
|
451
449
|
Column("labels", JSON, nullable=True),
|
|
452
|
-
Column("shadow", Boolean, nullable=False),
|
|
453
450
|
Column("status", Integer, nullable=False),
|
|
454
451
|
Column("feature_schema", JSON, nullable=True),
|
|
455
452
|
Column("created_at", DateTime(timezone=True)),
|
|
@@ -482,8 +479,11 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
482
479
|
nullable=False,
|
|
483
480
|
),
|
|
484
481
|
Column("version", Integer, nullable=False),
|
|
485
|
-
|
|
486
|
-
|
|
482
|
+
Column(
|
|
483
|
+
"status",
|
|
484
|
+
Integer,
|
|
485
|
+
nullable=False,
|
|
486
|
+
),
|
|
487
487
|
Column("feature_schema", JSON, nullable=True),
|
|
488
488
|
Column("created_at", DateTime(timezone=True)),
|
|
489
489
|
Column("finished_at", DateTime(timezone=True)),
|
|
@@ -497,7 +497,6 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
497
497
|
Column("query_script", Text, nullable=False, default=""),
|
|
498
498
|
Column("schema", JSON, nullable=True),
|
|
499
499
|
Column("job_id", Text, nullable=True),
|
|
500
|
-
Column("is_job_result", Boolean, nullable=False, default=False),
|
|
501
500
|
UniqueConstraint("dataset_id", "version"),
|
|
502
501
|
]
|
|
503
502
|
|
|
@@ -971,7 +970,6 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
971
970
|
# TODO abstract this method and add registered = True based on kwargs
|
|
972
971
|
query = self._datasets_insert().values(
|
|
973
972
|
name=name,
|
|
974
|
-
shadow=False,
|
|
975
973
|
status=status,
|
|
976
974
|
feature_schema=json.dumps(feature_schema or {}),
|
|
977
975
|
created_at=datetime.now(timezone.utc),
|
|
@@ -994,7 +992,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
994
992
|
self,
|
|
995
993
|
dataset: DatasetRecord,
|
|
996
994
|
version: int,
|
|
997
|
-
status: int
|
|
995
|
+
status: int,
|
|
998
996
|
sources: str = "",
|
|
999
997
|
feature_schema: Optional[dict] = None,
|
|
1000
998
|
query_script: str = "",
|
|
@@ -1009,7 +1007,6 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1009
1007
|
size: Optional[int] = None,
|
|
1010
1008
|
preview: Optional[list[dict]] = None,
|
|
1011
1009
|
job_id: Optional[str] = None,
|
|
1012
|
-
is_job_result: bool = False,
|
|
1013
1010
|
conn=None,
|
|
1014
1011
|
) -> DatasetRecord:
|
|
1015
1012
|
"""Creates new dataset version."""
|
|
@@ -1021,7 +1018,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1021
1018
|
query = self._datasets_versions_insert().values(
|
|
1022
1019
|
dataset_id=dataset.id,
|
|
1023
1020
|
version=version,
|
|
1024
|
-
status=status,
|
|
1021
|
+
status=status,
|
|
1025
1022
|
feature_schema=json.dumps(feature_schema or {}),
|
|
1026
1023
|
created_at=created_at or datetime.now(timezone.utc),
|
|
1027
1024
|
finished_at=finished_at,
|
|
@@ -1035,7 +1032,6 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1035
1032
|
size=size,
|
|
1036
1033
|
preview=json.dumps(preview or []),
|
|
1037
1034
|
job_id=job_id or os.getenv("DATACHAIN_JOB_ID"),
|
|
1038
|
-
is_job_result=is_job_result,
|
|
1039
1035
|
)
|
|
1040
1036
|
if ignore_if_exists and hasattr(query, "on_conflict_do_nothing"):
|
|
1041
1037
|
# SQLite and PostgreSQL both support 'on_conflict_do_nothing',
|
|
@@ -919,9 +919,7 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
919
919
|
def is_temp_table_name(self, name: str) -> bool:
|
|
920
920
|
"""Returns if the given table name refers to a temporary
|
|
921
921
|
or no longer needed table."""
|
|
922
|
-
return name.startswith(
|
|
923
|
-
(self.TMP_TABLE_NAME_PREFIX, self.UDF_TABLE_NAME_PREFIX, "ds_shadow_")
|
|
924
|
-
) or name.endswith("_shadow")
|
|
922
|
+
return name.startswith((self.TMP_TABLE_NAME_PREFIX, self.UDF_TABLE_NAME_PREFIX))
|
|
925
923
|
|
|
926
924
|
def get_temp_table_names(self) -> list[str]:
|
|
927
925
|
return [
|
|
@@ -179,7 +179,6 @@ class DatasetVersion:
|
|
|
179
179
|
sources: str = ""
|
|
180
180
|
query_script: str = ""
|
|
181
181
|
job_id: Optional[str] = None
|
|
182
|
-
is_job_result: bool = False
|
|
183
182
|
|
|
184
183
|
@classmethod
|
|
185
184
|
def parse( # noqa: PLR0913
|
|
@@ -201,7 +200,6 @@ class DatasetVersion:
|
|
|
201
200
|
sources: str = "",
|
|
202
201
|
query_script: str = "",
|
|
203
202
|
job_id: Optional[str] = None,
|
|
204
|
-
is_job_result: bool = False,
|
|
205
203
|
):
|
|
206
204
|
return cls(
|
|
207
205
|
id,
|
|
@@ -221,7 +219,6 @@ class DatasetVersion:
|
|
|
221
219
|
sources,
|
|
222
220
|
query_script,
|
|
223
221
|
job_id,
|
|
224
|
-
is_job_result,
|
|
225
222
|
)
|
|
226
223
|
|
|
227
224
|
def __eq__(self, other):
|
|
@@ -270,7 +267,6 @@ class DatasetRecord:
|
|
|
270
267
|
name: str
|
|
271
268
|
description: Optional[str]
|
|
272
269
|
labels: list[str]
|
|
273
|
-
shadow: bool
|
|
274
270
|
schema: dict[str, Union[SQLType, type[SQLType]]]
|
|
275
271
|
feature_schema: dict
|
|
276
272
|
versions: list[DatasetVersion]
|
|
@@ -299,7 +295,6 @@ class DatasetRecord:
|
|
|
299
295
|
name: str,
|
|
300
296
|
description: Optional[str],
|
|
301
297
|
labels: str,
|
|
302
|
-
shadow: int,
|
|
303
298
|
status: int,
|
|
304
299
|
feature_schema: Optional[str],
|
|
305
300
|
created_at: datetime,
|
|
@@ -327,7 +322,6 @@ class DatasetRecord:
|
|
|
327
322
|
version_query_script: Optional[str],
|
|
328
323
|
version_schema: str,
|
|
329
324
|
version_job_id: Optional[str] = None,
|
|
330
|
-
version_is_job_result: bool = False,
|
|
331
325
|
) -> "DatasetRecord":
|
|
332
326
|
labels_lst: list[str] = json.loads(labels) if labels else []
|
|
333
327
|
schema_dct: dict[str, Any] = json.loads(schema) if schema else {}
|
|
@@ -353,7 +347,6 @@ class DatasetRecord:
|
|
|
353
347
|
version_sources, # type: ignore[arg-type]
|
|
354
348
|
version_query_script, # type: ignore[arg-type]
|
|
355
349
|
version_job_id,
|
|
356
|
-
version_is_job_result,
|
|
357
350
|
)
|
|
358
351
|
|
|
359
352
|
return cls(
|
|
@@ -361,7 +354,6 @@ class DatasetRecord:
|
|
|
361
354
|
name,
|
|
362
355
|
description,
|
|
363
356
|
labels_lst,
|
|
364
|
-
bool(shadow),
|
|
365
357
|
cls.parse_schema(schema_dct), # type: ignore[arg-type]
|
|
366
358
|
json.loads(feature_schema) if feature_schema else {},
|
|
367
359
|
[dataset_version],
|