atdata 0.2.3b1__tar.gz → 0.3.0b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/issues.db +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.github/workflows/uv-test.yml +58 -3
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.gitignore +3 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.vscode/settings.json +6 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/CHANGELOG.md +105 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/CLAUDE.md +4 -2
- {atdata-0.2.3b1 → atdata-0.3.0b1}/PKG-INFO +4 -1
- atdata-0.3.0b1/benchmarks/bench_atmosphere.py +220 -0
- atdata-0.3.0b1/benchmarks/bench_dataset_io.py +293 -0
- atdata-0.3.0b1/benchmarks/bench_index_providers.py +215 -0
- atdata-0.3.0b1/benchmarks/bench_query.py +278 -0
- atdata-0.3.0b1/benchmarks/conftest.py +345 -0
- atdata-0.3.0b1/benchmarks/render_report.py +462 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/AbstractDataStore.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/AbstractIndex.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/AtUri.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/AtmosphereClient.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/AtmosphereIndex.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/AtmosphereIndexEntry.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/BlobSource.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/DataSource.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/Dataset.html +82 -228
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/DatasetDict.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/DatasetLoader.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/DatasetPublisher.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/DictSample.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/IndexEntry.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/Lens.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/LensLoader.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/LensPublisher.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/PDSBlobStore.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/Packable-protocol.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/PackableSample.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/S3Source.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/SampleBatch.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/SchemaLoader.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/SchemaPublisher.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/URLSource.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/index.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/load_dataset.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/local.Index.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/local.LocalDatasetEntry.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/local.S3DataStore.html +28 -192
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/packable.html +36 -195
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/promote_to_atmosphere.html +28 -192
- atdata-0.3.0b1/docs/benchmarks/index.html +1331 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/index.html +44 -208
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/architecture.html +47 -211
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/atmosphere.html +55 -219
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/datasets.html +46 -210
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/deployment.html +33 -197
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/lenses.html +44 -208
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/load-dataset.html +45 -209
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/local-storage.html +44 -208
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/packable-samples.html +46 -210
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/promotion.html +41 -205
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/protocols.html +45 -209
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/troubleshooting.html +33 -197
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/uri-spec.html +35 -199
- atdata-0.3.0b1/docs/robots.txt +1 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/search.json +12 -183
- atdata-0.3.0b1/docs/site_libs/bootstrap/bootstrap-62ce3d63edf8507b4d15f75c6b92352a.min.css +12 -0
- atdata-0.2.3b1/docs/site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css → atdata-0.3.0b1/docs/site_libs/quarto-html/quarto-syntax-highlighting-b854dd4081d6110d4acfde180236d7b2.css +2 -2
- atdata-0.3.0b1/docs/sitemap.xml +199 -0
- atdata-0.3.0b1/docs/styles.css +50 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/tutorials/atmosphere.html +47 -211
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/tutorials/local-workflow.html +41 -205
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/tutorials/promotion.html +45 -209
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/tutorials/quickstart.html +39 -203
- atdata-0.3.0b1/docs_src/.nojekyll +0 -0
- atdata-0.3.0b1/docs_src/_brand.yml +73 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/_quarto.yml +57 -13
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/Dataset.qmd +35 -15
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/packable.qmd +13 -5
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/index.qmd +2 -2
- atdata-0.3.0b1/docs_src/styles.css +50 -0
- atdata-0.3.0b1/docs_src/theme-dark.scss +1 -0
- atdata-0.3.0b1/docs_src/theme-light.scss +15 -0
- atdata-0.3.0b1/justfile +49 -0
- atdata-0.3.0b1/prototyping/human-review-atmosphere.ipynb +66 -0
- atdata-0.3.0b1/prototyping/human-review-local.ipynb +674 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/pyproject.toml +12 -1
- atdata-0.3.0b1/src/atdata/.gitignore +1 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/__init__.py +30 -0
- atdata-0.3.0b1/src/atdata/_exceptions.py +168 -0
- atdata-0.3.0b1/src/atdata/_helpers.py +74 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_hf_api.py +63 -11
- atdata-0.3.0b1/src/atdata/_logging.py +70 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_protocols.py +19 -62
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_schema_codec.py +5 -4
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_type_utils.py +28 -2
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/__init__.py +19 -9
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/records.py +3 -2
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/schema.py +2 -2
- atdata-0.3.0b1/src/atdata/cli/__init__.py +208 -0
- atdata-0.3.0b1/src/atdata/cli/inspect.py +69 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/cli/local.py +1 -1
- atdata-0.3.0b1/src/atdata/cli/preview.py +63 -0
- atdata-0.3.0b1/src/atdata/cli/schema.py +109 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/dataset.py +428 -326
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/lens.py +9 -2
- atdata-0.3.0b1/src/atdata/local/__init__.py +71 -0
- atdata-0.3.0b1/src/atdata/local/_entry.py +157 -0
- atdata-0.3.0b1/src/atdata/local/_index.py +940 -0
- atdata-0.3.0b1/src/atdata/local/_repo_legacy.py +218 -0
- atdata-0.3.0b1/src/atdata/local/_s3.py +349 -0
- atdata-0.3.0b1/src/atdata/local/_schema.py +380 -0
- atdata-0.3.0b1/src/atdata/manifest/__init__.py +28 -0
- atdata-0.3.0b1/src/atdata/manifest/_aggregates.py +156 -0
- atdata-0.3.0b1/src/atdata/manifest/_builder.py +163 -0
- atdata-0.3.0b1/src/atdata/manifest/_fields.py +154 -0
- atdata-0.3.0b1/src/atdata/manifest/_manifest.py +146 -0
- atdata-0.3.0b1/src/atdata/manifest/_query.py +150 -0
- atdata-0.3.0b1/src/atdata/manifest/_writer.py +74 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/promote.py +4 -4
- atdata-0.3.0b1/src/atdata/providers/__init__.py +25 -0
- atdata-0.3.0b1/src/atdata/providers/_base.py +140 -0
- atdata-0.3.0b1/src/atdata/providers/_factory.py +69 -0
- atdata-0.3.0b1/src/atdata/providers/_postgres.py +214 -0
- atdata-0.3.0b1/src/atdata/providers/_redis.py +171 -0
- atdata-0.3.0b1/src/atdata/providers/_sqlite.py +191 -0
- atdata-0.3.0b1/src/atdata/repository.py +323 -0
- atdata-0.3.0b1/src/atdata/testing.py +337 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/EXPECTED_WARNINGS.md +2 -2
- atdata-0.3.0b1/tests/test_cli.py +794 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_dataset.py +11 -10
- atdata-0.3.0b1/tests/test_dev_experience.py +423 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_hf_api.py +7 -3
- atdata-0.3.0b1/tests/test_index_providers.py +477 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration.py +1 -1
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_atmosphere_live.py +3 -3
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_cross_backend.py +12 -12
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_dynamic_types.py +2 -2
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_edge_cases.py +3 -3
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_error_handling.py +25 -31
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_lens.py +1 -1
- atdata-0.3.0b1/tests/test_integration_manifest.py +263 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_promotion.py +10 -10
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_lens.py +1 -1
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_local.py +9 -8
- atdata-0.3.0b1/tests/test_logging.py +60 -0
- atdata-0.3.0b1/tests/test_manifest.py +528 -0
- atdata-0.3.0b1/tests/test_partial_failure.py +152 -0
- atdata-0.3.0b1/tests/test_postgres_provider.py +411 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_protocols.py +5 -5
- atdata-0.3.0b1/tests/test_query_coverage.py +215 -0
- atdata-0.3.0b1/tests/test_repository.py +377 -0
- atdata-0.3.0b1/tests/test_repository_coverage.py +265 -0
- atdata-0.3.0b1/tests/test_stub_manager.py +556 -0
- atdata-0.3.0b1/tests/test_testing.py +205 -0
- atdata-0.3.0b1/tests/test_type_utils.py +181 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/uv.lock +114 -2
- atdata-0.2.3b1/docs/robots.txt +0 -1
- atdata-0.2.3b1/docs/site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css +0 -12
- atdata-0.2.3b1/docs/site_libs/bootstrap/bootstrap-dark-7964ffd8887b0991fe8d71c6c8bc75d6.min.css +0 -12
- atdata-0.2.3b1/docs/site_libs/quarto-html/quarto-syntax-highlighting-dark-8dcd8563ea6803ab7cbb3d71ca5772e1.css +0 -210
- atdata-0.2.3b1/docs/sitemap.xml +0 -199
- atdata-0.2.3b1/justfile +0 -2
- atdata-0.2.3b1/prototyping/human-review-atmosphere.ipynb +0 -25
- atdata-0.2.3b1/prototyping/human-review-local.ipynb +0 -634
- atdata-0.2.3b1/src/atdata/_helpers.py +0 -60
- atdata-0.2.3b1/src/atdata/cli/__init__.py +0 -222
- atdata-0.2.3b1/src/atdata/local.py +0 -1720
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/c.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/cpp.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/csharp.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/global.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/go.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/java.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/javascript-react.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/javascript.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/kotlin.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/odin.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/php.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/project.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/python.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/ruby.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/rust.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/scala.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/swift.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/typescript-react.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/typescript.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/zig.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.claude/hooks/post-edit-check.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.claude/hooks/prompt-guard.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.claude/hooks/session-start.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.claude/settings.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.github/workflows/uv-publish-pypi.yml +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/roadmap/v0.2/03_human-review-assessment.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/roadmap/v0.3/01_codebase-review.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/roadmap/v0.3/02_synthesis-roadmap.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/roadmap/v0.3/architecture-doc.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/01_overview.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/02_lexicon_design.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/03_python_client.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/04_appview.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/05_codegen.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/README.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/atproto_integration.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/01_schema_representation_format.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/02_lens_code_storage.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/03_webdataset_storage.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/04_schema_evolution.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/05_lexicon_namespace.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/06_lexicon_validation.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/README.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/assessment.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/record_lexicon_assessment.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/sampleSchema_design_questions.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/examples/code/ndarray_roundtrip.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/examples/code/validate_ndarray_shim.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/examples/dataset_blob_storage.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/examples/dataset_external_storage.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/examples/lens_example.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/examples/sampleSchema_example.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/README.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/README_ARRAY_FORMATS.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/README_SCHEMA_TYPES.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.arrayFormat.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.getLatestSchema.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.lens.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.record.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.sampleSchema.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.schemaType.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.storageBlobs.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.storageExternal.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ndarray_shim.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/ndarray_shim_spec.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.python-version +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/atproto_lexicon_guide.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/atproto_lexicon_spec.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/huggingface-datasets/architecture.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/huggingface-datasets/loading-guide.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/huggingface-datasets/loading-methods.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/huggingface-datasets/main-classes.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/python_atproto_sdk.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.review/comprehensive-review.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/.review/human-review.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/LICENSE +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/README.md +0 -0
- /atdata-0.2.3b1/docs/.nojekyll → /atdata-0.3.0b1/benchmarks/__init__.py +0 -0
- {atdata-0.2.3b1/docs_src → atdata-0.3.0b1/docs}/.nojekyll +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/assets/styles.css +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/bootstrap/bootstrap-icons.css +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/bootstrap/bootstrap-icons.woff +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/bootstrap/bootstrap.min.js +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/clipboard/clipboard.min.js +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-html/anchor.min.js +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-html/popper.min.js +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-html/quarto.js +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-html/tabsets/tabsets.js +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-html/tippy.css +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-html/tippy.umd.min.js +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-nav/headroom.min.js +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-nav/quarto-nav.js +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-search/autocomplete.umd.js +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-search/fuse.min.js +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-search/quarto-search.js +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/api-index-handwritten.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/atmosphere.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/datasets.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/index.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/lenses.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/load-dataset.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/local-storage.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/packable-samples.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/promotion.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/protocols.md +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.gitignore +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/AbstractDataStore.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/AbstractIndex.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/AtUri.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/AtmosphereClient.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/AtmosphereIndex.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/AtmosphereIndexEntry.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/BlobSource.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/DataSource.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/DatasetDict.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/DatasetLoader.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/DatasetPublisher.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/DictSample.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/IndexEntry.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/Lens.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/LensLoader.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/LensPublisher.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/PDSBlobStore.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/Packable-protocol.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/PackableSample.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/S3Source.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/SampleBatch.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/SchemaLoader.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/SchemaPublisher.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/URLSource.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/index.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/load_dataset.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/local.Index.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/local.LocalDatasetEntry.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/local.S3DataStore.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/promote_to_atmosphere.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/assets/styles.css +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/objects.json +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/architecture.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/atmosphere.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/datasets.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/deployment.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/lenses.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/load-dataset.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/local-storage.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/packable-samples.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/promotion.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/protocols.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/troubleshooting.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/uri-spec.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/tutorials/atmosphere.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/tutorials/local-workflow.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/tutorials/promotion.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/tutorials/quickstart.qmd +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/examples/atmosphere_demo.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/examples/local_workflow.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/examples/promote_workflow.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/issues.db +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/prototyping/.credentials/.gitignore +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/prototyping/data/.gitignore +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_cid.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_sources.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_stub_manager.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/_types.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/client.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/lens.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/store.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/cli/diagnose.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/conftest.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/fixtures/test_samples.tar +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_atmosphere.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_cid.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_helpers.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_atmosphere.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_e2e.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_local.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_promote.py +0 -0
- {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_sources.py +0 -0
|
Binary file
|
|
@@ -4,13 +4,11 @@ on:
|
|
|
4
4
|
push:
|
|
5
5
|
branches:
|
|
6
6
|
- main
|
|
7
|
-
- release/*
|
|
8
7
|
pull_request:
|
|
9
|
-
branches:
|
|
10
|
-
- main
|
|
11
8
|
|
|
12
9
|
permissions:
|
|
13
10
|
contents: read
|
|
11
|
+
actions: read
|
|
14
12
|
|
|
15
13
|
concurrency:
|
|
16
14
|
group: ${{ github.workflow }}-${{ github.ref }}
|
|
@@ -77,3 +75,60 @@ jobs:
|
|
|
77
75
|
with:
|
|
78
76
|
fail_ci_if_error: false
|
|
79
77
|
token: ${{ secrets.CODECOV_TOKEN }}
|
|
78
|
+
|
|
79
|
+
benchmark:
|
|
80
|
+
name: Benchmarks
|
|
81
|
+
runs-on: ubuntu-latest
|
|
82
|
+
needs: [lint]
|
|
83
|
+
permissions:
|
|
84
|
+
contents: write
|
|
85
|
+
actions: write
|
|
86
|
+
steps:
|
|
87
|
+
- uses: actions/checkout@v5
|
|
88
|
+
|
|
89
|
+
- name: Set up Python
|
|
90
|
+
uses: actions/setup-python@v5
|
|
91
|
+
with:
|
|
92
|
+
python-version: "3.14"
|
|
93
|
+
|
|
94
|
+
- name: Install uv
|
|
95
|
+
uses: astral-sh/setup-uv@v6
|
|
96
|
+
with:
|
|
97
|
+
enable-cache: true
|
|
98
|
+
|
|
99
|
+
- name: Install just
|
|
100
|
+
uses: extractions/setup-just@v2
|
|
101
|
+
|
|
102
|
+
- name: Install the project
|
|
103
|
+
run: uv sync --locked --all-extras --dev
|
|
104
|
+
|
|
105
|
+
- name: Start Redis
|
|
106
|
+
uses: supercharge/redis-github-action@1.8.1
|
|
107
|
+
with:
|
|
108
|
+
redis-version: 7
|
|
109
|
+
|
|
110
|
+
- name: Run benchmarks
|
|
111
|
+
run: just bench
|
|
112
|
+
|
|
113
|
+
- name: Copy report to docs
|
|
114
|
+
run: |
|
|
115
|
+
mkdir -p docs/benchmarks
|
|
116
|
+
cp .bench/report.html docs/benchmarks/index.html
|
|
117
|
+
|
|
118
|
+
- name: Commit updated benchmark docs
|
|
119
|
+
if: github.event_name == 'push'
|
|
120
|
+
run: |
|
|
121
|
+
git config user.name "github-actions[bot]"
|
|
122
|
+
git config user.email "github-actions[bot]@users.noreply.github.com"
|
|
123
|
+
git add docs/benchmarks/index.html
|
|
124
|
+
git diff --cached --quiet || git commit -m "docs: update benchmark report [skip ci]"
|
|
125
|
+
git push
|
|
126
|
+
|
|
127
|
+
- name: Upload benchmark report
|
|
128
|
+
uses: actions/upload-artifact@v4
|
|
129
|
+
if: always()
|
|
130
|
+
with:
|
|
131
|
+
name: benchmark-report
|
|
132
|
+
path: |
|
|
133
|
+
.bench/report.html
|
|
134
|
+
.bench/*.json
|
|
@@ -5,15 +5,19 @@
|
|
|
5
5
|
"atproto",
|
|
6
6
|
"creds",
|
|
7
7
|
"dtype",
|
|
8
|
+
"fastparquet",
|
|
8
9
|
"getattr",
|
|
9
10
|
"hgetall",
|
|
10
11
|
"hset",
|
|
12
|
+
"libipld",
|
|
11
13
|
"maxcount",
|
|
12
14
|
"minioadmin",
|
|
13
15
|
"msgpack",
|
|
14
16
|
"ndarray",
|
|
15
17
|
"NSID",
|
|
16
18
|
"ormsgpack",
|
|
19
|
+
"psycopg",
|
|
20
|
+
"pydantic",
|
|
17
21
|
"pypi",
|
|
18
22
|
"pyproject",
|
|
19
23
|
"pytest",
|
|
@@ -24,6 +28,8 @@
|
|
|
24
28
|
"schemamodels",
|
|
25
29
|
"shardlists",
|
|
26
30
|
"tariterators",
|
|
31
|
+
"tqdm",
|
|
32
|
+
"typer",
|
|
27
33
|
"unpackb",
|
|
28
34
|
"webdataset"
|
|
29
35
|
],
|
|
@@ -25,6 +25,111 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|
|
25
25
|
- **Comprehensive integration test suite**: 593 tests covering E2E flows, error handling, edge cases
|
|
26
26
|
|
|
27
27
|
### Changed
|
|
28
|
+
- Investigate upload-artifact not finding benchmark output (#512)
|
|
29
|
+
- Fix duplicate CI runs for push+PR overlap (#511)
|
|
30
|
+
- Scope contents:write permission to benchmark job only (#510)
|
|
31
|
+
- Add benchmark docs auto-commit to CI workflow (#509)
|
|
32
|
+
- Submit PR for v0.3.0b1 release to upstream/main (#508)
|
|
33
|
+
- Implement GH#39: Production hardening (observability, error handling, testing infra) (#504)
|
|
34
|
+
- Add pluggable structured logging via atdata.configure_logging (#507)
|
|
35
|
+
- Add PartialFailureError and shard-level error handling to Dataset.map (#506)
|
|
36
|
+
- Add atdata.testing module with mock clients, fixtures, and helpers (#505)
|
|
37
|
+
- Fix CI linting failures (20 ruff errors) (#503)
|
|
38
|
+
- Adversarial review: Post-benchmark suite assessment (#494)
|
|
39
|
+
- Remove redundant protocol docstrings that restate signatures (#500)
|
|
40
|
+
- Add missing unit tests for _type_utils.py (#499)
|
|
41
|
+
- Strengthen weak assertions (assert X is not None → value checks) (#498)
|
|
42
|
+
- Trim verbose exception constructor docstrings (#501)
|
|
43
|
+
- Analyze benchmark results for performance improvement opportunities (#502)
|
|
44
|
+
- Consolidate remaining duplicate sample types in test files (#497)
|
|
45
|
+
- Remove dead code: _repo_legacy.py legacy UUID field, unused imports (#496)
|
|
46
|
+
- Trim verbose docstrings in dataset.py and _index.py (#495)
|
|
47
|
+
- Benchmark report: replace mean/stddev with median/IQR, add per-sample columns (#492)
|
|
48
|
+
- Add parameter descriptions to benchmark suite with automatic report introspection (#491)
|
|
49
|
+
- HTML benchmark reports with CI integration (#487)
|
|
50
|
+
- Add bench + render step to CI on highest Python version only (#490)
|
|
51
|
+
- Update justfile bench commands to export JSON and render (#489)
|
|
52
|
+
- Create render_report.py script to convert JSON to HTML (#488)
|
|
53
|
+
- Increase test coverage for low-coverage modules (#480)
|
|
54
|
+
- Add providers/_postgres.py tests (mock-based) (#485)
|
|
55
|
+
- Add _stub_manager.py tests (#484)
|
|
56
|
+
- Add manifest/_query.py tests (#483)
|
|
57
|
+
- Add repository.py tests (#482)
|
|
58
|
+
- Add CLI tests (cli/__init__, diagnose, local, preview, schema) (#481)
|
|
59
|
+
- Check test coverage for CLI utils (#479)
|
|
60
|
+
- Add performance benchmark suite for atdata (#471)
|
|
61
|
+
- Verify benchmarks run (#478)
|
|
62
|
+
- Update pyproject.toml and justfile (#477)
|
|
63
|
+
- Create bench_atmosphere.py (#476)
|
|
64
|
+
- Create bench_query.py (#475)
|
|
65
|
+
- Create bench_dataset_io.py (#474)
|
|
66
|
+
- Create bench_index_providers.py (#473)
|
|
67
|
+
- Create benchmarks/conftest.py with shared fixtures (#472)
|
|
68
|
+
- Add per-shard manifest and query system (GH #35) (#462)
|
|
69
|
+
- Write unit and integration tests (#470)
|
|
70
|
+
- Integrate manifest into write path and Dataset.query() (#469)
|
|
71
|
+
- Implement QueryExecutor and SampleLocation (#468)
|
|
72
|
+
- Implement ManifestWriter (JSON + parquet) (#467)
|
|
73
|
+
- Implement ManifestBuilder (#465)
|
|
74
|
+
- Implement ShardManifest data model (#466)
|
|
75
|
+
- Implement aggregate collectors (categorical, numeric, set) (#464)
|
|
76
|
+
- Implement ManifestField annotation and resolve_manifest_fields() (#463)
|
|
77
|
+
- Migrate type annotations from PackableSample to Packable protocol (#461)
|
|
78
|
+
- Remove LocalIndex factory — consolidate to Index (#460)
|
|
79
|
+
- Split local.py monolith into local/ package (#452)
|
|
80
|
+
- Verify tests and lint pass (#459)
|
|
81
|
+
- Create __init__.py re-export facade and delete local.py (#458)
|
|
82
|
+
- Create _repo_legacy.py with deprecated Repo class (#457)
|
|
83
|
+
- Create _index.py with Index class and LocalIndex factory (#456)
|
|
84
|
+
- Create _s3.py with S3DataStore and S3 helpers (#455)
|
|
85
|
+
- Create _schema.py with schema models and helpers (#454)
|
|
86
|
+
- Create _entry.py with LocalDatasetEntry and constants (#453)
|
|
87
|
+
- Migrate CLI from argparse to typer (#449)
|
|
88
|
+
- Investigate test failures (#450)
|
|
89
|
+
- Fix ensure_stub receiving LocalSchemaRecord instead of dict (#451)
|
|
90
|
+
- GH#38: Developer experience improvements (#437)
|
|
91
|
+
- CLI: atdata preview command (#440)
|
|
92
|
+
- CLI: atdata schema show/diff commands (#439)
|
|
93
|
+
- CLI: atdata inspect command (#438)
|
|
94
|
+
- Dataset.__len__ and Dataset.select() for sample count and indexed access (#447)
|
|
95
|
+
- Dataset.to_pandas() and Dataset.to_dict() export methods (#446)
|
|
96
|
+
- Dataset.filter() and Dataset.map() streaming transforms (#445)
|
|
97
|
+
- Dataset.get(key) for keyed sample access (#442)
|
|
98
|
+
- Dataset.describe() summary statistics (#444)
|
|
99
|
+
- Dataset.schema property and column_names (#443)
|
|
100
|
+
- Dataset.head(n) and Dataset.__iter__ convenience methods (#441)
|
|
101
|
+
- Custom exception hierarchy with actionable error messages (#448)
|
|
102
|
+
- Adversarial review: Post-Repository consolidation assessment (#430)
|
|
103
|
+
- Remove backwards-compat dict-access methods from SchemaField and LocalSchemaRecord (#436)
|
|
104
|
+
- Add missing test coverage for Repository prefix routing edge cases and error paths (#435)
|
|
105
|
+
- Trim over-verbose docstrings in local.py module/class level (#434)
|
|
106
|
+
- Fix formally incorrect test assertions (batch_size, CID, brace notation) (#433)
|
|
107
|
+
- Consolidate duplicate test sample types across test files into conftest.py (#432)
|
|
108
|
+
- Consolidate duplicate entry-creation logic in Index (add_entry vs _insert_dataset_to_provider) (#431)
|
|
109
|
+
- Switch default Index provider from Redis to SQLite (#429)
|
|
110
|
+
- Consolidated Index with Repository system (#424)
|
|
111
|
+
- Phase 4: Deprecate AtmosphereIndex, update exports (#428)
|
|
112
|
+
- Phase 3: Default Index singleton and load_dataset integration (#427)
|
|
113
|
+
- Phase 2: Extend Index with repos/atmosphere params and prefix routing (#426)
|
|
114
|
+
- Phase 1: Create Repository dataclass and _AtmosphereBackend in repository.py (#425)
|
|
115
|
+
- Adversarial review: Post-IndexProvider pluggable storage assessment (#417)
|
|
116
|
+
- Convert TODO comments to tracked issues or remove (#422)
|
|
117
|
+
- Remove deprecated shard_list property references from docstrings (#421)
|
|
118
|
+
- Replace bare except in _stub_manager.py and cli/local.py with specific exceptions (#423)
|
|
119
|
+
- Tighten generic pytest.raises(Exception) to specific exception types in tests (#420)
|
|
120
|
+
- Replace assert statements with ValueError in production code (#419)
|
|
121
|
+
- Consolidate duplicated _parse_semver into _type_utils.py (#418)
|
|
122
|
+
- feat: Add SQLite/PostgreSQL index providers (GH #42) (#409)
|
|
123
|
+
- Update documentation and public API exports (#416)
|
|
124
|
+
- Add tests for all providers (#415)
|
|
125
|
+
- Refactor Index class to accept provider parameter (#414)
|
|
126
|
+
- Implement PostgresIndexProvider (#413)
|
|
127
|
+
- Implement SqliteIndexProvider (#412)
|
|
128
|
+
- Implement RedisIndexProvider (extract from Index class) (#411)
|
|
129
|
+
- Define IndexProvider protocol in _protocols.py (#410)
|
|
130
|
+
- Add just lint command to justfile (#408)
|
|
131
|
+
- Add SQLite/PostgreSQL providers for LocalIndex (in addition to Redis) (#407)
|
|
132
|
+
- Fix type hints for @atdata.packable decorator to show PackableSample methods (#406)
|
|
28
133
|
- Review GitHub workflows and recommend CI improvements (#405)
|
|
29
134
|
- Fix type signatures for Dataset.ordered and Dataset.shuffled (GH#28) (#404)
|
|
30
135
|
- Investigate quartodoc Example section rendering - missing CSS classes on pre/code tags (#401)
|
|
@@ -46,8 +46,10 @@ uv build
|
|
|
46
46
|
Development tasks are managed with [just](https://github.com/casey/just), a command runner. Available commands:
|
|
47
47
|
|
|
48
48
|
```bash
|
|
49
|
-
#
|
|
50
|
-
just
|
|
49
|
+
just test # Run all tests with coverage
|
|
50
|
+
just test tests/test_dataset.py # Run specific test file
|
|
51
|
+
just lint # Run ruff check + format check
|
|
52
|
+
just docs # Build documentation (runs quartodoc + quarto)
|
|
51
53
|
```
|
|
52
54
|
|
|
53
55
|
The `justfile` is in the project root. Add new dev tasks there rather than creating shell scripts.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: atdata
|
|
3
|
-
Version: 0.2.3b1
|
|
3
|
+
Version: 0.3.0b1
|
|
4
4
|
Summary: A loose federation of distributed, typed datasets
|
|
5
5
|
Author-email: Maxine Levesque <hello@maxine.science>, "Maxine @ Forecast Bio" <maxine@forecast.bio>
|
|
6
6
|
License-File: LICENSE
|
|
@@ -20,9 +20,12 @@ Requires-Dist: requests>=2.32.5
|
|
|
20
20
|
Requires-Dist: s3fs>=2025.12.0
|
|
21
21
|
Requires-Dist: schemamodels>=0.9.1
|
|
22
22
|
Requires-Dist: tqdm>=4.67.1
|
|
23
|
+
Requires-Dist: typer>=0.21.1
|
|
23
24
|
Requires-Dist: webdataset>=1.0.2
|
|
24
25
|
Provides-Extra: atmosphere
|
|
25
26
|
Requires-Dist: atproto>=0.0.55; extra == 'atmosphere'
|
|
27
|
+
Provides-Extra: postgres
|
|
28
|
+
Requires-Dist: psycopg[binary]>=3.1; extra == 'postgres'
|
|
26
29
|
Description-Content-Type: text/markdown
|
|
27
30
|
|
|
28
31
|
# atdata
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""Performance benchmarks for remote storage backends.
|
|
2
|
+
|
|
3
|
+
Covers S3DataStore (via moto mock) and Atmosphere/ATProto (network-gated).
|
|
4
|
+
S3 benchmarks use moto for reproducible local measurement.
|
|
5
|
+
Atmosphere benchmarks are marked ``network`` and skip unless a live PDS is available.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pytest
|
|
14
|
+
from moto import mock_aws
|
|
15
|
+
|
|
16
|
+
import atdata
|
|
17
|
+
|
|
18
|
+
from .conftest import (
|
|
19
|
+
IMAGE_SHAPE,
|
|
20
|
+
BenchBasicSample,
|
|
21
|
+
BenchManifestSample,
|
|
22
|
+
BenchNumpySample,
|
|
23
|
+
generate_basic_samples,
|
|
24
|
+
generate_manifest_samples,
|
|
25
|
+
generate_numpy_samples,
|
|
26
|
+
write_tar,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# =============================================================================
|
|
31
|
+
# S3 Fixtures
|
|
32
|
+
# =============================================================================
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@pytest.fixture
|
|
36
|
+
def mock_s3():
|
|
37
|
+
"""Provide mock S3 environment using moto."""
|
|
38
|
+
with mock_aws():
|
|
39
|
+
import boto3
|
|
40
|
+
|
|
41
|
+
creds = {
|
|
42
|
+
"AWS_ACCESS_KEY_ID": "testing",
|
|
43
|
+
"AWS_SECRET_ACCESS_KEY": "testing",
|
|
44
|
+
}
|
|
45
|
+
s3_client = boto3.client(
|
|
46
|
+
"s3",
|
|
47
|
+
aws_access_key_id=creds["AWS_ACCESS_KEY_ID"],
|
|
48
|
+
aws_secret_access_key=creds["AWS_SECRET_ACCESS_KEY"],
|
|
49
|
+
region_name="us-east-1",
|
|
50
|
+
)
|
|
51
|
+
bucket_name = "bench-bucket"
|
|
52
|
+
s3_client.create_bucket(Bucket=bucket_name)
|
|
53
|
+
yield {
|
|
54
|
+
"credentials": creds,
|
|
55
|
+
"bucket": bucket_name,
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _make_s3_store(mock_s3_env):
|
|
60
|
+
from atdata.local._s3 import S3DataStore
|
|
61
|
+
|
|
62
|
+
return S3DataStore(
|
|
63
|
+
credentials=mock_s3_env["credentials"],
|
|
64
|
+
bucket=mock_s3_env["bucket"],
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _make_source_dataset(tmp_path, samples):
|
|
69
|
+
"""Create a local dataset from samples for use as S3 write source."""
|
|
70
|
+
tar_path = write_tar(tmp_path / "source-000000.tar", samples)
|
|
71
|
+
sample_type = type(samples[0])
|
|
72
|
+
return atdata.Dataset[sample_type](url=str(tar_path))
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# =============================================================================
|
|
76
|
+
# S3 Write Benchmarks
|
|
77
|
+
# =============================================================================
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@pytest.mark.bench_s3
|
|
81
|
+
@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning")
|
|
82
|
+
@pytest.mark.filterwarnings("ignore:coroutine.*was never awaited:RuntimeWarning")
|
|
83
|
+
class TestS3WriteBenchmarks:
|
|
84
|
+
"""S3 shard writing benchmarks via moto mock."""
|
|
85
|
+
|
|
86
|
+
PARAM_LABELS = {"n": "samples per shard"}
|
|
87
|
+
|
|
88
|
+
@pytest.mark.parametrize("n", [100, 500], ids=["100", "500"])
|
|
89
|
+
def test_s3_write_shards(self, benchmark, tmp_path, mock_s3, n):
|
|
90
|
+
benchmark.extra_info["n_samples"] = n
|
|
91
|
+
samples = generate_basic_samples(n)
|
|
92
|
+
ds = _make_source_dataset(tmp_path, samples)
|
|
93
|
+
store = _make_s3_store(mock_s3)
|
|
94
|
+
counter = [0]
|
|
95
|
+
|
|
96
|
+
def _write():
|
|
97
|
+
idx = counter[0]
|
|
98
|
+
counter[0] += 1
|
|
99
|
+
store.write_shards(ds, prefix=f"bench/basic-{n}-{idx}")
|
|
100
|
+
|
|
101
|
+
benchmark(_write)
|
|
102
|
+
|
|
103
|
+
def test_s3_write_with_manifest(self, benchmark, tmp_path, mock_s3):
|
|
104
|
+
benchmark.extra_info["n_samples"] = 200
|
|
105
|
+
samples = generate_manifest_samples(200)
|
|
106
|
+
ds = _make_source_dataset(tmp_path, samples)
|
|
107
|
+
store = _make_s3_store(mock_s3)
|
|
108
|
+
counter = [0]
|
|
109
|
+
|
|
110
|
+
def _write():
|
|
111
|
+
idx = counter[0]
|
|
112
|
+
counter[0] += 1
|
|
113
|
+
store.write_shards(
|
|
114
|
+
ds, prefix=f"bench/manifest-{idx}", manifest=True,
|
|
115
|
+
cache_local=True,
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
benchmark(_write)
|
|
119
|
+
|
|
120
|
+
def test_s3_write_cache_local(self, benchmark, tmp_path, mock_s3):
|
|
121
|
+
benchmark.extra_info["n_samples"] = 200
|
|
122
|
+
samples = generate_basic_samples(200)
|
|
123
|
+
ds = _make_source_dataset(tmp_path, samples)
|
|
124
|
+
store = _make_s3_store(mock_s3)
|
|
125
|
+
counter = [0]
|
|
126
|
+
|
|
127
|
+
def _write():
|
|
128
|
+
idx = counter[0]
|
|
129
|
+
counter[0] += 1
|
|
130
|
+
store.write_shards(
|
|
131
|
+
ds, prefix=f"bench/cache-{idx}", cache_local=True
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
benchmark(_write)
|
|
135
|
+
|
|
136
|
+
def test_s3_write_direct(self, benchmark, tmp_path, mock_s3):
|
|
137
|
+
benchmark.extra_info["n_samples"] = 200
|
|
138
|
+
samples = generate_basic_samples(200)
|
|
139
|
+
ds = _make_source_dataset(tmp_path, samples)
|
|
140
|
+
store = _make_s3_store(mock_s3)
|
|
141
|
+
counter = [0]
|
|
142
|
+
|
|
143
|
+
def _write():
|
|
144
|
+
idx = counter[0]
|
|
145
|
+
counter[0] += 1
|
|
146
|
+
store.write_shards(
|
|
147
|
+
ds, prefix=f"bench/direct-{idx}", cache_local=False
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
benchmark(_write)
|
|
151
|
+
|
|
152
|
+
def test_s3_write_numpy(self, benchmark, tmp_path, mock_s3):
|
|
153
|
+
benchmark.extra_info["n_samples"] = 100
|
|
154
|
+
samples = generate_numpy_samples(100)
|
|
155
|
+
ds = _make_source_dataset(tmp_path, samples)
|
|
156
|
+
store = _make_s3_store(mock_s3)
|
|
157
|
+
counter = [0]
|
|
158
|
+
|
|
159
|
+
def _write():
|
|
160
|
+
idx = counter[0]
|
|
161
|
+
counter[0] += 1
|
|
162
|
+
store.write_shards(ds, prefix=f"bench/numpy-{idx}")
|
|
163
|
+
|
|
164
|
+
benchmark(_write)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# =============================================================================
|
|
168
|
+
# Atmosphere Benchmarks (network-gated)
|
|
169
|
+
# =============================================================================
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@pytest.mark.network
|
|
173
|
+
class TestAtmosphereBenchmarks:
|
|
174
|
+
"""Atmosphere/ATProto benchmarks. Require live PDS access.
|
|
175
|
+
|
|
176
|
+
Run with: just bench -m network
|
|
177
|
+
"""
|
|
178
|
+
|
|
179
|
+
def test_atmosphere_publish_dataset(self, benchmark, tmp_path):
|
|
180
|
+
"""End-to-end dataset publish to Atmosphere."""
|
|
181
|
+
import os
|
|
182
|
+
|
|
183
|
+
handle = os.environ.get("ATDATA_BENCH_ATP_HANDLE")
|
|
184
|
+
password = os.environ.get("ATDATA_BENCH_ATP_PASSWORD")
|
|
185
|
+
if not handle or not password:
|
|
186
|
+
pytest.skip("ATDATA_BENCH_ATP_HANDLE/PASSWORD not set")
|
|
187
|
+
|
|
188
|
+
from atdata.atmosphere.client import AtmosphereClient
|
|
189
|
+
|
|
190
|
+
client = AtmosphereClient(handle=handle, password=password)
|
|
191
|
+
|
|
192
|
+
samples = generate_basic_samples(10)
|
|
193
|
+
tar_path = write_tar(tmp_path / "atmo-000000.tar", samples)
|
|
194
|
+
ds = atdata.Dataset[BenchBasicSample](url=str(tar_path))
|
|
195
|
+
|
|
196
|
+
counter = [0]
|
|
197
|
+
|
|
198
|
+
def _publish():
|
|
199
|
+
idx = counter[0]
|
|
200
|
+
counter[0] += 1
|
|
201
|
+
from atdata.atmosphere.records import DatasetPublisher
|
|
202
|
+
|
|
203
|
+
publisher = DatasetPublisher(client)
|
|
204
|
+
publisher.publish(ds, name=f"bench-atmo-{idx}")
|
|
205
|
+
|
|
206
|
+
benchmark(_publish)
|
|
207
|
+
|
|
208
|
+
def test_atmosphere_resolve_dataset(self, benchmark):
|
|
209
|
+
"""Resolve a dataset record from Atmosphere (read-only, anonymous)."""
|
|
210
|
+
import os
|
|
211
|
+
|
|
212
|
+
ref = os.environ.get("ATDATA_BENCH_ATP_DATASET_REF")
|
|
213
|
+
if not ref:
|
|
214
|
+
pytest.skip("ATDATA_BENCH_ATP_DATASET_REF not set")
|
|
215
|
+
|
|
216
|
+
from atdata.local._index import Index
|
|
217
|
+
|
|
218
|
+
index = Index()
|
|
219
|
+
|
|
220
|
+
benchmark(index.get_dataset, ref)
|