atdata 0.2.3b1__tar.gz → 0.3.0b1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (341) hide show
  1. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/issues.db +0 -0
  2. {atdata-0.2.3b1 → atdata-0.3.0b1}/.github/workflows/uv-test.yml +58 -3
  3. {atdata-0.2.3b1 → atdata-0.3.0b1}/.gitignore +3 -0
  4. {atdata-0.2.3b1 → atdata-0.3.0b1}/.vscode/settings.json +6 -0
  5. {atdata-0.2.3b1 → atdata-0.3.0b1}/CHANGELOG.md +105 -0
  6. {atdata-0.2.3b1 → atdata-0.3.0b1}/CLAUDE.md +4 -2
  7. {atdata-0.2.3b1 → atdata-0.3.0b1}/PKG-INFO +4 -1
  8. atdata-0.3.0b1/benchmarks/bench_atmosphere.py +220 -0
  9. atdata-0.3.0b1/benchmarks/bench_dataset_io.py +293 -0
  10. atdata-0.3.0b1/benchmarks/bench_index_providers.py +215 -0
  11. atdata-0.3.0b1/benchmarks/bench_query.py +278 -0
  12. atdata-0.3.0b1/benchmarks/conftest.py +345 -0
  13. atdata-0.3.0b1/benchmarks/render_report.py +462 -0
  14. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/AbstractDataStore.html +28 -192
  15. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/AbstractIndex.html +28 -192
  16. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/AtUri.html +28 -192
  17. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/AtmosphereClient.html +28 -192
  18. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/AtmosphereIndex.html +28 -192
  19. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/AtmosphereIndexEntry.html +28 -192
  20. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/BlobSource.html +28 -192
  21. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/DataSource.html +28 -192
  22. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/Dataset.html +82 -228
  23. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/DatasetDict.html +28 -192
  24. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/DatasetLoader.html +28 -192
  25. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/DatasetPublisher.html +28 -192
  26. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/DictSample.html +28 -192
  27. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/IndexEntry.html +28 -192
  28. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/Lens.html +28 -192
  29. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/LensLoader.html +28 -192
  30. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/LensPublisher.html +28 -192
  31. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/PDSBlobStore.html +28 -192
  32. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/Packable-protocol.html +28 -192
  33. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/PackableSample.html +28 -192
  34. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/S3Source.html +28 -192
  35. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/SampleBatch.html +28 -192
  36. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/SchemaLoader.html +28 -192
  37. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/SchemaPublisher.html +28 -192
  38. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/URLSource.html +28 -192
  39. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/index.html +28 -192
  40. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/load_dataset.html +28 -192
  41. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/local.Index.html +28 -192
  42. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/local.LocalDatasetEntry.html +28 -192
  43. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/local.S3DataStore.html +28 -192
  44. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/packable.html +36 -195
  45. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/api/promote_to_atmosphere.html +28 -192
  46. atdata-0.3.0b1/docs/benchmarks/index.html +1331 -0
  47. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/index.html +44 -208
  48. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/architecture.html +47 -211
  49. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/atmosphere.html +55 -219
  50. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/datasets.html +46 -210
  51. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/deployment.html +33 -197
  52. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/lenses.html +44 -208
  53. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/load-dataset.html +45 -209
  54. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/local-storage.html +44 -208
  55. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/packable-samples.html +46 -210
  56. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/promotion.html +41 -205
  57. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/protocols.html +45 -209
  58. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/troubleshooting.html +33 -197
  59. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/reference/uri-spec.html +35 -199
  60. atdata-0.3.0b1/docs/robots.txt +1 -0
  61. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/search.json +12 -183
  62. atdata-0.3.0b1/docs/site_libs/bootstrap/bootstrap-62ce3d63edf8507b4d15f75c6b92352a.min.css +12 -0
  63. atdata-0.2.3b1/docs/site_libs/quarto-html/quarto-syntax-highlighting-9582434199d49cc9e91654cdeeb4866b.css → atdata-0.3.0b1/docs/site_libs/quarto-html/quarto-syntax-highlighting-b854dd4081d6110d4acfde180236d7b2.css +2 -2
  64. atdata-0.3.0b1/docs/sitemap.xml +199 -0
  65. atdata-0.3.0b1/docs/styles.css +50 -0
  66. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/tutorials/atmosphere.html +47 -211
  67. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/tutorials/local-workflow.html +41 -205
  68. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/tutorials/promotion.html +45 -209
  69. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/tutorials/quickstart.html +39 -203
  70. atdata-0.3.0b1/docs_src/.nojekyll +0 -0
  71. atdata-0.3.0b1/docs_src/_brand.yml +73 -0
  72. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/_quarto.yml +57 -13
  73. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/Dataset.qmd +35 -15
  74. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/packable.qmd +13 -5
  75. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/index.qmd +2 -2
  76. atdata-0.3.0b1/docs_src/styles.css +50 -0
  77. atdata-0.3.0b1/docs_src/theme-dark.scss +1 -0
  78. atdata-0.3.0b1/docs_src/theme-light.scss +15 -0
  79. atdata-0.3.0b1/justfile +49 -0
  80. atdata-0.3.0b1/prototyping/human-review-atmosphere.ipynb +66 -0
  81. atdata-0.3.0b1/prototyping/human-review-local.ipynb +674 -0
  82. {atdata-0.2.3b1 → atdata-0.3.0b1}/pyproject.toml +12 -1
  83. atdata-0.3.0b1/src/atdata/.gitignore +1 -0
  84. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/__init__.py +30 -0
  85. atdata-0.3.0b1/src/atdata/_exceptions.py +168 -0
  86. atdata-0.3.0b1/src/atdata/_helpers.py +74 -0
  87. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_hf_api.py +63 -11
  88. atdata-0.3.0b1/src/atdata/_logging.py +70 -0
  89. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_protocols.py +19 -62
  90. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_schema_codec.py +5 -4
  91. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_type_utils.py +28 -2
  92. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/__init__.py +19 -9
  93. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/records.py +3 -2
  94. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/schema.py +2 -2
  95. atdata-0.3.0b1/src/atdata/cli/__init__.py +208 -0
  96. atdata-0.3.0b1/src/atdata/cli/inspect.py +69 -0
  97. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/cli/local.py +1 -1
  98. atdata-0.3.0b1/src/atdata/cli/preview.py +63 -0
  99. atdata-0.3.0b1/src/atdata/cli/schema.py +109 -0
  100. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/dataset.py +428 -326
  101. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/lens.py +9 -2
  102. atdata-0.3.0b1/src/atdata/local/__init__.py +71 -0
  103. atdata-0.3.0b1/src/atdata/local/_entry.py +157 -0
  104. atdata-0.3.0b1/src/atdata/local/_index.py +940 -0
  105. atdata-0.3.0b1/src/atdata/local/_repo_legacy.py +218 -0
  106. atdata-0.3.0b1/src/atdata/local/_s3.py +349 -0
  107. atdata-0.3.0b1/src/atdata/local/_schema.py +380 -0
  108. atdata-0.3.0b1/src/atdata/manifest/__init__.py +28 -0
  109. atdata-0.3.0b1/src/atdata/manifest/_aggregates.py +156 -0
  110. atdata-0.3.0b1/src/atdata/manifest/_builder.py +163 -0
  111. atdata-0.3.0b1/src/atdata/manifest/_fields.py +154 -0
  112. atdata-0.3.0b1/src/atdata/manifest/_manifest.py +146 -0
  113. atdata-0.3.0b1/src/atdata/manifest/_query.py +150 -0
  114. atdata-0.3.0b1/src/atdata/manifest/_writer.py +74 -0
  115. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/promote.py +4 -4
  116. atdata-0.3.0b1/src/atdata/providers/__init__.py +25 -0
  117. atdata-0.3.0b1/src/atdata/providers/_base.py +140 -0
  118. atdata-0.3.0b1/src/atdata/providers/_factory.py +69 -0
  119. atdata-0.3.0b1/src/atdata/providers/_postgres.py +214 -0
  120. atdata-0.3.0b1/src/atdata/providers/_redis.py +171 -0
  121. atdata-0.3.0b1/src/atdata/providers/_sqlite.py +191 -0
  122. atdata-0.3.0b1/src/atdata/repository.py +323 -0
  123. atdata-0.3.0b1/src/atdata/testing.py +337 -0
  124. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/EXPECTED_WARNINGS.md +2 -2
  125. atdata-0.3.0b1/tests/test_cli.py +794 -0
  126. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_dataset.py +11 -10
  127. atdata-0.3.0b1/tests/test_dev_experience.py +423 -0
  128. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_hf_api.py +7 -3
  129. atdata-0.3.0b1/tests/test_index_providers.py +477 -0
  130. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration.py +1 -1
  131. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_atmosphere_live.py +3 -3
  132. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_cross_backend.py +12 -12
  133. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_dynamic_types.py +2 -2
  134. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_edge_cases.py +3 -3
  135. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_error_handling.py +25 -31
  136. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_lens.py +1 -1
  137. atdata-0.3.0b1/tests/test_integration_manifest.py +263 -0
  138. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_promotion.py +10 -10
  139. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_lens.py +1 -1
  140. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_local.py +9 -8
  141. atdata-0.3.0b1/tests/test_logging.py +60 -0
  142. atdata-0.3.0b1/tests/test_manifest.py +528 -0
  143. atdata-0.3.0b1/tests/test_partial_failure.py +152 -0
  144. atdata-0.3.0b1/tests/test_postgres_provider.py +411 -0
  145. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_protocols.py +5 -5
  146. atdata-0.3.0b1/tests/test_query_coverage.py +215 -0
  147. atdata-0.3.0b1/tests/test_repository.py +377 -0
  148. atdata-0.3.0b1/tests/test_repository_coverage.py +265 -0
  149. atdata-0.3.0b1/tests/test_stub_manager.py +556 -0
  150. atdata-0.3.0b1/tests/test_testing.py +205 -0
  151. atdata-0.3.0b1/tests/test_type_utils.py +181 -0
  152. {atdata-0.2.3b1 → atdata-0.3.0b1}/uv.lock +114 -2
  153. atdata-0.2.3b1/docs/robots.txt +0 -1
  154. atdata-0.2.3b1/docs/site_libs/bootstrap/bootstrap-62bce24ca844314e7bb1a34dbdfe05cc.min.css +0 -12
  155. atdata-0.2.3b1/docs/site_libs/bootstrap/bootstrap-dark-7964ffd8887b0991fe8d71c6c8bc75d6.min.css +0 -12
  156. atdata-0.2.3b1/docs/site_libs/quarto-html/quarto-syntax-highlighting-dark-8dcd8563ea6803ab7cbb3d71ca5772e1.css +0 -210
  157. atdata-0.2.3b1/docs/sitemap.xml +0 -199
  158. atdata-0.2.3b1/justfile +0 -2
  159. atdata-0.2.3b1/prototyping/human-review-atmosphere.ipynb +0 -25
  160. atdata-0.2.3b1/prototyping/human-review-local.ipynb +0 -634
  161. atdata-0.2.3b1/src/atdata/_helpers.py +0 -60
  162. atdata-0.2.3b1/src/atdata/cli/__init__.py +0 -222
  163. atdata-0.2.3b1/src/atdata/local.py +0 -1720
  164. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/c.md +0 -0
  165. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/cpp.md +0 -0
  166. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/csharp.md +0 -0
  167. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/global.md +0 -0
  168. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/go.md +0 -0
  169. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/java.md +0 -0
  170. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/javascript-react.md +0 -0
  171. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/javascript.md +0 -0
  172. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/kotlin.md +0 -0
  173. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/odin.md +0 -0
  174. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/php.md +0 -0
  175. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/project.md +0 -0
  176. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/python.md +0 -0
  177. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/ruby.md +0 -0
  178. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/rust.md +0 -0
  179. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/scala.md +0 -0
  180. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/swift.md +0 -0
  181. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/typescript-react.md +0 -0
  182. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/typescript.md +0 -0
  183. {atdata-0.2.3b1 → atdata-0.3.0b1}/.chainlink/rules/zig.md +0 -0
  184. {atdata-0.2.3b1 → atdata-0.3.0b1}/.claude/hooks/post-edit-check.py +0 -0
  185. {atdata-0.2.3b1 → atdata-0.3.0b1}/.claude/hooks/prompt-guard.py +0 -0
  186. {atdata-0.2.3b1 → atdata-0.3.0b1}/.claude/hooks/session-start.py +0 -0
  187. {atdata-0.2.3b1 → atdata-0.3.0b1}/.claude/settings.json +0 -0
  188. {atdata-0.2.3b1 → atdata-0.3.0b1}/.github/workflows/uv-publish-pypi.yml +0 -0
  189. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/roadmap/v0.2/03_human-review-assessment.md +0 -0
  190. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/roadmap/v0.3/01_codebase-review.md +0 -0
  191. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/roadmap/v0.3/02_synthesis-roadmap.md +0 -0
  192. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/roadmap/v0.3/architecture-doc.md +0 -0
  193. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/01_overview.md +0 -0
  194. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/02_lexicon_design.md +0 -0
  195. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/03_python_client.md +0 -0
  196. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/04_appview.md +0 -0
  197. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/05_codegen.md +0 -0
  198. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/README.md +0 -0
  199. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/atproto_integration.md +0 -0
  200. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/01_schema_representation_format.md +0 -0
  201. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/02_lens_code_storage.md +0 -0
  202. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/03_webdataset_storage.md +0 -0
  203. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/04_schema_evolution.md +0 -0
  204. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/05_lexicon_namespace.md +0 -0
  205. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/06_lexicon_validation.md +0 -0
  206. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/README.md +0 -0
  207. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/assessment.md +0 -0
  208. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/record_lexicon_assessment.md +0 -0
  209. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/decisions/sampleSchema_design_questions.md +0 -0
  210. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/examples/code/ndarray_roundtrip.py +0 -0
  211. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/examples/code/validate_ndarray_shim.py +0 -0
  212. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/examples/dataset_blob_storage.json +0 -0
  213. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/examples/dataset_external_storage.json +0 -0
  214. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/examples/lens_example.json +0 -0
  215. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/examples/sampleSchema_example.json +0 -0
  216. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/README.md +0 -0
  217. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/README_ARRAY_FORMATS.md +0 -0
  218. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/README_SCHEMA_TYPES.md +0 -0
  219. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.arrayFormat.json +0 -0
  220. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.getLatestSchema.json +0 -0
  221. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.lens.json +0 -0
  222. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.record.json +0 -0
  223. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.sampleSchema.json +0 -0
  224. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.schemaType.json +0 -0
  225. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.storageBlobs.json +0 -0
  226. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ac.foundation.dataset.storageExternal.json +0 -0
  227. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/lexicons/ndarray_shim.json +0 -0
  228. {atdata-0.2.3b1 → atdata-0.3.0b1}/.planning/setup/ndarray_shim_spec.md +0 -0
  229. {atdata-0.2.3b1 → atdata-0.3.0b1}/.python-version +0 -0
  230. {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/atproto_lexicon_guide.md +0 -0
  231. {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/atproto_lexicon_spec.md +0 -0
  232. {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/huggingface-datasets/architecture.md +0 -0
  233. {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/huggingface-datasets/loading-guide.md +0 -0
  234. {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/huggingface-datasets/loading-methods.md +0 -0
  235. {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/huggingface-datasets/main-classes.md +0 -0
  236. {atdata-0.2.3b1 → atdata-0.3.0b1}/.reference/python_atproto_sdk.md +0 -0
  237. {atdata-0.2.3b1 → atdata-0.3.0b1}/.review/comprehensive-review.md +0 -0
  238. {atdata-0.2.3b1 → atdata-0.3.0b1}/.review/human-review.md +0 -0
  239. {atdata-0.2.3b1 → atdata-0.3.0b1}/LICENSE +0 -0
  240. {atdata-0.2.3b1 → atdata-0.3.0b1}/README.md +0 -0
  241. /atdata-0.2.3b1/docs/.nojekyll → /atdata-0.3.0b1/benchmarks/__init__.py +0 -0
  242. {atdata-0.2.3b1/docs_src → atdata-0.3.0b1/docs}/.nojekyll +0 -0
  243. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/assets/styles.css +0 -0
  244. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/bootstrap/bootstrap-icons.css +0 -0
  245. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/bootstrap/bootstrap-icons.woff +0 -0
  246. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/bootstrap/bootstrap.min.js +0 -0
  247. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/clipboard/clipboard.min.js +0 -0
  248. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-html/anchor.min.js +0 -0
  249. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-html/popper.min.js +0 -0
  250. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-html/quarto.js +0 -0
  251. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-html/tabsets/tabsets.js +0 -0
  252. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-html/tippy.css +0 -0
  253. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-html/tippy.umd.min.js +0 -0
  254. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-nav/headroom.min.js +0 -0
  255. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-nav/quarto-nav.js +0 -0
  256. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-search/autocomplete.umd.js +0 -0
  257. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-search/fuse.min.js +0 -0
  258. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs/site_libs/quarto-search/quarto-search.js +0 -0
  259. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/api-index-handwritten.qmd +0 -0
  260. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/atmosphere.md +0 -0
  261. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/datasets.md +0 -0
  262. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/index.md +0 -0
  263. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/lenses.md +0 -0
  264. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/load-dataset.md +0 -0
  265. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/local-storage.md +0 -0
  266. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/packable-samples.md +0 -0
  267. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/promotion.md +0 -0
  268. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.backup/protocols.md +0 -0
  269. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/.gitignore +0 -0
  270. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/AbstractDataStore.qmd +0 -0
  271. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/AbstractIndex.qmd +0 -0
  272. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/AtUri.qmd +0 -0
  273. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/AtmosphereClient.qmd +0 -0
  274. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/AtmosphereIndex.qmd +0 -0
  275. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/AtmosphereIndexEntry.qmd +0 -0
  276. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/BlobSource.qmd +0 -0
  277. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/DataSource.qmd +0 -0
  278. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/DatasetDict.qmd +0 -0
  279. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/DatasetLoader.qmd +0 -0
  280. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/DatasetPublisher.qmd +0 -0
  281. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/DictSample.qmd +0 -0
  282. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/IndexEntry.qmd +0 -0
  283. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/Lens.qmd +0 -0
  284. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/LensLoader.qmd +0 -0
  285. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/LensPublisher.qmd +0 -0
  286. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/PDSBlobStore.qmd +0 -0
  287. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/Packable-protocol.qmd +0 -0
  288. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/PackableSample.qmd +0 -0
  289. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/S3Source.qmd +0 -0
  290. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/SampleBatch.qmd +0 -0
  291. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/SchemaLoader.qmd +0 -0
  292. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/SchemaPublisher.qmd +0 -0
  293. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/URLSource.qmd +0 -0
  294. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/index.qmd +0 -0
  295. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/load_dataset.qmd +0 -0
  296. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/local.Index.qmd +0 -0
  297. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/local.LocalDatasetEntry.qmd +0 -0
  298. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/local.S3DataStore.qmd +0 -0
  299. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/api/promote_to_atmosphere.qmd +0 -0
  300. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/assets/styles.css +0 -0
  301. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/objects.json +0 -0
  302. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/architecture.qmd +0 -0
  303. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/atmosphere.qmd +0 -0
  304. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/datasets.qmd +0 -0
  305. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/deployment.qmd +0 -0
  306. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/lenses.qmd +0 -0
  307. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/load-dataset.qmd +0 -0
  308. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/local-storage.qmd +0 -0
  309. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/packable-samples.qmd +0 -0
  310. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/promotion.qmd +0 -0
  311. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/protocols.qmd +0 -0
  312. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/troubleshooting.qmd +0 -0
  313. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/reference/uri-spec.qmd +0 -0
  314. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/tutorials/atmosphere.qmd +0 -0
  315. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/tutorials/local-workflow.qmd +0 -0
  316. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/tutorials/promotion.qmd +0 -0
  317. {atdata-0.2.3b1 → atdata-0.3.0b1}/docs_src/tutorials/quickstart.qmd +0 -0
  318. {atdata-0.2.3b1 → atdata-0.3.0b1}/examples/atmosphere_demo.py +0 -0
  319. {atdata-0.2.3b1 → atdata-0.3.0b1}/examples/local_workflow.py +0 -0
  320. {atdata-0.2.3b1 → atdata-0.3.0b1}/examples/promote_workflow.py +0 -0
  321. {atdata-0.2.3b1 → atdata-0.3.0b1}/issues.db +0 -0
  322. {atdata-0.2.3b1 → atdata-0.3.0b1}/prototyping/.credentials/.gitignore +0 -0
  323. {atdata-0.2.3b1 → atdata-0.3.0b1}/prototyping/data/.gitignore +0 -0
  324. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_cid.py +0 -0
  325. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_sources.py +0 -0
  326. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/_stub_manager.py +0 -0
  327. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/_types.py +0 -0
  328. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/client.py +0 -0
  329. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/lens.py +0 -0
  330. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/atmosphere/store.py +0 -0
  331. {atdata-0.2.3b1 → atdata-0.3.0b1}/src/atdata/cli/diagnose.py +0 -0
  332. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/conftest.py +0 -0
  333. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/fixtures/test_samples.tar +0 -0
  334. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_atmosphere.py +0 -0
  335. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_cid.py +0 -0
  336. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_helpers.py +0 -0
  337. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_atmosphere.py +0 -0
  338. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_e2e.py +0 -0
  339. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_integration_local.py +0 -0
  340. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_promote.py +0 -0
  341. {atdata-0.2.3b1 → atdata-0.3.0b1}/tests/test_sources.py +0 -0
@@ -4,13 +4,11 @@ on:
4
4
  push:
5
5
  branches:
6
6
  - main
7
- - release/*
8
7
  pull_request:
9
- branches:
10
- - main
11
8
 
12
9
  permissions:
13
10
  contents: read
11
+ actions: read
14
12
 
15
13
  concurrency:
16
14
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -77,3 +75,60 @@ jobs:
77
75
  with:
78
76
  fail_ci_if_error: false
79
77
  token: ${{ secrets.CODECOV_TOKEN }}
78
+
79
+ benchmark:
80
+ name: Benchmarks
81
+ runs-on: ubuntu-latest
82
+ needs: [lint]
83
+ permissions:
84
+ contents: write
85
+ actions: write
86
+ steps:
87
+ - uses: actions/checkout@v5
88
+
89
+ - name: Set up Python
90
+ uses: actions/setup-python@v5
91
+ with:
92
+ python-version: "3.14"
93
+
94
+ - name: Install uv
95
+ uses: astral-sh/setup-uv@v6
96
+ with:
97
+ enable-cache: true
98
+
99
+ - name: Install just
100
+ uses: extractions/setup-just@v2
101
+
102
+ - name: Install the project
103
+ run: uv sync --locked --all-extras --dev
104
+
105
+ - name: Start Redis
106
+ uses: supercharge/redis-github-action@1.8.1
107
+ with:
108
+ redis-version: 7
109
+
110
+ - name: Run benchmarks
111
+ run: just bench
112
+
113
+ - name: Copy report to docs
114
+ run: |
115
+ mkdir -p docs/benchmarks
116
+ cp .bench/report.html docs/benchmarks/index.html
117
+
118
+ - name: Commit updated benchmark docs
119
+ if: github.event_name == 'push'
120
+ run: |
121
+ git config user.name "github-actions[bot]"
122
+ git config user.email "github-actions[bot]@users.noreply.github.com"
123
+ git add docs/benchmarks/index.html
124
+ git diff --cached --quiet || git commit -m "docs: update benchmark report [skip ci]"
125
+ git push
126
+
127
+ - name: Upload benchmark report
128
+ uses: actions/upload-artifact@v4
129
+ if: always()
130
+ with:
131
+ name: benchmark-report
132
+ path: |
133
+ .bench/report.html
134
+ .bench/*.json
@@ -52,6 +52,9 @@ MANIFEST
52
52
  pip-log.txt
53
53
  pip-delete-this-directory.txt
54
54
 
55
+ # Benchmark results
56
+ .bench/
57
+
55
58
  # Unit test / coverage reports
56
59
  htmlcov/
57
60
  .tox/
@@ -5,15 +5,19 @@
5
5
  "atproto",
6
6
  "creds",
7
7
  "dtype",
8
+ "fastparquet",
8
9
  "getattr",
9
10
  "hgetall",
10
11
  "hset",
12
+ "libipld",
11
13
  "maxcount",
12
14
  "minioadmin",
13
15
  "msgpack",
14
16
  "ndarray",
15
17
  "NSID",
16
18
  "ormsgpack",
19
+ "psycopg",
20
+ "pydantic",
17
21
  "pypi",
18
22
  "pyproject",
19
23
  "pytest",
@@ -24,6 +28,8 @@
24
28
  "schemamodels",
25
29
  "shardlists",
26
30
  "tariterators",
31
+ "tqdm",
32
+ "typer",
27
33
  "unpackb",
28
34
  "webdataset"
29
35
  ],
@@ -25,6 +25,111 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
25
25
  - **Comprehensive integration test suite**: 593 tests covering E2E flows, error handling, edge cases
26
26
 
27
27
  ### Changed
28
+ - Investigate upload-artifact not finding benchmark output (#512)
29
+ - Fix duplicate CI runs for push+PR overlap (#511)
30
+ - Scope contents:write permission to benchmark job only (#510)
31
+ - Add benchmark docs auto-commit to CI workflow (#509)
32
+ - Submit PR for v0.3.0b1 release to upstream/main (#508)
33
+ - Implement GH#39: Production hardening (observability, error handling, testing infra) (#504)
34
+ - Add pluggable structured logging via atdata.configure_logging (#507)
35
+ - Add PartialFailureError and shard-level error handling to Dataset.map (#506)
36
+ - Add atdata.testing module with mock clients, fixtures, and helpers (#505)
37
+ - Fix CI linting failures (20 ruff errors) (#503)
38
+ - Adversarial review: Post-benchmark suite assessment (#494)
39
+ - Remove redundant protocol docstrings that restate signatures (#500)
40
+ - Add missing unit tests for _type_utils.py (#499)
41
+ - Strengthen weak assertions (assert X is not None → value checks) (#498)
42
+ - Trim verbose exception constructor docstrings (#501)
43
+ - Analyze benchmark results for performance improvement opportunities (#502)
44
+ - Consolidate remaining duplicate sample types in test files (#497)
45
+ - Remove dead code: _repo_legacy.py legacy UUID field, unused imports (#496)
46
+ - Trim verbose docstrings in dataset.py and _index.py (#495)
47
+ - Benchmark report: replace mean/stddev with median/IQR, add per-sample columns (#492)
48
+ - Add parameter descriptions to benchmark suite with automatic report introspection (#491)
49
+ - HTML benchmark reports with CI integration (#487)
50
+ - Add bench + render step to CI on highest Python version only (#490)
51
+ - Update justfile bench commands to export JSON and render (#489)
52
+ - Create render_report.py script to convert JSON to HTML (#488)
53
+ - Increase test coverage for low-coverage modules (#480)
54
+ - Add providers/_postgres.py tests (mock-based) (#485)
55
+ - Add _stub_manager.py tests (#484)
56
+ - Add manifest/_query.py tests (#483)
57
+ - Add repository.py tests (#482)
58
+ - Add CLI tests (cli/__init__, diagnose, local, preview, schema) (#481)
59
+ - Check test coverage for CLI utils (#479)
60
+ - Add performance benchmark suite for atdata (#471)
61
+ - Verify benchmarks run (#478)
62
+ - Update pyproject.toml and justfile (#477)
63
+ - Create bench_atmosphere.py (#476)
64
+ - Create bench_query.py (#475)
65
+ - Create bench_dataset_io.py (#474)
66
+ - Create bench_index_providers.py (#473)
67
+ - Create benchmarks/conftest.py with shared fixtures (#472)
68
+ - Add per-shard manifest and query system (GH #35) (#462)
69
+ - Write unit and integration tests (#470)
70
+ - Integrate manifest into write path and Dataset.query() (#469)
71
+ - Implement QueryExecutor and SampleLocation (#468)
72
+ - Implement ManifestWriter (JSON + parquet) (#467)
73
+ - Implement ManifestBuilder (#465)
74
+ - Implement ShardManifest data model (#466)
75
+ - Implement aggregate collectors (categorical, numeric, set) (#464)
76
+ - Implement ManifestField annotation and resolve_manifest_fields() (#463)
77
+ - Migrate type annotations from PackableSample to Packable protocol (#461)
78
+ - Remove LocalIndex factory — consolidate to Index (#460)
79
+ - Split local.py monolith into local/ package (#452)
80
+ - Verify tests and lint pass (#459)
81
+ - Create __init__.py re-export facade and delete local.py (#458)
82
+ - Create _repo_legacy.py with deprecated Repo class (#457)
83
+ - Create _index.py with Index class and LocalIndex factory (#456)
84
+ - Create _s3.py with S3DataStore and S3 helpers (#455)
85
+ - Create _schema.py with schema models and helpers (#454)
86
+ - Create _entry.py with LocalDatasetEntry and constants (#453)
87
+ - Migrate CLI from argparse to typer (#449)
88
+ - Investigate test failures (#450)
89
+ - Fix ensure_stub receiving LocalSchemaRecord instead of dict (#451)
90
+ - GH#38: Developer experience improvements (#437)
91
+ - CLI: atdata preview command (#440)
92
+ - CLI: atdata schema show/diff commands (#439)
93
+ - CLI: atdata inspect command (#438)
94
+ - Dataset.__len__ and Dataset.select() for sample count and indexed access (#447)
95
+ - Dataset.to_pandas() and Dataset.to_dict() export methods (#446)
96
+ - Dataset.filter() and Dataset.map() streaming transforms (#445)
97
+ - Dataset.get(key) for keyed sample access (#442)
98
+ - Dataset.describe() summary statistics (#444)
99
+ - Dataset.schema property and column_names (#443)
100
+ - Dataset.head(n) and Dataset.__iter__ convenience methods (#441)
101
+ - Custom exception hierarchy with actionable error messages (#448)
102
+ - Adversarial review: Post-Repository consolidation assessment (#430)
103
+ - Remove backwards-compat dict-access methods from SchemaField and LocalSchemaRecord (#436)
104
+ - Add missing test coverage for Repository prefix routing edge cases and error paths (#435)
105
+ - Trim over-verbose docstrings in local.py module/class level (#434)
106
+ - Fix formally incorrect test assertions (batch_size, CID, brace notation) (#433)
107
+ - Consolidate duplicate test sample types across test files into conftest.py (#432)
108
+ - Consolidate duplicate entry-creation logic in Index (add_entry vs _insert_dataset_to_provider) (#431)
109
+ - Switch default Index provider from Redis to SQLite (#429)
110
+ - Consolidated Index with Repository system (#424)
111
+ - Phase 4: Deprecate AtmosphereIndex, update exports (#428)
112
+ - Phase 3: Default Index singleton and load_dataset integration (#427)
113
+ - Phase 2: Extend Index with repos/atmosphere params and prefix routing (#426)
114
+ - Phase 1: Create Repository dataclass and _AtmosphereBackend in repository.py (#425)
115
+ - Adversarial review: Post-IndexProvider pluggable storage assessment (#417)
116
+ - Convert TODO comments to tracked issues or remove (#422)
117
+ - Remove deprecated shard_list property references from docstrings (#421)
118
+ - Replace bare except in _stub_manager.py and cli/local.py with specific exceptions (#423)
119
+ - Tighten generic pytest.raises(Exception) to specific exception types in tests (#420)
120
+ - Replace assert statements with ValueError in production code (#419)
121
+ - Consolidate duplicated _parse_semver into _type_utils.py (#418)
122
+ - feat: Add SQLite/PostgreSQL index providers (GH #42) (#409)
123
+ - Update documentation and public API exports (#416)
124
+ - Add tests for all providers (#415)
125
+ - Refactor Index class to accept provider parameter (#414)
126
+ - Implement PostgresIndexProvider (#413)
127
+ - Implement SqliteIndexProvider (#412)
128
+ - Implement RedisIndexProvider (extract from Index class) (#411)
129
+ - Define IndexProvider protocol in _protocols.py (#410)
130
+ - Add just lint command to justfile (#408)
131
+ - Add SQLite/PostgreSQL providers for LocalIndex (in addition to Redis) (#407)
132
+ - Fix type hints for @atdata.packable decorator to show PackableSample methods (#406)
28
133
  - Review GitHub workflows and recommend CI improvements (#405)
29
134
  - Fix type signatures for Dataset.ordered and Dataset.shuffled (GH#28) (#404)
30
135
  - Investigate quartodoc Example section rendering - missing CSS classes on pre/code tags (#401)
@@ -46,8 +46,10 @@ uv build
46
46
  Development tasks are managed with [just](https://github.com/casey/just), a command runner. Available commands:
47
47
 
48
48
  ```bash
49
- # Build documentation (runs quartodoc + quarto)
50
- just docs
49
+ just test # Run all tests with coverage
50
+ just test tests/test_dataset.py # Run specific test file
51
+ just lint # Run ruff check + format check
52
+ just docs # Build documentation (runs quartodoc + quarto)
51
53
  ```
52
54
 
53
55
  The `justfile` is in the project root. Add new dev tasks there rather than creating shell scripts.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: atdata
3
- Version: 0.2.3b1
3
+ Version: 0.3.0b1
4
4
  Summary: A loose federation of distributed, typed datasets
5
5
  Author-email: Maxine Levesque <hello@maxine.science>, "Maxine @ Forecast Bio" <maxine@forecast.bio>
6
6
  License-File: LICENSE
@@ -20,9 +20,12 @@ Requires-Dist: requests>=2.32.5
20
20
  Requires-Dist: s3fs>=2025.12.0
21
21
  Requires-Dist: schemamodels>=0.9.1
22
22
  Requires-Dist: tqdm>=4.67.1
23
+ Requires-Dist: typer>=0.21.1
23
24
  Requires-Dist: webdataset>=1.0.2
24
25
  Provides-Extra: atmosphere
25
26
  Requires-Dist: atproto>=0.0.55; extra == 'atmosphere'
27
+ Provides-Extra: postgres
28
+ Requires-Dist: psycopg[binary]>=3.1; extra == 'postgres'
26
29
  Description-Content-Type: text/markdown
27
30
 
28
31
  # atdata
@@ -0,0 +1,220 @@
1
+ """Performance benchmarks for remote storage backends.
2
+
3
+ Covers S3DataStore (via moto mock) and Atmosphere/ATProto (network-gated).
4
+ S3 benchmarks use moto for reproducible local measurement.
5
+ Atmosphere benchmarks are marked ``network`` and skip unless a live PDS is available.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from pathlib import Path
11
+
12
+ import numpy as np
13
+ import pytest
14
+ from moto import mock_aws
15
+
16
+ import atdata
17
+
18
+ from .conftest import (
19
+ IMAGE_SHAPE,
20
+ BenchBasicSample,
21
+ BenchManifestSample,
22
+ BenchNumpySample,
23
+ generate_basic_samples,
24
+ generate_manifest_samples,
25
+ generate_numpy_samples,
26
+ write_tar,
27
+ )
28
+
29
+
30
+ # =============================================================================
31
+ # S3 Fixtures
32
+ # =============================================================================
33
+
34
+
35
@pytest.fixture
def mock_s3():
    """Yield connection details for a moto-mocked S3 bucket.

    The yielded mapping has two keys: ``"credentials"`` (dummy AWS key pair)
    and ``"bucket"`` (name of a bucket created inside the mock). The
    ``mock_aws`` context stays open until the consuming test finishes.
    """
    with mock_aws():
        # boto3 is imported only once the mock is active, as in the original.
        import boto3

        fake_creds = dict(
            AWS_ACCESS_KEY_ID="testing",
            AWS_SECRET_ACCESS_KEY="testing",
        )
        bucket = "bench-bucket"

        client = boto3.client(
            "s3",
            region_name="us-east-1",
            aws_access_key_id=fake_creds["AWS_ACCESS_KEY_ID"],
            aws_secret_access_key=fake_creds["AWS_SECRET_ACCESS_KEY"],
        )
        client.create_bucket(Bucket=bucket)

        yield dict(credentials=fake_creds, bucket=bucket)
57
+
58
+
59
def _make_s3_store(mock_s3_env):
    """Build an ``S3DataStore`` from a mapping with ``credentials`` and ``bucket`` keys."""
    from atdata.local._s3 import S3DataStore

    store_kwargs = {key: mock_s3_env[key] for key in ("credentials", "bucket")}
    return S3DataStore(**store_kwargs)
66
+
67
+
68
def _make_source_dataset(tmp_path, samples):
    """Write ``samples`` to a tar under ``tmp_path`` and return a Dataset over it.

    Used as the source dataset for the S3 write benchmarks. The dataset is
    parameterized by the class of the first sample, so ``samples`` must be
    non-empty.
    """
    shard_file = write_tar(tmp_path / "source-000000.tar", samples)
    typed_dataset = atdata.Dataset[type(samples[0])]
    return typed_dataset(url=str(shard_file))
73
+
74
+
75
+ # =============================================================================
76
+ # S3 Write Benchmarks
77
+ # =============================================================================
78
+
79
+
80
def _bench_unique_prefix_writes(benchmark, store, ds, prefix, **write_kwargs):
    """Benchmark ``store.write_shards(ds, ...)`` under a fresh prefix per call.

    Args:
        benchmark: The pytest-benchmark fixture; it is handed the write callable.
        store: Object exposing ``write_shards(ds, prefix=..., **kwargs)``.
        ds: Source dataset passed to ``write_shards`` on every call.
        prefix: Base prefix; call number ``i`` (starting at 0) writes to
            ``f"{prefix}-{i}"`` so no two calls share a prefix.
        **write_kwargs: Forwarded unchanged to ``write_shards``.
    """
    calls = 0

    def _write():
        nonlocal calls
        idx = calls
        calls += 1
        store.write_shards(ds, prefix=f"{prefix}-{idx}", **write_kwargs)

    benchmark(_write)


@pytest.mark.bench_s3
@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning")
@pytest.mark.filterwarnings("ignore:coroutine.*was never awaited:RuntimeWarning")
class TestS3WriteBenchmarks:
    """S3 shard writing benchmarks via moto mock.

    Every test builds a local source dataset, creates a store against the
    mocked bucket, and times ``write_shards`` with a unique prefix per call.
    """

    # Human-readable label for the parametrized argument.
    # NOTE(review): presumably read by the benchmark report renderer — confirm.
    PARAM_LABELS = {"n": "samples per shard"}

    @pytest.mark.parametrize("n", [100, 500], ids=["100", "500"])
    def test_s3_write_shards(self, benchmark, tmp_path, mock_s3, n):
        """Write ``n`` basic samples with default ``write_shards`` options."""
        benchmark.extra_info["n_samples"] = n
        samples = generate_basic_samples(n)
        ds = _make_source_dataset(tmp_path, samples)
        store = _make_s3_store(mock_s3)
        _bench_unique_prefix_writes(benchmark, store, ds, f"bench/basic-{n}")

    def test_s3_write_with_manifest(self, benchmark, tmp_path, mock_s3):
        """Write 200 manifest samples with ``manifest=True, cache_local=True``."""
        benchmark.extra_info["n_samples"] = 200
        samples = generate_manifest_samples(200)
        ds = _make_source_dataset(tmp_path, samples)
        store = _make_s3_store(mock_s3)
        _bench_unique_prefix_writes(
            benchmark,
            store,
            ds,
            "bench/manifest",
            manifest=True,
            cache_local=True,
        )

    def test_s3_write_cache_local(self, benchmark, tmp_path, mock_s3):
        """Write 200 basic samples with ``cache_local=True``."""
        benchmark.extra_info["n_samples"] = 200
        samples = generate_basic_samples(200)
        ds = _make_source_dataset(tmp_path, samples)
        store = _make_s3_store(mock_s3)
        _bench_unique_prefix_writes(
            benchmark, store, ds, "bench/cache", cache_local=True
        )

    def test_s3_write_direct(self, benchmark, tmp_path, mock_s3):
        """Write 200 basic samples with ``cache_local=False``."""
        benchmark.extra_info["n_samples"] = 200
        samples = generate_basic_samples(200)
        ds = _make_source_dataset(tmp_path, samples)
        store = _make_s3_store(mock_s3)
        _bench_unique_prefix_writes(
            benchmark, store, ds, "bench/direct", cache_local=False
        )

    def test_s3_write_numpy(self, benchmark, tmp_path, mock_s3):
        """Write 100 numpy samples with default ``write_shards`` options."""
        benchmark.extra_info["n_samples"] = 100
        samples = generate_numpy_samples(100)
        ds = _make_source_dataset(tmp_path, samples)
        store = _make_s3_store(mock_s3)
        _bench_unique_prefix_writes(benchmark, store, ds, "bench/numpy")
165
+
166
+
167
+ # =============================================================================
168
+ # Atmosphere Benchmarks (network-gated)
169
+ # =============================================================================
170
+
171
+
172
@pytest.mark.network
class TestAtmosphereBenchmarks:
    """Atmosphere/ATProto benchmarks. Require live PDS access.

    Run with: just bench -m network
    """

    def test_atmosphere_publish_dataset(self, benchmark, tmp_path):
        """End-to-end dataset publish to Atmosphere.

        Skipped unless both ``ATDATA_BENCH_ATP_HANDLE`` and
        ``ATDATA_BENCH_ATP_PASSWORD`` are set in the environment. Each
        benchmark call publishes under a distinct ``bench-atmo-<i>`` name.
        """
        import os

        handle = os.environ.get("ATDATA_BENCH_ATP_HANDLE")
        password = os.environ.get("ATDATA_BENCH_ATP_PASSWORD")
        if not handle or not password:
            pytest.skip("ATDATA_BENCH_ATP_HANDLE/PASSWORD not set")

        from atdata.atmosphere.client import AtmosphereClient

        # Imported here rather than inside the timed callable so that the
        # import statement is not executed (and measured) on every round.
        from atdata.atmosphere.records import DatasetPublisher

        client = AtmosphereClient(handle=handle, password=password)

        samples = generate_basic_samples(10)
        tar_path = write_tar(tmp_path / "atmo-000000.tar", samples)
        ds = atdata.Dataset[BenchBasicSample](url=str(tar_path))

        counter = [0]

        def _publish():
            idx = counter[0]
            counter[0] += 1
            publisher = DatasetPublisher(client)
            publisher.publish(ds, name=f"bench-atmo-{idx}")

        benchmark(_publish)

    def test_atmosphere_resolve_dataset(self, benchmark):
        """Resolve a dataset record from Atmosphere (read-only, anonymous).

        Skipped unless ``ATDATA_BENCH_ATP_DATASET_REF`` is set; its value is
        passed to ``Index().get_dataset`` as the reference to resolve.
        """
        import os

        ref = os.environ.get("ATDATA_BENCH_ATP_DATASET_REF")
        if not ref:
            pytest.skip("ATDATA_BENCH_ATP_DATASET_REF not set")

        from atdata.local._index import Index

        index = Index()

        benchmark(index.get_dataset, ref)