dataknobs-data 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. dataknobs_data-0.1.0/.gitignore +64 -0
  2. dataknobs_data-0.1.0/PKG-INFO +533 -0
  3. dataknobs_data-0.1.0/README.md +479 -0
  4. dataknobs_data-0.1.0/docs/API_IMPROVEMENTS.md +166 -0
  5. dataknobs_data-0.1.0/docs/API_REFERENCE.md +519 -0
  6. dataknobs_data-0.1.0/docs/BATCH_PROCESSING_GUIDE.md +190 -0
  7. dataknobs_data-0.1.0/docs/BOOLEAN_LOGIC_OPERATORS.md +198 -0
  8. dataknobs_data-0.1.0/docs/DESIGN_PLAN.md +444 -0
  9. dataknobs_data-0.1.0/docs/FEATURE_SUMMARY.md +152 -0
  10. dataknobs_data-0.1.0/docs/IMPLEMENTATION_STATUS.md +326 -0
  11. dataknobs_data-0.1.0/docs/NEXT_STEPS.md +298 -0
  12. dataknobs_data-0.1.0/docs/PHASE6_PLAN.md +237 -0
  13. dataknobs_data-0.1.0/docs/PHASE7_PLAN.md +172 -0
  14. dataknobs_data-0.1.0/docs/PHASE8_DOCUMENTATION_PLAN.md +420 -0
  15. dataknobs_data-0.1.0/docs/PROGRESS_CHECKLIST.md +315 -0
  16. dataknobs_data-0.1.0/docs/RANGE_OPERATORS_IMPLEMENTATION.md +150 -0
  17. dataknobs_data-0.1.0/docs/REDESIGN_CHECKLIST.md +202 -0
  18. dataknobs_data-0.1.0/docs/REDESIGN_PLAN.md +754 -0
  19. dataknobs_data-0.1.0/docs/VECTOR_STORE_DESIGN.md +777 -0
  20. dataknobs_data-0.1.0/examples/complete_example.py +266 -0
  21. dataknobs_data-0.1.0/examples/s3_example.py +205 -0
  22. dataknobs_data-0.1.0/examples/sensor_dashboard/README.md +118 -0
  23. dataknobs_data-0.1.0/examples/sensor_dashboard/SUMMARY.md +84 -0
  24. dataknobs_data-0.1.0/examples/sensor_dashboard/__init__.py +12 -0
  25. dataknobs_data-0.1.0/examples/sensor_dashboard/data_generator.py +257 -0
  26. dataknobs_data-0.1.0/examples/sensor_dashboard/demo_advanced_queries.py +258 -0
  27. dataknobs_data-0.1.0/examples/sensor_dashboard/demo_streaming_improvements.py +254 -0
  28. dataknobs_data-0.1.0/examples/sensor_dashboard/models.py +96 -0
  29. dataknobs_data-0.1.0/examples/sensor_dashboard/sensor_dashboard.py +690 -0
  30. dataknobs_data-0.1.0/htmlcov/.gitignore +2 -0
  31. dataknobs_data-0.1.0/htmlcov/class_index.html +1211 -0
  32. dataknobs_data-0.1.0/htmlcov/coverage_html_cb_6fb7b396.js +733 -0
  33. dataknobs_data-0.1.0/htmlcov/favicon_32_cb_58284776.png +0 -0
  34. dataknobs_data-0.1.0/htmlcov/function_index.html +5331 -0
  35. dataknobs_data-0.1.0/htmlcov/index.html +342 -0
  36. dataknobs_data-0.1.0/htmlcov/keybd_closed_cb_ce680311.png +0 -0
  37. dataknobs_data-0.1.0/htmlcov/status.json +1 -0
  38. dataknobs_data-0.1.0/htmlcov/style_cb_6b508a39.css +377 -0
  39. dataknobs_data-0.1.0/htmlcov/z_0d7d1fbd877256a8_coercer_py.html +374 -0
  40. dataknobs_data-0.1.0/htmlcov/z_0d7d1fbd877256a8_constraints_py.html +477 -0
  41. dataknobs_data-0.1.0/htmlcov/z_0d7d1fbd877256a8_factory_py.html +307 -0
  42. dataknobs_data-0.1.0/htmlcov/z_0d7d1fbd877256a8_result_py.html +278 -0
  43. dataknobs_data-0.1.0/htmlcov/z_0d7d1fbd877256a8_schema_py.html +436 -0
  44. dataknobs_data-0.1.0/htmlcov/z_0d7d1fbd877256a8_type_coercion_py.html +413 -0
  45. dataknobs_data-0.1.0/htmlcov/z_199d8782a13727f6_migrator_py.html +412 -0
  46. dataknobs_data-0.1.0/htmlcov/z_199d8782a13727f6_schema_evolution_py.html +491 -0
  47. dataknobs_data-0.1.0/htmlcov/z_199d8782a13727f6_transformers_py.html +443 -0
  48. dataknobs_data-0.1.0/htmlcov/z_43b794130b4bf4a9_batch_ops_py.html +600 -0
  49. dataknobs_data-0.1.0/htmlcov/z_43b794130b4bf4a9_converter_py.html +513 -0
  50. dataknobs_data-0.1.0/htmlcov/z_43b794130b4bf4a9_metadata_py.html +349 -0
  51. dataknobs_data-0.1.0/htmlcov/z_43b794130b4bf4a9_type_mapper_py.html +624 -0
  52. dataknobs_data-0.1.0/htmlcov/z_6a167ce0c88e0ee6_base_py.html +311 -0
  53. dataknobs_data-0.1.0/htmlcov/z_6a167ce0c88e0ee6_elasticsearch_py.html +204 -0
  54. dataknobs_data-0.1.0/htmlcov/z_6a167ce0c88e0ee6_postgres_py.html +158 -0
  55. dataknobs_data-0.1.0/htmlcov/z_6a167ce0c88e0ee6_s3_py.html +163 -0
  56. dataknobs_data-0.1.0/htmlcov/z_8cad8284e1760d94_constraints_py.html +480 -0
  57. dataknobs_data-0.1.0/htmlcov/z_8cad8284e1760d94_schema_py.html +479 -0
  58. dataknobs_data-0.1.0/htmlcov/z_8cad8284e1760d94_type_coercion_py.html +413 -0
  59. dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_database_py.html +725 -0
  60. dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_exceptions_py.html +223 -0
  61. dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_factory_py.html +321 -0
  62. dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_fields_py.html +239 -0
  63. dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_query_logic_py.html +465 -0
  64. dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_query_py.html +574 -0
  65. dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_records_py.html +593 -0
  66. dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_streaming_py.html +599 -0
  67. dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_factory_py.html +396 -0
  68. dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_migration_py.html +288 -0
  69. dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_migrator_py.html +492 -0
  70. dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_operations_py.html +380 -0
  71. dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_progress_py.html +389 -0
  72. dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_schema_evolution_py.html +491 -0
  73. dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_transformer_py.html +372 -0
  74. dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_transformers_py.html +443 -0
  75. dataknobs_data-0.1.0/htmlcov/z_c348d435f8109258_coercer_py.html +374 -0
  76. dataknobs_data-0.1.0/htmlcov/z_c348d435f8109258_constraints_py.html +477 -0
  77. dataknobs_data-0.1.0/htmlcov/z_c348d435f8109258_factory_py.html +307 -0
  78. dataknobs_data-0.1.0/htmlcov/z_c348d435f8109258_result_py.html +278 -0
  79. dataknobs_data-0.1.0/htmlcov/z_c348d435f8109258_schema_py.html +436 -0
  80. dataknobs_data-0.1.0/htmlcov/z_c7ce396564e170b6_factory_py.html +396 -0
  81. dataknobs_data-0.1.0/htmlcov/z_c7ce396564e170b6_migration_py.html +288 -0
  82. dataknobs_data-0.1.0/htmlcov/z_c7ce396564e170b6_migrator_py.html +478 -0
  83. dataknobs_data-0.1.0/htmlcov/z_c7ce396564e170b6_operations_py.html +380 -0
  84. dataknobs_data-0.1.0/htmlcov/z_c7ce396564e170b6_progress_py.html +389 -0
  85. dataknobs_data-0.1.0/htmlcov/z_c7ce396564e170b6_transformer_py.html +372 -0
  86. dataknobs_data-0.1.0/htmlcov/z_cb00a6efc47dbd99_pool_manager_py.html +310 -0
  87. dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_elasticsearch_async_py.html +577 -0
  88. dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_elasticsearch_py.html +599 -0
  89. dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_file_py.html +1003 -0
  90. dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_memory_py.html +481 -0
  91. dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_postgres_async_py.html +682 -0
  92. dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_postgres_py.html +1119 -0
  93. dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_postgres_refactored_py.html +660 -0
  94. dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_s3_async_py.html +654 -0
  95. dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_s3_py.html +646 -0
  96. dataknobs_data-0.1.0/pyproject.toml +188 -0
  97. dataknobs_data-0.1.0/scripts/benchmark_performance.py +403 -0
  98. dataknobs_data-0.1.0/src/dataknobs_data/__init__.py +83 -0
  99. dataknobs_data-0.1.0/src/dataknobs_data/backends/__init__.py +72 -0
  100. dataknobs_data-0.1.0/src/dataknobs_data/backends/elasticsearch.py +501 -0
  101. dataknobs_data-0.1.0/src/dataknobs_data/backends/elasticsearch_async.py +476 -0
  102. dataknobs_data-0.1.0/src/dataknobs_data/backends/file.py +907 -0
  103. dataknobs_data-0.1.0/src/dataknobs_data/backends/memory.py +384 -0
  104. dataknobs_data-0.1.0/src/dataknobs_data/backends/postgres.py +1023 -0
  105. dataknobs_data-0.1.0/src/dataknobs_data/backends/s3.py +547 -0
  106. dataknobs_data-0.1.0/src/dataknobs_data/backends/s3_async.py +552 -0
  107. dataknobs_data-0.1.0/src/dataknobs_data/database.py +629 -0
  108. dataknobs_data-0.1.0/src/dataknobs_data/exceptions.py +126 -0
  109. dataknobs_data-0.1.0/src/dataknobs_data/factory.py +225 -0
  110. dataknobs_data-0.1.0/src/dataknobs_data/fields.py +142 -0
  111. dataknobs_data-0.1.0/src/dataknobs_data/migration/__init__.py +59 -0
  112. dataknobs_data-0.1.0/src/dataknobs_data/migration/factory.py +300 -0
  113. dataknobs_data-0.1.0/src/dataknobs_data/migration/migration.py +180 -0
  114. dataknobs_data-0.1.0/src/dataknobs_data/migration/migrator.py +386 -0
  115. dataknobs_data-0.1.0/src/dataknobs_data/migration/operations.py +280 -0
  116. dataknobs_data-0.1.0/src/dataknobs_data/migration/progress.py +275 -0
  117. dataknobs_data-0.1.0/src/dataknobs_data/migration/transformer.py +265 -0
  118. dataknobs_data-0.1.0/src/dataknobs_data/pandas/__init__.py +22 -0
  119. dataknobs_data-0.1.0/src/dataknobs_data/pandas/batch_ops.py +503 -0
  120. dataknobs_data-0.1.0/src/dataknobs_data/pandas/converter.py +416 -0
  121. dataknobs_data-0.1.0/src/dataknobs_data/pandas/metadata.py +251 -0
  122. dataknobs_data-0.1.0/src/dataknobs_data/pandas/type_mapper.py +517 -0
  123. dataknobs_data-0.1.0/src/dataknobs_data/pooling/__init__.py +9 -0
  124. dataknobs_data-0.1.0/src/dataknobs_data/pooling/base.py +212 -0
  125. dataknobs_data-0.1.0/src/dataknobs_data/pooling/elasticsearch.py +106 -0
  126. dataknobs_data-0.1.0/src/dataknobs_data/pooling/postgres.py +61 -0
  127. dataknobs_data-0.1.0/src/dataknobs_data/pooling/s3.py +65 -0
  128. dataknobs_data-0.1.0/src/dataknobs_data/query.py +477 -0
  129. dataknobs_data-0.1.0/src/dataknobs_data/query_logic.py +368 -0
  130. dataknobs_data-0.1.0/src/dataknobs_data/records.py +496 -0
  131. dataknobs_data-0.1.0/src/dataknobs_data/streaming.py +499 -0
  132. dataknobs_data-0.1.0/src/dataknobs_data/validation/__init__.py +53 -0
  133. dataknobs_data-0.1.0/src/dataknobs_data/validation/coercer.py +272 -0
  134. dataknobs_data-0.1.0/src/dataknobs_data/validation/constraints.py +370 -0
  135. dataknobs_data-0.1.0/src/dataknobs_data/validation/factory.py +207 -0
  136. dataknobs_data-0.1.0/src/dataknobs_data/validation/result.py +168 -0
  137. dataknobs_data-0.1.0/src/dataknobs_data/validation/schema.py +328 -0
  138. dataknobs_data-0.1.0/tests/conftest.py +51 -0
  139. dataknobs_data-0.1.0/tests/integration/README.md +333 -0
  140. dataknobs_data-0.1.0/tests/integration/conftest.py +262 -0
  141. dataknobs_data-0.1.0/tests/integration/test_elasticsearch_integration.py +612 -0
  142. dataknobs_data-0.1.0/tests/integration/test_postgres_integration.py +475 -0
  143. dataknobs_data-0.1.0/tests/integration/test_s3_backend.py +525 -0
  144. dataknobs_data-0.1.0/tests/test_async_elasticsearch_native.py +363 -0
  145. dataknobs_data-0.1.0/tests/test_async_generator_debug.py +105 -0
  146. dataknobs_data-0.1.0/tests/test_async_s3_native.py +503 -0
  147. dataknobs_data-0.1.0/tests/test_backend_streaming_consistency.py +141 -0
  148. dataknobs_data-0.1.0/tests/test_backends/test_elasticsearch.py +413 -0
  149. dataknobs_data-0.1.0/tests/test_backends/test_file.py +412 -0
  150. dataknobs_data-0.1.0/tests/test_backends/test_file_edge_cases.py +521 -0
  151. dataknobs_data-0.1.0/tests/test_backends/test_postgres.py +346 -0
  152. dataknobs_data-0.1.0/tests/test_boolean_logic.py +340 -0
  153. dataknobs_data-0.1.0/tests/test_config_integration.py +144 -0
  154. dataknobs_data-0.1.0/tests/test_connection_management.py +81 -0
  155. dataknobs_data-0.1.0/tests/test_cross_backend_integration.py +547 -0
  156. dataknobs_data-0.1.0/tests/test_exceptions.py +308 -0
  157. dataknobs_data-0.1.0/tests/test_factories_validation.py +222 -0
  158. dataknobs_data-0.1.0/tests/test_factory.py +229 -0
  159. dataknobs_data-0.1.0/tests/test_factory_extended.py +355 -0
  160. dataknobs_data-0.1.0/tests/test_fields.py +137 -0
  161. dataknobs_data-0.1.0/tests/test_generator_debug.py +42 -0
  162. dataknobs_data-0.1.0/tests/test_memory_backend.py +397 -0
  163. dataknobs_data-0.1.0/tests/test_migration.py +668 -0
  164. dataknobs_data-0.1.0/tests/test_migrator_extended.py +644 -0
  165. dataknobs_data-0.1.0/tests/test_nested_field_queries.py +236 -0
  166. dataknobs_data-0.1.0/tests/test_pandas_batch_ops.py +455 -0
  167. dataknobs_data-0.1.0/tests/test_pandas_integration.py +747 -0
  168. dataknobs_data-0.1.0/tests/test_pool_manager.py +335 -0
  169. dataknobs_data-0.1.0/tests/test_query.py +256 -0
  170. dataknobs_data-0.1.0/tests/test_query_enhanced.py +397 -0
  171. dataknobs_data-0.1.0/tests/test_range_operators.py +402 -0
  172. dataknobs_data-0.1.0/tests/test_range_operators_integration.py +531 -0
  173. dataknobs_data-0.1.0/tests/test_records.py +353 -0
  174. dataknobs_data-0.1.0/tests/test_s3_config_integration.py +149 -0
  175. dataknobs_data-0.1.0/tests/test_sensor_dashboard_advanced.py +551 -0
  176. dataknobs_data-0.1.0/tests/test_sensor_dashboard_example.py +626 -0
  177. dataknobs_data-0.1.0/tests/test_sensor_dashboard_streaming.py +355 -0
  178. dataknobs_data-0.1.0/tests/test_streaming.py +642 -0
  179. dataknobs_data-0.1.0/tests/test_streaming_mixins.py +308 -0
  180. dataknobs_data-0.1.0/tests/test_streaming_simple.py +248 -0
  181. dataknobs_data-0.1.0/tests/test_unified_batch.py +301 -0
  182. dataknobs_data-0.1.0/tests/test_validation.py +619 -0
@@ -0,0 +1,64 @@
1
+ .#*
2
+ *~
3
+ *#
4
+ .idea
5
+ .vscode
6
+ .pydevproject
7
+ venv/
8
+ .cache
9
+ **/.*env
10
+ *.pyc
11
+ **/__pycache__
12
+ .pytest_cache/
13
+ dist
14
+ .eggs/
15
+ *.egg-info
16
+ **/build
17
+ *.swp
18
+ *.orig
19
+ .project
20
+ .coverage*
21
+ _version.py.bld
22
+ .mypy_cache
23
+ **/build.log
24
+ .eggs
25
+ ignored
26
+ **/.ipynb_checkpoints
27
+ .s3_cache
28
+ .Trash-*
29
+ .DS_Store
30
+ **/_tmp
31
+ .data
32
+ *.so
33
+ .aws
34
+ VERSION
35
+ activate
36
+ .tox
37
+ .docker
38
+ .pypirc
39
+
40
+ # uv
41
+ .venv/
42
+ uv.lock
43
+ test-env/
44
+ .uv-cache/
45
+
46
+ # MkDocs documentation
47
+ site/
48
+
49
+ # Non-essential test coverage reports
50
+ coverage.xml
51
+ packages/data/coverage.xml
52
+
53
+ # Quality check artifacts
54
+ .quality-artifacts/*
55
+ !.quality-artifacts/quality-summary.json
56
+ !.quality-artifacts/environment.json
57
+ !.quality-artifacts/unit-test-results.xml
58
+ !.quality-artifacts/integration-test-results.xml
59
+ !.quality-artifacts/coverage.xml
60
+ !.quality-artifacts/coverage-unit.xml
61
+ !.quality-artifacts/coverage-integration.xml
62
+ !.quality-artifacts/lint-report.json
63
+ !.quality-artifacts/style-check.json
64
+ !.quality-artifacts/signature.sha256
@@ -0,0 +1,533 @@
1
+ Metadata-Version: 2.4
2
+ Name: dataknobs-data
3
+ Version: 0.1.0
4
+ Summary: Unified data abstraction layer for consistent database operations across multiple storage technologies
5
+ Project-URL: Homepage, https://github.com/dataknobs/dataknobs
6
+ Project-URL: Bug Tracker, https://github.com/dataknobs/dataknobs/issues
7
+ Project-URL: Documentation, https://dataknobs.readthedocs.io
8
+ Author-email: DataKnobs Team <team@dataknobs.com>
9
+ Keywords: abstraction,data,database,records,storage
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: aiofiles>=23.0.0
18
+ Requires-Dist: boto3>=1.38.27
19
+ Requires-Dist: dataknobs-config>=0.1.0
20
+ Requires-Dist: dataknobs-utils>=0.1.0
21
+ Requires-Dist: moto>=5.1.10
22
+ Requires-Dist: pandas>=2.0.0
23
+ Requires-Dist: pydantic>=2.0.0
24
+ Provides-Extra: all
25
+ Requires-Dist: aioboto3>=12.0.0; extra == 'all'
26
+ Requires-Dist: asyncpg>=0.29.0; extra == 'all'
27
+ Requires-Dist: boto3>=1.26.0; extra == 'all'
28
+ Requires-Dist: elasticsearch[async]<9.0.0,>=8.0.0; extra == 'all'
29
+ Requires-Dist: psycopg2>=2.9.0; extra == 'all'
30
+ Requires-Dist: pyarrow>=14.0.0; extra == 'all'
31
+ Requires-Dist: sqlalchemy>=2.0.0; extra == 'all'
32
+ Provides-Extra: dev
33
+ Requires-Dist: black>=24.0.0; extra == 'dev'
34
+ Requires-Dist: moto>=4.2.0; extra == 'dev'
35
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
36
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
37
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
38
+ Requires-Dist: pytest-mock>=3.11.0; extra == 'dev'
39
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
40
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
41
+ Requires-Dist: testcontainers>=3.7.0; extra == 'dev'
42
+ Provides-Extra: elasticsearch
43
+ Requires-Dist: elasticsearch[async]<9.0.0,>=8.0.0; extra == 'elasticsearch'
44
+ Provides-Extra: parquet
45
+ Requires-Dist: pyarrow>=14.0.0; extra == 'parquet'
46
+ Provides-Extra: postgres
47
+ Requires-Dist: asyncpg>=0.29.0; extra == 'postgres'
48
+ Requires-Dist: psycopg2>=2.9.0; extra == 'postgres'
49
+ Requires-Dist: sqlalchemy>=2.0.0; extra == 'postgres'
50
+ Provides-Extra: s3
51
+ Requires-Dist: aioboto3>=12.0.0; extra == 's3'
52
+ Requires-Dist: boto3>=1.26.0; extra == 's3'
53
+ Description-Content-Type: text/markdown
54
+
55
+ # DataKnobs Data Package
56
+
57
+ A unified data abstraction layer that provides consistent database operations across multiple storage technologies.
58
+
59
+ ## Overview
60
+
61
+ The `dataknobs-data` package enables seamless data management regardless of the underlying storage mechanism, from in-memory structures to cloud storage and databases. It provides a simple, consistent API for CRUD operations, searching, and data manipulation across diverse backends.
62
+
63
+ ## Features
64
+
65
+ - **Unified Interface**: Same API regardless of storage backend
66
+ - **Multiple Backends**: Memory, File (JSON/CSV/Parquet), PostgreSQL, Elasticsearch, S3
67
+ - **Record-Based**: Data represented as structured records with metadata and first-class ID support
68
+ - **Pandas Integration**: Seamless bidirectional conversion to/from DataFrames with type preservation
69
+ - **Migration Utilities**: Backend-to-backend migration, schema evolution, and data transformation
70
+ - **Schema Validation**: Comprehensive validation system with constraints and type coercion
71
+ - **Streaming Support**: Efficient streaming APIs for large datasets
72
+ - **Type Safety**: Strong typing with field validation and automatic type conversion
73
+ - **Async Support**: Both synchronous and asynchronous APIs
74
+ - **Query System**: Powerful, backend-agnostic query capabilities
75
+ - **Configuration Support**: Full integration with DataKnobs configuration system
76
+ - **Batch Operations**: Efficient bulk insert, update, and upsert operations
77
+ - **Connection Management**: Automatic connection lifecycle management
78
+ - **Extensible**: Easy to add custom storage backends, validators, and transformers
79
+
80
+ ## Installation
81
+
82
+ ```bash
83
+ # Basic installation
84
+ pip install dataknobs-data
85
+
86
+ # With specific backend support
87
+ pip install dataknobs-data[postgres] # PostgreSQL support
88
+ pip install dataknobs-data[s3] # AWS S3 support
89
+ pip install dataknobs-data[elasticsearch] # Elasticsearch support
90
+ pip install dataknobs-data[all] # All backends
91
+ ```
92
+
93
+ ## Quick Start
94
+
95
+ ```python
96
+ from dataknobs_data import AsyncDatabase, Record, Query, Operator
97
+
98
+ # Async usage
99
+ async def main():
100
+ # Create and auto-connect to database
101
+ db = await AsyncDatabase.create("memory")
102
+
103
+ # Create a record
104
+ record = Record({
105
+ "name": "John Doe",
106
+ "age": 30,
107
+ "email": "john@example.com",
108
+ "active": True
109
+ })
110
+
111
+ # CRUD operations
112
+ id = await db.create(record)
113
+ retrieved = await db.read(id)
114
+ record.set_value("age", 31)
115
+ await db.update(id, record)
116
+ await db.delete(id)
117
+
118
+ # Search with queries
119
+ query = (Query()
120
+ .filter("age", Operator.GTE, 25)
121
+ .filter("active", Operator.EQ, True)
122
+ .sort("name")
123
+ .limit(10))
124
+
125
+ results = await db.search(query)
126
+ for record in results:
127
+ print(f"{record.get_value('name')}: {record.get_value('age')}")
128
+
129
+ await db.close()
130
+
131
+ # Synchronous usage
132
+ from dataknobs_data import SyncDatabase
133
+
134
+ db = SyncDatabase.create("memory")
135
+ record = Record({"name": "Jane Doe", "age": 28})
136
+ id = db.create(record)
137
+ retrieved = db.read(id)
138
+ db.close()
139
+ ```
140
+
141
+ ## Backend Configuration
142
+
143
+ ### File Backend
144
+ ```python
145
+ db = await AsyncDatabase.create("file", {
146
+ "path": "/data/records.json",
147
+ "pretty": True,
148
+ "backup": True
149
+ })
150
+ ```
151
+
152
+ ### PostgreSQL Backend
153
+ ```python
154
+ db = await AsyncDatabase.create("postgres", {
155
+ "host": "localhost",
156
+ "database": "mydb",
157
+ "user": "user",
158
+ "password": "pass",
159
+ "table": "records",
160
+ "schema": "public"
161
+ })
162
+ ```
163
+
164
+ ### S3 Backend
165
+ ```python
166
+ db = await AsyncDatabase.create("s3", {
167
+ "bucket": "my-bucket",
168
+ "prefix": "records/",
169
+ "region": "us-west-2",
170
+ "aws_access_key_id": "key",
171
+ "aws_secret_access_key": "secret"
172
+ })
173
+ ```
174
+
175
+ ### Elasticsearch Backend
176
+ ```python
177
+ db = await AsyncDatabase.create("elasticsearch", {
178
+ "host": "localhost",
179
+ "port": 9200,
180
+ "index": "records",
181
+ "refresh": True
182
+ })
183
+ ```
184
+
185
+ ## Configuration Support
186
+
187
+ The data package fully integrates with the DataKnobs configuration system. All backends inherit from `ConfigurableBase` and can be instantiated from configuration files.
188
+
189
+ ### Using Configuration Files
190
+
191
+ ```yaml
192
+ # config.yaml
193
+ databases:
194
+ - name: primary
195
+ class: dataknobs_data.backends.postgres.PostgresDatabase
196
+ host: ${DB_HOST:localhost} # Environment variable with default
197
+ port: ${DB_PORT:5432}
198
+ database: myapp
199
+ user: ${DB_USER:postgres}
200
+ password: ${DB_PASSWORD}
201
+ table: records
202
+
203
+ - name: cache
204
+ class: dataknobs_data.backends.memory.MemoryDatabase
205
+
206
+ - name: archive
207
+ class: dataknobs_data.backends.file.SyncFileDatabase
208
+ path: /data/archive.json
209
+ format: json
210
+ compression: gzip
211
+
212
+ - name: cloud_storage
213
+ class: dataknobs_data.backends.s3.S3Database
214
+ bucket: ${S3_BUCKET:my-data-bucket}
215
+ prefix: ${S3_PREFIX:records/}
216
+ region: ${AWS_REGION:us-east-1}
217
+ endpoint_url: ${S3_ENDPOINT} # Optional, for LocalStack/MinIO
218
+ ```
219
+
220
+ ### Loading from Configuration
221
+
222
+ ```python
223
+ from dataknobs_config import Config
224
+ from dataknobs_data import Record, Query
225
+
226
+ # Load configuration
227
+ config = Config("config.yaml")
228
+
229
+ # Create database instances from config
230
+ primary_db = config.get_instance("databases", "primary")
231
+ cache_db = config.get_instance("databases", "cache")
232
+ archive_db = config.get_instance("databases", "archive")
233
+
234
+ # Use the databases normally
235
+ record = Record({"name": "test", "value": 42})
236
+ record_id = primary_db.create(record)
237
+
238
+ # Cache frequently accessed data
239
+ cache_db.create(record)
240
+
241
+ # Archive old records
242
+ archive_db.create(record)
243
+ ```
244
+
245
+ ### Direct Configuration
246
+
247
+ ```python
248
+ from dataknobs_data.backends.postgres import PostgresDatabase
249
+
250
+ # All backends support from_config classmethod
251
+ db = PostgresDatabase.from_config({
252
+ "host": "localhost",
253
+ "database": "myapp",
254
+ "user": "postgres",
255
+ "password": "secret"
256
+ })
257
+ ```
258
+
259
+ ## Backend Factory
260
+
261
+ The data package provides a factory pattern for dynamic backend selection:
262
+
263
+ ### Using the Factory Directly
264
+
265
+ ```python
266
+ from dataknobs_data import DatabaseFactory
267
+
268
+ factory = DatabaseFactory()
269
+
270
+ # Create different backends
271
+ memory_db = factory.create(backend="memory")
272
+ file_db = factory.create(backend="file", path="data.json", format="json")
273
+ s3_db = factory.create(backend="s3", bucket="my-bucket", prefix="data/")
274
+ ```
275
+
276
+ ### Factory with Configuration
277
+
278
+ ```python
279
+ from dataknobs_config import Config
280
+ from dataknobs_data import database_factory
281
+
282
+ # Register factory for cleaner configs
283
+ config = Config()
284
+ config.register_factory("database", database_factory)
285
+
286
+ # Use registered factory in configuration
287
+ config.load({
288
+ "databases": [{
289
+ "name": "main",
290
+ "factory": "database", # Uses registered factory
291
+ "backend": "postgres",
292
+ "host": "localhost",
293
+ "database": "myapp"
294
+ }]
295
+ })
296
+
297
+ db = config.get_instance("databases", "main")
298
+ ```
299
+
300
+ ### Factory Configuration Examples
301
+
302
+ ```yaml
303
+ # Using registered factory (cleaner)
304
+ databases:
305
+ - name: main
306
+ factory: database
307
+ backend: ${DB_BACKEND:postgres}
308
+ host: ${DB_HOST:localhost}
309
+
310
+ # Using module path (no registration needed)
311
+ databases:
312
+ - name: main
313
+ factory: dataknobs_data.factory.database_factory
314
+ backend: postgres
315
+ host: localhost
316
+ ```
317
+
318
+ ## Pandas Integration
319
+
320
+ The data package provides comprehensive pandas integration for data analysis workflows:
321
+
322
+ ```python
323
+ import pandas as pd
324
+ from dataknobs_data.pandas import DataFrameConverter, BatchOperations
325
+
326
+ # Convert records to DataFrame with type preservation
327
+ converter = DataFrameConverter()
328
+ df = converter.records_to_dataframe(records, preserve_types=True)
329
+
330
+ # Perform pandas operations
331
+ df_filtered = df[df['age'] > 25]
332
+ df_aggregated = df.groupby('category').agg({'price': 'mean'})
333
+
334
+ # Convert back to records
335
+ new_records = converter.dataframe_to_records(df_filtered)
336
+
337
+ # Bulk operations with DataFrames
338
+ batch_ops = BatchOperations(database)
339
+ result = batch_ops.bulk_insert_dataframe(df, batch_size=1000)
340
+ print(f"Inserted {result.successful} records")
341
+
342
+ # Upsert from DataFrame
343
+ result = batch_ops.bulk_upsert_dataframe(
344
+ df,
345
+ id_column="user_id",
346
+ merge_strategy="update"
347
+ )
348
+ ```
349
+
350
+ ## Schema Validation
351
+
352
+ Define and enforce data schemas with comprehensive validation:
353
+
354
+ ```python
355
+ from dataknobs_data.validation import Schema, FieldType
356
+ from dataknobs_data.validation.constraints import Enum, Pattern, Range, Unique
357
+
358
+ # Define schema with constraints
359
+ user_schema = Schema("UserSchema")
360
+ user_schema.field("email", FieldType.STRING,
361
+ required=True,
362
+ constraints=[Pattern(r"^.+@.+\..+$"), Unique()])
363
+ user_schema.field("age", FieldType.INTEGER,
364
+ constraints=[Range(min=0, max=150)])
365
+ user_schema.field("status", FieldType.STRING,
366
+ default="active",
367
+ constraints=[Enum(["active", "inactive", "suspended"])])
368
+
369
+ # Validate records
370
+ result = user_schema.validate(record)
371
+ if not result.valid:
372
+ for error in result.errors:
373
+ print(error)
374
+
375
+ # Automatic type coercion
376
+ record = Record({"age": "30"}) # String value
377
+ result = user_schema.validate(record, coerce=True) # Converts to int
378
+ if result.valid:
379
+ print(record.get_value("age")) # 30 (as integer)
380
+ ```
381
+
382
+ ## Data Migration
383
+
384
+ Migrate data between backends with transformation support:
385
+
386
+ ```python
387
+ from dataknobs_data.migration import Migration, Migrator
388
+ from dataknobs_data.migration.operations import AddField, RenameField, TransformField
389
+
390
+ # Define migration
391
+ migration = Migration("upgrade_schema", "2.0.0")
392
+ migration.add_operation(AddField("created_at", default=datetime.now()))
393
+ migration.add_operation(RenameField("user_name", "username"))
394
+ migration.add_operation(TransformField("email", lambda x: x.lower()))
395
+
396
+ # Migrate between backends
397
+ async def migrate_data():
398
+ source_db = await AsyncDatabase.create("postgres", postgres_config)
399
+ target_db = await AsyncDatabase.create("s3", s3_config)
400
+
401
+ migrator = Migrator(source_db, target_db)
402
+
403
+ # Run migration with progress tracking
404
+ progress = await migrator.migrate(
405
+ migration=migration,
406
+ batch_size=1000,
407
+ on_progress=lambda p: print(f"Progress: {p.percentage:.1f}%")
408
+ )
409
+
410
+ print(f"Migrated: {progress.successful} records")
411
+ print(f"Failed: {progress.failed} records")
412
+ print(f"Duration: {progress.duration}s")
413
+
414
+ await source_db.close()
415
+ await target_db.close()
416
+ ```
417
+
418
+ ## Advanced Queries
419
+
420
+ ```python
421
+ # Complex query with multiple filters
422
+ query = (Query()
423
+ .filter("status", Operator.IN, ["active", "pending"])
424
+ .filter("created_at", Operator.GTE, "2024-01-01")
425
+ .filter("name", Operator.LIKE, "John%")
426
+ .sort("priority", SortOrder.DESC)
427
+ .sort("created_at", SortOrder.ASC)
428
+ .offset(20)
429
+ .limit(10)
430
+ .select(["name", "email", "status"])) # Select specific fields
431
+
432
+ results = await db.search(query)
433
+ ```
434
+
435
+ ## Streaming Support
436
+
437
+ ```python
438
+ from dataknobs_data import StreamConfig
439
+
440
+ # Stream large datasets efficiently
441
+ config = StreamConfig(
442
+ batch_size=100,
443
+ buffer_size=1000
444
+ )
445
+
446
+ # Stream read
447
+ async for record in db.stream_read(query, config):
448
+ # Process each record without loading all into memory
449
+ process_record(record)
450
+
451
+ # Stream write
452
+ result = await db.stream_write(record_generator(), config)
453
+ print(f"Streamed {result.total_processed} records")
454
+ ```
455
+
456
+
457
+ ## Documentation
458
+
459
+ For complete API documentation, see [API Reference](docs/API_REFERENCE.md).
460
+
461
+ ## Custom Backend
462
+
463
+ ```python
464
+ from dataknobs_data import AsyncDatabase, DatabaseBackend
465
+
466
+ class CustomBackend(DatabaseBackend):
467
+ def create(self, record):
468
+ # Implementation
469
+ pass
470
+
471
+ def read(self, record_id):
472
+ # Implementation
473
+ pass
474
+
475
+ # ... other methods
476
+
477
+ # Register custom backend
478
+ AsyncDatabase.register_backend("custom", CustomBackend)
479
+
480
+ # Use custom backend
481
+ db = await AsyncDatabase.create("custom", config)
482
+ ```
483
+
484
+ ## Development
485
+
486
+ ```bash
487
+ # Install development dependencies
488
+ pip install -e ".[dev]"
489
+
490
+ # Run tests
491
+ pytest
492
+
493
+ # Run tests with coverage
494
+ pytest --cov=dataknobs_data
495
+
496
+ # Type checking
497
+ mypy src/dataknobs_data
498
+
499
+ # Linting
500
+ ruff check src/dataknobs_data
501
+
502
+ # Format code
503
+ black src/dataknobs_data
504
+ ```
505
+
506
+ ## Architecture
507
+
508
+ The package follows a modular architecture:
509
+
510
+ - **Records**: Data representation with fields and metadata
511
+ - **Database Interface**: Abstract base classes (AsyncDatabase/SyncDatabase) for all backends
512
+ - **Query System**: Backend-agnostic query building
513
+ - **Backends**: Implementations for different storage technologies
514
+ - **Serializers**: Type conversion and format handling
515
+ - **Utils**: Pandas integration and migration tools
516
+
517
+ ## Performance
518
+
519
+ The package is designed for optimal performance:
520
+
521
+ - Connection pooling for database backends
522
+ - Batch operations for efficiency
523
+ - Lazy loading and pagination
524
+ - Caching for frequently accessed data
525
+ - Async support for concurrent operations
526
+
527
+ ## Contributing
528
+
529
+ Contributions are welcome! Please see our [Contributing Guide](../../CONTRIBUTING.md) for details.
530
+
531
+ ## License
532
+
533
+ This project is licensed under the MIT License - see the [LICENSE](../../LICENSE) file for details.