dataknobs-data 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataknobs_data-0.1.0/.gitignore +64 -0
- dataknobs_data-0.1.0/PKG-INFO +533 -0
- dataknobs_data-0.1.0/README.md +479 -0
- dataknobs_data-0.1.0/docs/API_IMPROVEMENTS.md +166 -0
- dataknobs_data-0.1.0/docs/API_REFERENCE.md +519 -0
- dataknobs_data-0.1.0/docs/BATCH_PROCESSING_GUIDE.md +190 -0
- dataknobs_data-0.1.0/docs/BOOLEAN_LOGIC_OPERATORS.md +198 -0
- dataknobs_data-0.1.0/docs/DESIGN_PLAN.md +444 -0
- dataknobs_data-0.1.0/docs/FEATURE_SUMMARY.md +152 -0
- dataknobs_data-0.1.0/docs/IMPLEMENTATION_STATUS.md +326 -0
- dataknobs_data-0.1.0/docs/NEXT_STEPS.md +298 -0
- dataknobs_data-0.1.0/docs/PHASE6_PLAN.md +237 -0
- dataknobs_data-0.1.0/docs/PHASE7_PLAN.md +172 -0
- dataknobs_data-0.1.0/docs/PHASE8_DOCUMENTATION_PLAN.md +420 -0
- dataknobs_data-0.1.0/docs/PROGRESS_CHECKLIST.md +315 -0
- dataknobs_data-0.1.0/docs/RANGE_OPERATORS_IMPLEMENTATION.md +150 -0
- dataknobs_data-0.1.0/docs/REDESIGN_CHECKLIST.md +202 -0
- dataknobs_data-0.1.0/docs/REDESIGN_PLAN.md +754 -0
- dataknobs_data-0.1.0/docs/VECTOR_STORE_DESIGN.md +777 -0
- dataknobs_data-0.1.0/examples/complete_example.py +266 -0
- dataknobs_data-0.1.0/examples/s3_example.py +205 -0
- dataknobs_data-0.1.0/examples/sensor_dashboard/README.md +118 -0
- dataknobs_data-0.1.0/examples/sensor_dashboard/SUMMARY.md +84 -0
- dataknobs_data-0.1.0/examples/sensor_dashboard/__init__.py +12 -0
- dataknobs_data-0.1.0/examples/sensor_dashboard/data_generator.py +257 -0
- dataknobs_data-0.1.0/examples/sensor_dashboard/demo_advanced_queries.py +258 -0
- dataknobs_data-0.1.0/examples/sensor_dashboard/demo_streaming_improvements.py +254 -0
- dataknobs_data-0.1.0/examples/sensor_dashboard/models.py +96 -0
- dataknobs_data-0.1.0/examples/sensor_dashboard/sensor_dashboard.py +690 -0
- dataknobs_data-0.1.0/htmlcov/.gitignore +2 -0
- dataknobs_data-0.1.0/htmlcov/class_index.html +1211 -0
- dataknobs_data-0.1.0/htmlcov/coverage_html_cb_6fb7b396.js +733 -0
- dataknobs_data-0.1.0/htmlcov/favicon_32_cb_58284776.png +0 -0
- dataknobs_data-0.1.0/htmlcov/function_index.html +5331 -0
- dataknobs_data-0.1.0/htmlcov/index.html +342 -0
- dataknobs_data-0.1.0/htmlcov/keybd_closed_cb_ce680311.png +0 -0
- dataknobs_data-0.1.0/htmlcov/status.json +1 -0
- dataknobs_data-0.1.0/htmlcov/style_cb_6b508a39.css +377 -0
- dataknobs_data-0.1.0/htmlcov/z_0d7d1fbd877256a8_coercer_py.html +374 -0
- dataknobs_data-0.1.0/htmlcov/z_0d7d1fbd877256a8_constraints_py.html +477 -0
- dataknobs_data-0.1.0/htmlcov/z_0d7d1fbd877256a8_factory_py.html +307 -0
- dataknobs_data-0.1.0/htmlcov/z_0d7d1fbd877256a8_result_py.html +278 -0
- dataknobs_data-0.1.0/htmlcov/z_0d7d1fbd877256a8_schema_py.html +436 -0
- dataknobs_data-0.1.0/htmlcov/z_0d7d1fbd877256a8_type_coercion_py.html +413 -0
- dataknobs_data-0.1.0/htmlcov/z_199d8782a13727f6_migrator_py.html +412 -0
- dataknobs_data-0.1.0/htmlcov/z_199d8782a13727f6_schema_evolution_py.html +491 -0
- dataknobs_data-0.1.0/htmlcov/z_199d8782a13727f6_transformers_py.html +443 -0
- dataknobs_data-0.1.0/htmlcov/z_43b794130b4bf4a9_batch_ops_py.html +600 -0
- dataknobs_data-0.1.0/htmlcov/z_43b794130b4bf4a9_converter_py.html +513 -0
- dataknobs_data-0.1.0/htmlcov/z_43b794130b4bf4a9_metadata_py.html +349 -0
- dataknobs_data-0.1.0/htmlcov/z_43b794130b4bf4a9_type_mapper_py.html +624 -0
- dataknobs_data-0.1.0/htmlcov/z_6a167ce0c88e0ee6_base_py.html +311 -0
- dataknobs_data-0.1.0/htmlcov/z_6a167ce0c88e0ee6_elasticsearch_py.html +204 -0
- dataknobs_data-0.1.0/htmlcov/z_6a167ce0c88e0ee6_postgres_py.html +158 -0
- dataknobs_data-0.1.0/htmlcov/z_6a167ce0c88e0ee6_s3_py.html +163 -0
- dataknobs_data-0.1.0/htmlcov/z_8cad8284e1760d94_constraints_py.html +480 -0
- dataknobs_data-0.1.0/htmlcov/z_8cad8284e1760d94_schema_py.html +479 -0
- dataknobs_data-0.1.0/htmlcov/z_8cad8284e1760d94_type_coercion_py.html +413 -0
- dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_database_py.html +725 -0
- dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_exceptions_py.html +223 -0
- dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_factory_py.html +321 -0
- dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_fields_py.html +239 -0
- dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_query_logic_py.html +465 -0
- dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_query_py.html +574 -0
- dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_records_py.html +593 -0
- dataknobs_data-0.1.0/htmlcov/z_be37674e1f62e768_streaming_py.html +599 -0
- dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_factory_py.html +396 -0
- dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_migration_py.html +288 -0
- dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_migrator_py.html +492 -0
- dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_operations_py.html +380 -0
- dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_progress_py.html +389 -0
- dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_schema_evolution_py.html +491 -0
- dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_transformer_py.html +372 -0
- dataknobs_data-0.1.0/htmlcov/z_c1234e08e7c7a3d1_transformers_py.html +443 -0
- dataknobs_data-0.1.0/htmlcov/z_c348d435f8109258_coercer_py.html +374 -0
- dataknobs_data-0.1.0/htmlcov/z_c348d435f8109258_constraints_py.html +477 -0
- dataknobs_data-0.1.0/htmlcov/z_c348d435f8109258_factory_py.html +307 -0
- dataknobs_data-0.1.0/htmlcov/z_c348d435f8109258_result_py.html +278 -0
- dataknobs_data-0.1.0/htmlcov/z_c348d435f8109258_schema_py.html +436 -0
- dataknobs_data-0.1.0/htmlcov/z_c7ce396564e170b6_factory_py.html +396 -0
- dataknobs_data-0.1.0/htmlcov/z_c7ce396564e170b6_migration_py.html +288 -0
- dataknobs_data-0.1.0/htmlcov/z_c7ce396564e170b6_migrator_py.html +478 -0
- dataknobs_data-0.1.0/htmlcov/z_c7ce396564e170b6_operations_py.html +380 -0
- dataknobs_data-0.1.0/htmlcov/z_c7ce396564e170b6_progress_py.html +389 -0
- dataknobs_data-0.1.0/htmlcov/z_c7ce396564e170b6_transformer_py.html +372 -0
- dataknobs_data-0.1.0/htmlcov/z_cb00a6efc47dbd99_pool_manager_py.html +310 -0
- dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_elasticsearch_async_py.html +577 -0
- dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_elasticsearch_py.html +599 -0
- dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_file_py.html +1003 -0
- dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_memory_py.html +481 -0
- dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_postgres_async_py.html +682 -0
- dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_postgres_py.html +1119 -0
- dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_postgres_refactored_py.html +660 -0
- dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_s3_async_py.html +654 -0
- dataknobs_data-0.1.0/htmlcov/z_dd3695a71d2e06ed_s3_py.html +646 -0
- dataknobs_data-0.1.0/pyproject.toml +188 -0
- dataknobs_data-0.1.0/scripts/benchmark_performance.py +403 -0
- dataknobs_data-0.1.0/src/dataknobs_data/__init__.py +83 -0
- dataknobs_data-0.1.0/src/dataknobs_data/backends/__init__.py +72 -0
- dataknobs_data-0.1.0/src/dataknobs_data/backends/elasticsearch.py +501 -0
- dataknobs_data-0.1.0/src/dataknobs_data/backends/elasticsearch_async.py +476 -0
- dataknobs_data-0.1.0/src/dataknobs_data/backends/file.py +907 -0
- dataknobs_data-0.1.0/src/dataknobs_data/backends/memory.py +384 -0
- dataknobs_data-0.1.0/src/dataknobs_data/backends/postgres.py +1023 -0
- dataknobs_data-0.1.0/src/dataknobs_data/backends/s3.py +547 -0
- dataknobs_data-0.1.0/src/dataknobs_data/backends/s3_async.py +552 -0
- dataknobs_data-0.1.0/src/dataknobs_data/database.py +629 -0
- dataknobs_data-0.1.0/src/dataknobs_data/exceptions.py +126 -0
- dataknobs_data-0.1.0/src/dataknobs_data/factory.py +225 -0
- dataknobs_data-0.1.0/src/dataknobs_data/fields.py +142 -0
- dataknobs_data-0.1.0/src/dataknobs_data/migration/__init__.py +59 -0
- dataknobs_data-0.1.0/src/dataknobs_data/migration/factory.py +300 -0
- dataknobs_data-0.1.0/src/dataknobs_data/migration/migration.py +180 -0
- dataknobs_data-0.1.0/src/dataknobs_data/migration/migrator.py +386 -0
- dataknobs_data-0.1.0/src/dataknobs_data/migration/operations.py +280 -0
- dataknobs_data-0.1.0/src/dataknobs_data/migration/progress.py +275 -0
- dataknobs_data-0.1.0/src/dataknobs_data/migration/transformer.py +265 -0
- dataknobs_data-0.1.0/src/dataknobs_data/pandas/__init__.py +22 -0
- dataknobs_data-0.1.0/src/dataknobs_data/pandas/batch_ops.py +503 -0
- dataknobs_data-0.1.0/src/dataknobs_data/pandas/converter.py +416 -0
- dataknobs_data-0.1.0/src/dataknobs_data/pandas/metadata.py +251 -0
- dataknobs_data-0.1.0/src/dataknobs_data/pandas/type_mapper.py +517 -0
- dataknobs_data-0.1.0/src/dataknobs_data/pooling/__init__.py +9 -0
- dataknobs_data-0.1.0/src/dataknobs_data/pooling/base.py +212 -0
- dataknobs_data-0.1.0/src/dataknobs_data/pooling/elasticsearch.py +106 -0
- dataknobs_data-0.1.0/src/dataknobs_data/pooling/postgres.py +61 -0
- dataknobs_data-0.1.0/src/dataknobs_data/pooling/s3.py +65 -0
- dataknobs_data-0.1.0/src/dataknobs_data/query.py +477 -0
- dataknobs_data-0.1.0/src/dataknobs_data/query_logic.py +368 -0
- dataknobs_data-0.1.0/src/dataknobs_data/records.py +496 -0
- dataknobs_data-0.1.0/src/dataknobs_data/streaming.py +499 -0
- dataknobs_data-0.1.0/src/dataknobs_data/validation/__init__.py +53 -0
- dataknobs_data-0.1.0/src/dataknobs_data/validation/coercer.py +272 -0
- dataknobs_data-0.1.0/src/dataknobs_data/validation/constraints.py +370 -0
- dataknobs_data-0.1.0/src/dataknobs_data/validation/factory.py +207 -0
- dataknobs_data-0.1.0/src/dataknobs_data/validation/result.py +168 -0
- dataknobs_data-0.1.0/src/dataknobs_data/validation/schema.py +328 -0
- dataknobs_data-0.1.0/tests/conftest.py +51 -0
- dataknobs_data-0.1.0/tests/integration/README.md +333 -0
- dataknobs_data-0.1.0/tests/integration/conftest.py +262 -0
- dataknobs_data-0.1.0/tests/integration/test_elasticsearch_integration.py +612 -0
- dataknobs_data-0.1.0/tests/integration/test_postgres_integration.py +475 -0
- dataknobs_data-0.1.0/tests/integration/test_s3_backend.py +525 -0
- dataknobs_data-0.1.0/tests/test_async_elasticsearch_native.py +363 -0
- dataknobs_data-0.1.0/tests/test_async_generator_debug.py +105 -0
- dataknobs_data-0.1.0/tests/test_async_s3_native.py +503 -0
- dataknobs_data-0.1.0/tests/test_backend_streaming_consistency.py +141 -0
- dataknobs_data-0.1.0/tests/test_backends/test_elasticsearch.py +413 -0
- dataknobs_data-0.1.0/tests/test_backends/test_file.py +412 -0
- dataknobs_data-0.1.0/tests/test_backends/test_file_edge_cases.py +521 -0
- dataknobs_data-0.1.0/tests/test_backends/test_postgres.py +346 -0
- dataknobs_data-0.1.0/tests/test_boolean_logic.py +340 -0
- dataknobs_data-0.1.0/tests/test_config_integration.py +144 -0
- dataknobs_data-0.1.0/tests/test_connection_management.py +81 -0
- dataknobs_data-0.1.0/tests/test_cross_backend_integration.py +547 -0
- dataknobs_data-0.1.0/tests/test_exceptions.py +308 -0
- dataknobs_data-0.1.0/tests/test_factories_validation.py +222 -0
- dataknobs_data-0.1.0/tests/test_factory.py +229 -0
- dataknobs_data-0.1.0/tests/test_factory_extended.py +355 -0
- dataknobs_data-0.1.0/tests/test_fields.py +137 -0
- dataknobs_data-0.1.0/tests/test_generator_debug.py +42 -0
- dataknobs_data-0.1.0/tests/test_memory_backend.py +397 -0
- dataknobs_data-0.1.0/tests/test_migration.py +668 -0
- dataknobs_data-0.1.0/tests/test_migrator_extended.py +644 -0
- dataknobs_data-0.1.0/tests/test_nested_field_queries.py +236 -0
- dataknobs_data-0.1.0/tests/test_pandas_batch_ops.py +455 -0
- dataknobs_data-0.1.0/tests/test_pandas_integration.py +747 -0
- dataknobs_data-0.1.0/tests/test_pool_manager.py +335 -0
- dataknobs_data-0.1.0/tests/test_query.py +256 -0
- dataknobs_data-0.1.0/tests/test_query_enhanced.py +397 -0
- dataknobs_data-0.1.0/tests/test_range_operators.py +402 -0
- dataknobs_data-0.1.0/tests/test_range_operators_integration.py +531 -0
- dataknobs_data-0.1.0/tests/test_records.py +353 -0
- dataknobs_data-0.1.0/tests/test_s3_config_integration.py +149 -0
- dataknobs_data-0.1.0/tests/test_sensor_dashboard_advanced.py +551 -0
- dataknobs_data-0.1.0/tests/test_sensor_dashboard_example.py +626 -0
- dataknobs_data-0.1.0/tests/test_sensor_dashboard_streaming.py +355 -0
- dataknobs_data-0.1.0/tests/test_streaming.py +642 -0
- dataknobs_data-0.1.0/tests/test_streaming_mixins.py +308 -0
- dataknobs_data-0.1.0/tests/test_streaming_simple.py +248 -0
- dataknobs_data-0.1.0/tests/test_unified_batch.py +301 -0
- dataknobs_data-0.1.0/tests/test_validation.py +619 -0
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
.#*
|
|
2
|
+
*~
|
|
3
|
+
*#
|
|
4
|
+
.idea
|
|
5
|
+
.vscode
|
|
6
|
+
.pydevproject
|
|
7
|
+
venv/
|
|
8
|
+
.cache
|
|
9
|
+
**/.*env
|
|
10
|
+
*.pyc
|
|
11
|
+
**/__pycache__
|
|
12
|
+
.pytest_cache/
|
|
13
|
+
dist
|
|
14
|
+
.eggs/
|
|
15
|
+
*.egg-info
|
|
16
|
+
**/build
|
|
17
|
+
*.swp
|
|
18
|
+
*.orig
|
|
19
|
+
.project
|
|
20
|
+
.coverage*
|
|
21
|
+
_version.py.bld
|
|
22
|
+
.mypy_cache
|
|
23
|
+
**/build.log
|
|
24
|
+
.eggs
|
|
25
|
+
ignored
|
|
26
|
+
**/.ipynb_checkpoints
|
|
27
|
+
.s3_cache
|
|
28
|
+
.Trash-*
|
|
29
|
+
.DS_Store
|
|
30
|
+
**/_tmp
|
|
31
|
+
.data
|
|
32
|
+
*.so
|
|
33
|
+
.aws
|
|
34
|
+
VERSION
|
|
35
|
+
activate
|
|
36
|
+
.tox
|
|
37
|
+
.docker
|
|
38
|
+
.pypirc
|
|
39
|
+
|
|
40
|
+
# uv
|
|
41
|
+
.venv/
|
|
42
|
+
uv.lock
|
|
43
|
+
test-env/
|
|
44
|
+
.uv-cache/
|
|
45
|
+
|
|
46
|
+
# MkDocs documentation
|
|
47
|
+
site/
|
|
48
|
+
|
|
49
|
+
# Non-essential test coverage reports
|
|
50
|
+
coverage.xml
|
|
51
|
+
packages/data/coverage.xml
|
|
52
|
+
|
|
53
|
+
# Quality check artifacts
|
|
54
|
+
.quality-artifacts/*
|
|
55
|
+
!.quality-artifacts/quality-summary.json
|
|
56
|
+
!.quality-artifacts/environment.json
|
|
57
|
+
!.quality-artifacts/unit-test-results.xml
|
|
58
|
+
!.quality-artifacts/integration-test-results.xml
|
|
59
|
+
!.quality-artifacts/coverage.xml
|
|
60
|
+
!.quality-artifacts/coverage-unit.xml
|
|
61
|
+
!.quality-artifacts/coverage-integration.xml
|
|
62
|
+
!.quality-artifacts/lint-report.json
|
|
63
|
+
!.quality-artifacts/style-check.json
|
|
64
|
+
!.quality-artifacts/signature.sha256
|
|
@@ -0,0 +1,533 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dataknobs-data
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Unified data abstraction layer for consistent database operations across multiple storage technologies
|
|
5
|
+
Project-URL: Homepage, https://github.com/dataknobs/dataknobs
|
|
6
|
+
Project-URL: Bug Tracker, https://github.com/dataknobs/dataknobs/issues
|
|
7
|
+
Project-URL: Documentation, https://dataknobs.readthedocs.io
|
|
8
|
+
Author-email: DataKnobs Team <team@dataknobs.com>
|
|
9
|
+
Keywords: abstraction,data,database,records,storage
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: aiofiles>=23.0.0
|
|
18
|
+
Requires-Dist: boto3>=1.38.27
|
|
19
|
+
Requires-Dist: dataknobs-config>=0.1.0
|
|
20
|
+
Requires-Dist: dataknobs-utils>=0.1.0
|
|
21
|
+
Requires-Dist: moto>=5.1.10
|
|
22
|
+
Requires-Dist: pandas>=2.0.0
|
|
23
|
+
Requires-Dist: pydantic>=2.0.0
|
|
24
|
+
Provides-Extra: all
|
|
25
|
+
Requires-Dist: aioboto3>=12.0.0; extra == 'all'
|
|
26
|
+
Requires-Dist: asyncpg>=0.29.0; extra == 'all'
|
|
27
|
+
Requires-Dist: boto3>=1.26.0; extra == 'all'
|
|
28
|
+
Requires-Dist: elasticsearch[async]<9.0.0,>=8.0.0; extra == 'all'
|
|
29
|
+
Requires-Dist: psycopg2>=2.9.0; extra == 'all'
|
|
30
|
+
Requires-Dist: pyarrow>=14.0.0; extra == 'all'
|
|
31
|
+
Requires-Dist: sqlalchemy>=2.0.0; extra == 'all'
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: black>=24.0.0; extra == 'dev'
|
|
34
|
+
Requires-Dist: moto>=4.2.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest-mock>=3.11.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
40
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
41
|
+
Requires-Dist: testcontainers>=3.7.0; extra == 'dev'
|
|
42
|
+
Provides-Extra: elasticsearch
|
|
43
|
+
Requires-Dist: elasticsearch[async]<9.0.0,>=8.0.0; extra == 'elasticsearch'
|
|
44
|
+
Provides-Extra: parquet
|
|
45
|
+
Requires-Dist: pyarrow>=14.0.0; extra == 'parquet'
|
|
46
|
+
Provides-Extra: postgres
|
|
47
|
+
Requires-Dist: asyncpg>=0.29.0; extra == 'postgres'
|
|
48
|
+
Requires-Dist: psycopg2>=2.9.0; extra == 'postgres'
|
|
49
|
+
Requires-Dist: sqlalchemy>=2.0.0; extra == 'postgres'
|
|
50
|
+
Provides-Extra: s3
|
|
51
|
+
Requires-Dist: aioboto3>=12.0.0; extra == 's3'
|
|
52
|
+
Requires-Dist: boto3>=1.26.0; extra == 's3'
|
|
53
|
+
Description-Content-Type: text/markdown
|
|
54
|
+
|
|
55
|
+
# DataKnobs Data Package
|
|
56
|
+
|
|
57
|
+
A unified data abstraction layer that provides consistent database operations across multiple storage technologies.
|
|
58
|
+
|
|
59
|
+
## Overview
|
|
60
|
+
|
|
61
|
+
The `dataknobs-data` package enables seamless data management regardless of the underlying storage mechanism, from in-memory structures to cloud storage and databases. It provides a simple, consistent API for CRUD operations, searching, and data manipulation across diverse backends.
|
|
62
|
+
|
|
63
|
+
## Features
|
|
64
|
+
|
|
65
|
+
- **Unified Interface**: Same API regardless of storage backend
|
|
66
|
+
- **Multiple Backends**: Memory, File (JSON/CSV/Parquet), PostgreSQL, Elasticsearch, S3
|
|
67
|
+
- **Record-Based**: Data represented as structured records with metadata and first-class ID support
|
|
68
|
+
- **Pandas Integration**: Seamless bidirectional conversion to/from DataFrames with type preservation
|
|
69
|
+
- **Migration Utilities**: Backend-to-backend migration, schema evolution, and data transformation
|
|
70
|
+
- **Schema Validation**: Comprehensive validation system with constraints and type coercion
|
|
71
|
+
- **Streaming Support**: Efficient streaming APIs for large datasets
|
|
72
|
+
- **Type Safety**: Strong typing with field validation and automatic type conversion
|
|
73
|
+
- **Async Support**: Both synchronous and asynchronous APIs
|
|
74
|
+
- **Query System**: Powerful, backend-agnostic query capabilities
|
|
75
|
+
- **Configuration Support**: Full integration with DataKnobs configuration system
|
|
76
|
+
- **Batch Operations**: Efficient bulk insert, update, and upsert operations
|
|
77
|
+
- **Connection Management**: Automatic connection lifecycle management
|
|
78
|
+
- **Extensible**: Easy to add custom storage backends, validators, and transformers
|
|
79
|
+
|
|
80
|
+
## Installation
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
# Basic installation
|
|
84
|
+
pip install dataknobs-data
|
|
85
|
+
|
|
86
|
+
# With specific backend support
|
|
87
|
+
pip install dataknobs-data[postgres] # PostgreSQL support
|
|
88
|
+
pip install dataknobs-data[s3] # AWS S3 support
|
|
89
|
+
pip install dataknobs-data[elasticsearch] # Elasticsearch support
|
|
90
|
+
pip install dataknobs-data[all] # All backends
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Quick Start
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from dataknobs_data import AsyncDatabase, Record, Query, Operator
|
|
97
|
+
|
|
98
|
+
# Async usage
|
|
99
|
+
async def main():
|
|
100
|
+
# Create and auto-connect to database
|
|
101
|
+
db = await AsyncDatabase.create("memory")
|
|
102
|
+
|
|
103
|
+
# Create a record
|
|
104
|
+
record = Record({
|
|
105
|
+
"name": "John Doe",
|
|
106
|
+
"age": 30,
|
|
107
|
+
"email": "john@example.com",
|
|
108
|
+
"active": True
|
|
109
|
+
})
|
|
110
|
+
|
|
111
|
+
# CRUD operations
|
|
112
|
+
id = await db.create(record)
|
|
113
|
+
retrieved = await db.read(id)
|
|
114
|
+
record.set_value("age", 31)
|
|
115
|
+
await db.update(id, record)
|
|
116
|
+
await db.delete(id)
|
|
117
|
+
|
|
118
|
+
# Search with queries
|
|
119
|
+
query = (Query()
|
|
120
|
+
.filter("age", Operator.GTE, 25)
|
|
121
|
+
.filter("active", Operator.EQ, True)
|
|
122
|
+
.sort("name")
|
|
123
|
+
.limit(10))
|
|
124
|
+
|
|
125
|
+
results = await db.search(query)
|
|
126
|
+
for record in results:
|
|
127
|
+
print(f"{record.get_value('name')}: {record.get_value('age')}")
|
|
128
|
+
|
|
129
|
+
await db.close()
|
|
130
|
+
|
|
131
|
+
# Synchronous usage
|
|
132
|
+
from dataknobs_data import SyncDatabase
|
|
133
|
+
|
|
134
|
+
db = SyncDatabase.create("memory")
|
|
135
|
+
record = Record({"name": "Jane Doe", "age": 28})
|
|
136
|
+
id = db.create(record)
|
|
137
|
+
retrieved = db.read(id)
|
|
138
|
+
db.close()
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Backend Configuration
|
|
142
|
+
|
|
143
|
+
### File Backend
|
|
144
|
+
```python
|
|
145
|
+
db = await AsyncDatabase.create("file", {
|
|
146
|
+
"path": "/data/records.json",
|
|
147
|
+
"pretty": True,
|
|
148
|
+
"backup": True
|
|
149
|
+
})
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### PostgreSQL Backend
|
|
153
|
+
```python
|
|
154
|
+
db = await AsyncDatabase.create("postgres", {
|
|
155
|
+
"host": "localhost",
|
|
156
|
+
"database": "mydb",
|
|
157
|
+
"user": "user",
|
|
158
|
+
"password": "pass",
|
|
159
|
+
"table": "records",
|
|
160
|
+
"schema": "public"
|
|
161
|
+
})
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### S3 Backend
|
|
165
|
+
```python
|
|
166
|
+
db = await AsyncDatabase.create("s3", {
|
|
167
|
+
"bucket": "my-bucket",
|
|
168
|
+
"prefix": "records/",
|
|
169
|
+
"region": "us-west-2",
|
|
170
|
+
"aws_access_key_id": "key",
|
|
171
|
+
"aws_secret_access_key": "secret"
|
|
172
|
+
})
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Elasticsearch Backend
|
|
176
|
+
```python
|
|
177
|
+
db = await AsyncDatabase.create("elasticsearch", {
|
|
178
|
+
"host": "localhost",
|
|
179
|
+
"port": 9200,
|
|
180
|
+
"index": "records",
|
|
181
|
+
"refresh": True
|
|
182
|
+
})
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## Configuration Support
|
|
186
|
+
|
|
187
|
+
The data package fully integrates with the DataKnobs configuration system. All backends inherit from `ConfigurableBase` and can be instantiated from configuration files.
|
|
188
|
+
|
|
189
|
+
### Using Configuration Files
|
|
190
|
+
|
|
191
|
+
```yaml
|
|
192
|
+
# config.yaml
|
|
193
|
+
databases:
|
|
194
|
+
- name: primary
|
|
195
|
+
class: dataknobs_data.backends.postgres.PostgresDatabase
|
|
196
|
+
host: ${DB_HOST:localhost} # Environment variable with default
|
|
197
|
+
port: ${DB_PORT:5432}
|
|
198
|
+
database: myapp
|
|
199
|
+
user: ${DB_USER:postgres}
|
|
200
|
+
password: ${DB_PASSWORD}
|
|
201
|
+
table: records
|
|
202
|
+
|
|
203
|
+
- name: cache
|
|
204
|
+
class: dataknobs_data.backends.memory.MemoryDatabase
|
|
205
|
+
|
|
206
|
+
- name: archive
|
|
207
|
+
class: dataknobs_data.backends.file.SyncFileDatabase
|
|
208
|
+
path: /data/archive.json
|
|
209
|
+
format: json
|
|
210
|
+
compression: gzip
|
|
211
|
+
|
|
212
|
+
- name: cloud_storage
|
|
213
|
+
class: dataknobs_data.backends.s3.S3Database
|
|
214
|
+
bucket: ${S3_BUCKET:my-data-bucket}
|
|
215
|
+
prefix: ${S3_PREFIX:records/}
|
|
216
|
+
region: ${AWS_REGION:us-east-1}
|
|
217
|
+
endpoint_url: ${S3_ENDPOINT} # Optional, for LocalStack/MinIO
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Loading from Configuration
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
from dataknobs_config import Config
|
|
224
|
+
from dataknobs_data import Record, Query
|
|
225
|
+
|
|
226
|
+
# Load configuration
|
|
227
|
+
config = Config("config.yaml")
|
|
228
|
+
|
|
229
|
+
# Create database instances from config
|
|
230
|
+
primary_db = config.get_instance("databases", "primary")
|
|
231
|
+
cache_db = config.get_instance("databases", "cache")
|
|
232
|
+
archive_db = config.get_instance("databases", "archive")
|
|
233
|
+
|
|
234
|
+
# Use the databases normally
|
|
235
|
+
record = Record({"name": "test", "value": 42})
|
|
236
|
+
record_id = primary_db.create(record)
|
|
237
|
+
|
|
238
|
+
# Cache frequently accessed data
|
|
239
|
+
cache_db.create(record)
|
|
240
|
+
|
|
241
|
+
# Archive old records
|
|
242
|
+
archive_db.create(record)
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### Direct Configuration
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
from dataknobs_data.backends.postgres import PostgresDatabase
|
|
249
|
+
|
|
250
|
+
# All backends support from_config classmethod
|
|
251
|
+
db = PostgresDatabase.from_config({
|
|
252
|
+
"host": "localhost",
|
|
253
|
+
"database": "myapp",
|
|
254
|
+
"user": "postgres",
|
|
255
|
+
"password": "secret"
|
|
256
|
+
})
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
## Backend Factory
|
|
260
|
+
|
|
261
|
+
The data package provides a factory pattern for dynamic backend selection:
|
|
262
|
+
|
|
263
|
+
### Using the Factory Directly
|
|
264
|
+
|
|
265
|
+
```python
|
|
266
|
+
from dataknobs_data import DatabaseFactory
|
|
267
|
+
|
|
268
|
+
factory = DatabaseFactory()
|
|
269
|
+
|
|
270
|
+
# Create different backends
|
|
271
|
+
memory_db = factory.create(backend="memory")
|
|
272
|
+
file_db = factory.create(backend="file", path="data.json", format="json")
|
|
273
|
+
s3_db = factory.create(backend="s3", bucket="my-bucket", prefix="data/")
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
### Factory with Configuration
|
|
277
|
+
|
|
278
|
+
```python
|
|
279
|
+
from dataknobs_config import Config
|
|
280
|
+
from dataknobs_data import database_factory
|
|
281
|
+
|
|
282
|
+
# Register factory for cleaner configs
|
|
283
|
+
config = Config()
|
|
284
|
+
config.register_factory("database", database_factory)
|
|
285
|
+
|
|
286
|
+
# Use registered factory in configuration
|
|
287
|
+
config.load({
|
|
288
|
+
"databases": [{
|
|
289
|
+
"name": "main",
|
|
290
|
+
"factory": "database", # Uses registered factory
|
|
291
|
+
"backend": "postgres",
|
|
292
|
+
"host": "localhost",
|
|
293
|
+
"database": "myapp"
|
|
294
|
+
}]
|
|
295
|
+
})
|
|
296
|
+
|
|
297
|
+
db = config.get_instance("databases", "main")
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
### Factory Configuration Examples
|
|
301
|
+
|
|
302
|
+
```yaml
|
|
303
|
+
# Using registered factory (cleaner)
|
|
304
|
+
databases:
|
|
305
|
+
- name: main
|
|
306
|
+
factory: database
|
|
307
|
+
backend: ${DB_BACKEND:postgres}
|
|
308
|
+
host: ${DB_HOST:localhost}
|
|
309
|
+
|
|
310
|
+
# Using module path (no registration needed)
|
|
311
|
+
databases:
|
|
312
|
+
- name: main
|
|
313
|
+
factory: dataknobs_data.factory.database_factory
|
|
314
|
+
backend: postgres
|
|
315
|
+
host: localhost
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
## Pandas Integration
|
|
319
|
+
|
|
320
|
+
The data package provides comprehensive pandas integration for data analysis workflows:
|
|
321
|
+
|
|
322
|
+
```python
|
|
323
|
+
import pandas as pd
|
|
324
|
+
from dataknobs_data.pandas import DataFrameConverter, BatchOperations
|
|
325
|
+
|
|
326
|
+
# Convert records to DataFrame with type preservation
|
|
327
|
+
converter = DataFrameConverter()
|
|
328
|
+
df = converter.records_to_dataframe(records, preserve_types=True)
|
|
329
|
+
|
|
330
|
+
# Perform pandas operations
|
|
331
|
+
df_filtered = df[df['age'] > 25]
|
|
332
|
+
df_aggregated = df.groupby('category').agg({'price': 'mean'})
|
|
333
|
+
|
|
334
|
+
# Convert back to records
|
|
335
|
+
new_records = converter.dataframe_to_records(df_filtered)
|
|
336
|
+
|
|
337
|
+
# Bulk operations with DataFrames
|
|
338
|
+
batch_ops = BatchOperations(database)
|
|
339
|
+
result = batch_ops.bulk_insert_dataframe(df, batch_size=1000)
|
|
340
|
+
print(f"Inserted {result.successful} records")
|
|
341
|
+
|
|
342
|
+
# Upsert from DataFrame
|
|
343
|
+
result = batch_ops.bulk_upsert_dataframe(
|
|
344
|
+
df,
|
|
345
|
+
id_column="user_id",
|
|
346
|
+
merge_strategy="update"
|
|
347
|
+
)
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
## Schema Validation
|
|
351
|
+
|
|
352
|
+
Define and enforce data schemas with comprehensive validation:
|
|
353
|
+
|
|
354
|
+
```python
|
|
355
|
+
from dataknobs_data.validation import Schema, FieldType
|
|
356
|
+
from dataknobs_data.validation.constraints import *
|
|
357
|
+
|
|
358
|
+
# Define schema with constraints
|
|
359
|
+
user_schema = Schema("UserSchema")
|
|
360
|
+
user_schema.field("email", FieldType.STRING,
|
|
361
|
+
required=True,
|
|
362
|
+
constraints=[Pattern(r"^.+@.+\..+$"), Unique()])
|
|
363
|
+
user_schema.field("age", FieldType.INTEGER,
|
|
364
|
+
constraints=[Range(min=0, max=150)])
|
|
365
|
+
user_schema.field("status", FieldType.STRING,
|
|
366
|
+
default="active",
|
|
367
|
+
constraints=[Enum(["active", "inactive", "suspended"])])
|
|
368
|
+
|
|
369
|
+
# Validate records
|
|
370
|
+
result = user_schema.validate(record)
|
|
371
|
+
if not result.valid:
|
|
372
|
+
for error in result.errors:
|
|
373
|
+
print(error)
|
|
374
|
+
|
|
375
|
+
# Automatic type coercion
|
|
376
|
+
record = Record({"age": "30"}) # String value
|
|
377
|
+
result = user_schema.validate(record, coerce=True) # Converts to int
|
|
378
|
+
if result.valid:
|
|
379
|
+
print(record.get_value("age")) # 30 (as integer)
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
## Data Migration
|
|
383
|
+
|
|
384
|
+
Migrate data between backends with transformation support:
|
|
385
|
+
|
|
386
|
+
```python
|
|
387
|
+
from dataknobs_data.migration import Migration, Migrator
|
|
388
|
+
from dataknobs_data.migration.operations import *
|
|
389
|
+
|
|
390
|
+
# Define migration
|
|
391
|
+
migration = Migration("upgrade_schema", "2.0.0")
|
|
392
|
+
migration.add_operation(AddField("created_at", default=datetime.now()))
|
|
393
|
+
migration.add_operation(RenameField("user_name", "username"))
|
|
394
|
+
migration.add_operation(TransformField("email", lambda x: x.lower()))
|
|
395
|
+
|
|
396
|
+
# Migrate between backends
|
|
397
|
+
async def migrate_data():
|
|
398
|
+
source_db = await AsyncDatabase.create("postgres", postgres_config)
|
|
399
|
+
target_db = await AsyncDatabase.create("s3", s3_config)
|
|
400
|
+
|
|
401
|
+
migrator = Migrator(source_db, target_db)
|
|
402
|
+
|
|
403
|
+
# Run migration with progress tracking
|
|
404
|
+
progress = await migrator.migrate(
|
|
405
|
+
migration=migration,
|
|
406
|
+
batch_size=1000,
|
|
407
|
+
on_progress=lambda p: print(f"Progress: {p.percentage:.1f}%")
|
|
408
|
+
)
|
|
409
|
+
|
|
410
|
+
print(f"Migrated: {progress.successful} records")
|
|
411
|
+
print(f"Failed: {progress.failed} records")
|
|
412
|
+
print(f"Duration: {progress.duration}s")
|
|
413
|
+
|
|
414
|
+
await source_db.close()
|
|
415
|
+
await target_db.close()
|
|
416
|
+
```
|
|
417
|
+
|
|
418
|
+
## Advanced Queries
|
|
419
|
+
|
|
420
|
+
```python
|
|
421
|
+
# Complex query with multiple filters
|
|
422
|
+
query = (Query()
|
|
423
|
+
.filter("status", Operator.IN, ["active", "pending"])
|
|
424
|
+
.filter("created_at", Operator.GTE, "2024-01-01")
|
|
425
|
+
.filter("name", Operator.LIKE, "John%")
|
|
426
|
+
.sort("priority", SortOrder.DESC)
|
|
427
|
+
.sort("created_at", SortOrder.ASC)
|
|
428
|
+
.offset(20)
|
|
429
|
+
.limit(10)
|
|
430
|
+
.select(["name", "email", "status"])) # Select specific fields
|
|
431
|
+
|
|
432
|
+
results = await db.search(query)
|
|
433
|
+
```
|
|
434
|
+
|
|
435
|
+
## Streaming Support
|
|
436
|
+
|
|
437
|
+
```python
|
|
438
|
+
from dataknobs_data import StreamConfig
|
|
439
|
+
|
|
440
|
+
# Stream large datasets efficiently
|
|
441
|
+
config = StreamConfig(
|
|
442
|
+
batch_size=100,
|
|
443
|
+
buffer_size=1000
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
# Stream read
|
|
447
|
+
async for record in db.stream_read(query, config):
|
|
448
|
+
# Process each record without loading all into memory
|
|
449
|
+
process_record(record)
|
|
450
|
+
|
|
451
|
+
# Stream write
|
|
452
|
+
result = await db.stream_write(record_generator(), config)
|
|
453
|
+
print(f"Streamed {result.total_processed} records")
|
|
454
|
+
```
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
## Documentation
|
|
458
|
+
|
|
459
|
+
For complete API documentation, see [API Reference](docs/API_REFERENCE.md).
|
|
460
|
+
|
|
461
|
+
## Custom Backend
|
|
462
|
+
|
|
463
|
+
```python
|
|
464
|
+
from dataknobs_data import AsyncDatabase, DatabaseBackend
|
|
465
|
+
|
|
466
|
+
class CustomBackend(DatabaseBackend):
|
|
467
|
+
def create(self, record):
|
|
468
|
+
# Implementation
|
|
469
|
+
pass
|
|
470
|
+
|
|
471
|
+
def read(self, record_id):
|
|
472
|
+
# Implementation
|
|
473
|
+
pass
|
|
474
|
+
|
|
475
|
+
# ... other methods
|
|
476
|
+
|
|
477
|
+
# Register custom backend
|
|
478
|
+
AsyncDatabase.register_backend("custom", CustomBackend)
|
|
479
|
+
|
|
480
|
+
# Use custom backend
|
|
481
|
+
db = await AsyncDatabase.create("custom", config)
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
## Development
|
|
485
|
+
|
|
486
|
+
```bash
|
|
487
|
+
# Install development dependencies
|
|
488
|
+
pip install -e ".[dev]"
|
|
489
|
+
|
|
490
|
+
# Run tests
|
|
491
|
+
pytest
|
|
492
|
+
|
|
493
|
+
# Run tests with coverage
|
|
494
|
+
pytest --cov=dataknobs_data
|
|
495
|
+
|
|
496
|
+
# Type checking
|
|
497
|
+
mypy src/dataknobs_data
|
|
498
|
+
|
|
499
|
+
# Linting
|
|
500
|
+
ruff check src/dataknobs_data
|
|
501
|
+
|
|
502
|
+
# Format code
|
|
503
|
+
black src/dataknobs_data
|
|
504
|
+
```
|
|
505
|
+
|
|
506
|
+
## Architecture
|
|
507
|
+
|
|
508
|
+
The package follows a modular architecture:
|
|
509
|
+
|
|
510
|
+
- **Records**: Data representation with fields and metadata
|
|
511
|
+
- **Database Interface**: Abstract base classes (AsyncDatabase/SyncDatabase) for all backends
|
|
512
|
+
- **Query System**: Backend-agnostic query building
|
|
513
|
+
- **Backends**: Implementations for different storage technologies
|
|
514
|
+
- **Serializers**: Type conversion and format handling
|
|
515
|
+
- **Utils**: Pandas integration and migration tools
|
|
516
|
+
|
|
517
|
+
## Performance
|
|
518
|
+
|
|
519
|
+
The package is designed for optimal performance:
|
|
520
|
+
|
|
521
|
+
- Connection pooling for database backends
|
|
522
|
+
- Batch operations for efficiency
|
|
523
|
+
- Lazy loading and pagination
|
|
524
|
+
- Caching for frequently accessed data
|
|
525
|
+
- Async support for concurrent operations
|
|
526
|
+
|
|
527
|
+
## Contributing
|
|
528
|
+
|
|
529
|
+
Contributions are welcome! Please see our [Contributing Guide](../../CONTRIBUTING.md) for details.
|
|
530
|
+
|
|
531
|
+
## License
|
|
532
|
+
|
|
533
|
+
This project is licensed under the MIT License - see the [LICENSE](../../LICENSE) file for details.
|