datafun-streaming 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/CHANGELOG.md +21 -1
  2. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/PKG-INFO +2 -2
  3. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/README.md +1 -1
  4. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/pyproject.toml +2 -2
  5. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/_version.py +2 -2
  6. datafun_streaming-0.3.0/src/datafun_streaming/storage/duckdb_sql.py +83 -0
  7. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/storage/duckdb_utils.py +0 -10
  8. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/.gitignore +0 -0
  9. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/LICENSE +0 -0
  10. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/__init__.py +0 -0
  11. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/core/__init__.py +0 -0
  12. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/core/types.py +0 -0
  13. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/__init__.py +0 -0
  14. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/errors.py +0 -0
  15. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/reference.py +0 -0
  16. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/types.py +0 -0
  17. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/validation_utils.py +0 -0
  18. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/io/__init__.py +0 -0
  19. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/io/errors.py +0 -0
  20. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/io/io_utils.py +0 -0
  21. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/__init__.py +0 -0
  22. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/errors.py +0 -0
  23. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_admin_utils.py +0 -0
  24. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_connection_utils.py +0 -0
  25. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_consumer_utils.py +0 -0
  26. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_producer_utils.py +0 -0
  27. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_settings.py +0 -0
  28. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/py.typed +0 -0
  29. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/stats/__init__.py +0 -0
  30. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/stats/stats_utils.py +0 -0
  31. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/storage/__init__.py +0 -0
  32. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/visualization/__init__.py +0 -0
  33. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/visualization/chart_utils.py +0 -0
  34. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/__init__.py +0 -0
  35. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_core_types.py +0 -0
  36. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_data_validation_reference.py +0 -0
  37. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_data_validation_types.py +0 -0
  38. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_data_validation_utils.py +0 -0
  39. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_duckdb_utils.py +0 -0
  40. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_io_utils.py +0 -0
  41. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_kafka_error_messages.py +0 -0
  42. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_kafka_settings.py +0 -0
  43. {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_stats_utils.py +0 -0
@@ -13,6 +13,25 @@ and this project adheres to **[Semantic Versioning](https://semver.org/spec/v2.0
13
13
 
14
14
  ---
15
15
 
16
+ ## [0.3.0] - 2026-05-08
17
+
18
+ ### Added
19
+
20
+ - `datafun_streaming.storage.duckdb_sql` - pure SQL string builder functions
21
+ (`build_create_table_sql`, `build_clear_table_sql`, `build_insert_sql`)
22
+ with no database connection required, fully testable in isolation
23
+ - Tests for all three SQL builder functions in `tests/test_duckdb_sql.py`
24
+
25
+ ### Changed
26
+
27
+ - `upsert_row` now requires a caller-supplied `allowed_tables: frozenset[str]`
28
+ parameter - removes the module-level placeholder allowlist and gives
29
+ callers full control over which tables are permitted
30
+ - Removed `_ALLOWED_TABLE_NAMES` placeholder constant from `duckdb_utils.py`
31
+ - pytest `minversion` updated to `9.0`
32
+
33
+ ---
34
+
16
35
  ## [0.2.0] - 2026-05-08
17
36
 
18
37
  ### Changed
@@ -109,7 +128,8 @@ git push origin :refs/tags/vX.Z.Y
109
128
 
110
129
  ## Links
111
130
 
112
- [Unreleased]: https://github.com/denisecase/datafun-streaming/compare/v0.2.0...HEAD
131
+ [Unreleased]: https://github.com/denisecase/datafun-streaming/compare/v0.3.0...HEAD
132
+ [0.3.0]: https://github.com/denisecase/datafun-streaming/releases/tag/v0.3.0
113
133
  [0.2.0]: https://github.com/denisecase/datafun-streaming/releases/tag/v0.2.0
114
134
  [0.1.0]: https://github.com/denisecase/datafun-streaming/releases/tag/v0.1.0
115
135
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datafun-streaming
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Utilities for streaming data analytics with Kafka and DuckDB.
5
5
  Project-URL: Homepage, https://github.com/denisecase/datafun-streaming
6
6
  Project-URL: Repository, https://github.com/denisecase/datafun-streaming
@@ -43,7 +43,7 @@ Description-Content-Type: text/markdown
43
43
  [![PyPI](https://img.shields.io/pypi/v/datafun-streaming?logo=pypi&label=pypi)](https://pypi.org/project/datafun-streaming/)
44
44
  [![Docs Site](https://img.shields.io/badge/docs-site-blue?logo=github)](https://denisecase.github.io/datafun-streaming/)
45
45
  [![Repo](https://img.shields.io/badge/repo-GitHub-black?logo=github)](https://github.com/denisecase/datafun-streaming)
46
- [![Python 3.15+](https://img.shields.io/badge/python-3.15%2B-blue?logo=python)](https://github.com/denisecase/datafun-streaming/blob/main/pyproject.toml)
46
+ [![Python 3.14+](https://img.shields.io/badge/python-3.14%2B-blue?logo=python)](https://github.com/denisecase/datafun-streaming/blob/main/pyproject.toml)
47
47
  [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
48
48
 
49
49
  [![CI](https://github.com/denisecase/datafun-streaming/actions/workflows/ci-python-zensical.yml/badge.svg?branch=main)](https://github.com/denisecase/datafun-streaming/actions/workflows/ci-python-zensical.yml)
@@ -4,7 +4,7 @@
4
4
  [![PyPI](https://img.shields.io/pypi/v/datafun-streaming?logo=pypi&label=pypi)](https://pypi.org/project/datafun-streaming/)
5
5
  [![Docs Site](https://img.shields.io/badge/docs-site-blue?logo=github)](https://denisecase.github.io/datafun-streaming/)
6
6
  [![Repo](https://img.shields.io/badge/repo-GitHub-black?logo=github)](https://github.com/denisecase/datafun-streaming)
7
- [![Python 3.15+](https://img.shields.io/badge/python-3.15%2B-blue?logo=python)](https://github.com/denisecase/datafun-streaming/blob/main/pyproject.toml)
7
+ [![Python 3.14+](https://img.shields.io/badge/python-3.14%2B-blue?logo=python)](https://github.com/denisecase/datafun-streaming/blob/main/pyproject.toml)
8
8
  [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
9
9
 
10
10
  [![CI](https://github.com/denisecase/datafun-streaming/actions/workflows/ci-python-zensical.yml/badge.svg?branch=main)](https://github.com/denisecase/datafun-streaming/actions/workflows/ci-python-zensical.yml)
@@ -110,7 +110,7 @@ reportCallIssue = "none"
110
110
 
111
111
  [tool.pytest.ini_options]
112
112
  # WHY: Consistent test discovery and coverage visibility.
113
- minversion = "7.0"
113
+ minversion = "9.0"
114
114
  testpaths = ["tests"]
115
115
  addopts = "--cov=datafun_streaming --cov-report=term-missing --cov-fail-under=50"
116
116
 
@@ -226,5 +226,5 @@ packages = ["src/datafun_streaming"] # REQ.PACKAGES: Discovery rooted at src/.
226
226
  [tool.hatch.version]
227
227
  # WHY: Version derived from git tags at build time, no manual source file edits.
228
228
  source = "vcs"
229
- fallback-version = "0.2.0" # Used when no git tags present (fresh clone, CI).
229
+ fallback-version = "0.3.0" # Used when no git tags present (fresh clone, CI).
230
230
  tag-pattern = "^(?:v)?(?P<version>\\d+\\.\\d+\\.\\d+)$"
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.2.0'
22
- __version_tuple__ = version_tuple = (0, 2, 0)
21
+ __version__ = version = '0.3.0'
22
+ __version_tuple__ = version_tuple = (0, 3, 0)
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -0,0 +1,83 @@
1
+ """src/datafun_streaming/storage/duckdb_sql.py.
2
+
3
+ Pure SQL string builders for DuckDB streaming tables.
4
+
5
+ These functions build SQL statements as strings.
6
+ They require no database connection and have no side effects.
7
+ Callers must pass validated table and field names (for example, module-level constants), never raw user input.
8
+
9
+ Author: Denise Case
10
+ Date: 2026-05
11
+ """
12
+
13
+ # === EXPORTS ===
14
+
15
+ __all__ = [
16
+ "build_create_table_sql",
17
+ "build_clear_table_sql",
18
+ "build_insert_sql",
19
+ ]
20
+
21
+
22
+ # === DEFINE SQL BUILDER FUNCTIONS ===
23
+
24
+
25
+ def build_create_table_sql(table_name: str, fieldnames: list[str]) -> str:
26
+ """Build a DuckDB CREATE TABLE IF NOT EXISTS statement.
27
+
28
+ All columns are declared as VARCHAR.
29
+ Use this to initialize tables before writing records.
30
+
31
+ Arguments:
32
+ table_name: The table to create.
33
+ fieldnames: The field names to include as VARCHAR columns.
34
+
35
+ Returns:
36
+ A CREATE TABLE IF NOT EXISTS SQL string.
37
+
38
+ Example:
39
+ sql = build_create_table_sql("sales", ["order_id", "region_id"])
40
+ connection.execute(sql)
41
+ """
42
+ columns = ", ".join(f"{field} VARCHAR" for field in fieldnames)
43
+ return f"CREATE TABLE IF NOT EXISTS {table_name} ({columns})" # noqa: S608 - caller responsible for validated table name
44
+
45
+
46
+ def build_clear_table_sql(table_name: str) -> str:
47
+ """Build a DuckDB DELETE statement to clear all rows from a table.
48
+
49
+ Use this at the start of a consumer run to reset the table.
50
+
51
+ Arguments:
52
+ table_name: The table to clear.
53
+
54
+ Returns:
55
+ A DELETE FROM SQL string.
56
+
57
+ Example:
58
+ sql = build_clear_table_sql("sales")
59
+ connection.execute(sql)
60
+ """
61
+ return f"DELETE FROM {table_name}" # noqa: S608 - caller responsible for validated table name
62
+
63
+
64
+ def build_insert_sql(table_name: str, fieldnames: list[str]) -> str:
65
+ """Build a DuckDB INSERT statement with ? parameter placeholders.
66
+
67
+ The returned SQL expects one ? per field, passed as a list of values
68
+ to connection.execute().
69
+
70
+ Arguments:
71
+ table_name: The table to insert into.
72
+ fieldnames: The field names to insert.
73
+
74
+ Returns:
75
+ An INSERT INTO SQL string with ? placeholders.
76
+
77
+ Example:
78
+ sql = build_insert_sql("sales", ["order_id", "region_id"])
79
+ connection.execute(sql, ["S001", "US-MO"])
80
+ """
81
+ columns = ", ".join(fieldnames)
82
+ placeholders = ", ".join("?" for _ in fieldnames)
83
+ return f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})" # noqa: S608 - caller responsible for validated table name
@@ -45,16 +45,6 @@ _DUCKDB_TYPE_MAP: dict[type, str] = {
45
45
  bool: "BOOLEAN",
46
46
  }
47
47
 
48
- # === DECLARE SQL-SAFE TABLE NAMES ===
49
-
50
- _ALLOWED_TABLE_NAMES: frozenset[str] = frozenset(
51
- {
52
- "valid_table",
53
- "rejected_table",
54
- }
55
- )
56
-
57
-
58
48
  # === DEFINE FUNCTIONS ===
59
49
 
60
50