datafun-streaming 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/CHANGELOG.md +21 -1
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/PKG-INFO +2 -2
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/README.md +1 -1
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/pyproject.toml +2 -2
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/_version.py +2 -2
- datafun_streaming-0.3.0/src/datafun_streaming/storage/duckdb_sql.py +83 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/storage/duckdb_utils.py +0 -10
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/.gitignore +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/LICENSE +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/__init__.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/core/__init__.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/core/types.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/__init__.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/errors.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/reference.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/types.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/validation_utils.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/io/__init__.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/io/errors.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/io/io_utils.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/__init__.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/errors.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_admin_utils.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_connection_utils.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_consumer_utils.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_producer_utils.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_settings.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/py.typed +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/stats/__init__.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/stats/stats_utils.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/storage/__init__.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/visualization/__init__.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/visualization/chart_utils.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/__init__.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_core_types.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_data_validation_reference.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_data_validation_types.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_data_validation_utils.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_duckdb_utils.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_io_utils.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_kafka_error_messages.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_kafka_settings.py +0 -0
- {datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/tests/test_stats_utils.py +0 -0
|
@@ -13,6 +13,25 @@ and this project adheres to **[Semantic Versioning](https://semver.org/spec/v2.0
|
|
|
13
13
|
|
|
14
14
|
---
|
|
15
15
|
|
|
16
|
+
## [0.3.0] - 2026-05-08
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
|
|
20
|
+
- `datafun_streaming.storage.duckdb_sql` - pure SQL string builder functions
|
|
21
|
+
(`build_create_table_sql`, `build_clear_table_sql`, `build_insert_sql`)
|
|
22
|
+
with no database connection required, fully testable in isolation
|
|
23
|
+
- Tests for all three SQL builder functions in `tests/test_duckdb_sql.py`
|
|
24
|
+
|
|
25
|
+
### Changed
|
|
26
|
+
|
|
27
|
+
- `upsert_row` now requires caller-supplied `allowed_tables: frozenset[str]`
|
|
28
|
+
parameter - removes the module-level placeholder allowlist and gives
|
|
29
|
+
callers full control over which tables are permitted
|
|
30
|
+
- Removed `_ALLOWED_TABLE_NAMES` placeholder constant from `duckdb_utils.py`
|
|
31
|
+
- pytest `minversion` updated to `9.0`
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
16
35
|
## [0.2.0] - 2026-05-08
|
|
17
36
|
|
|
18
37
|
### Changed
|
|
@@ -109,7 +128,8 @@ git push origin :refs/tags/vX.Z.Y
|
|
|
109
128
|
|
|
110
129
|
## Links
|
|
111
130
|
|
|
112
|
-
[Unreleased]: https://github.com/denisecase/datafun-streaming/compare/v0.2.0...HEAD
|
|
131
|
+
[Unreleased]: https://github.com/denisecase/datafun-streaming/compare/v0.3.0...HEAD
|
|
132
|
+
[0.3.0]: https://github.com/denisecase/datafun-streaming/releases/tag/v0.3.0
|
|
113
133
|
[0.2.0]: https://github.com/denisecase/datafun-streaming/releases/tag/v0.2.0
|
|
114
134
|
[0.1.0]: https://github.com/denisecase/datafun-streaming/releases/tag/v0.1.0
|
|
115
135
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datafun-streaming
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Utilities for streaming data analytics with Kafka and DuckDB.
|
|
5
5
|
Project-URL: Homepage, https://github.com/denisecase/datafun-streaming
|
|
6
6
|
Project-URL: Repository, https://github.com/denisecase/datafun-streaming
|
|
@@ -43,7 +43,7 @@ Description-Content-Type: text/markdown
|
|
|
43
43
|
[](https://pypi.org/project/datafun-streaming/)
|
|
44
44
|
[](https://denisecase.github.io/datafun-streaming/)
|
|
45
45
|
[](https://github.com/denisecase/datafun-streaming)
|
|
46
|
-
[](https://github.com/denisecase/datafun-streaming/blob/main/pyproject.toml)
|
|
47
47
|
[](LICENSE)
|
|
48
48
|
|
|
49
49
|
[](https://github.com/denisecase/datafun-streaming/actions/workflows/ci-python-zensical.yml)
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
[](https://pypi.org/project/datafun-streaming/)
|
|
5
5
|
[](https://denisecase.github.io/datafun-streaming/)
|
|
6
6
|
[](https://github.com/denisecase/datafun-streaming)
|
|
7
|
-
[](https://github.com/denisecase/datafun-streaming/blob/main/pyproject.toml)
|
|
8
8
|
[](LICENSE)
|
|
9
9
|
|
|
10
10
|
[](https://github.com/denisecase/datafun-streaming/actions/workflows/ci-python-zensical.yml)
|
|
@@ -110,7 +110,7 @@ reportCallIssue = "none"
|
|
|
110
110
|
|
|
111
111
|
[tool.pytest.ini_options]
|
|
112
112
|
# WHY: Consistent test discovery and coverage visibility.
|
|
113
|
-
minversion = "
|
|
113
|
+
minversion = "9.0"
|
|
114
114
|
testpaths = ["tests"]
|
|
115
115
|
addopts = "--cov=datafun_streaming --cov-report=term-missing --cov-fail-under=50"
|
|
116
116
|
|
|
@@ -226,5 +226,5 @@ packages = ["src/datafun_streaming"] # REQ.PACKAGES: Discovery rooted at src/.
|
|
|
226
226
|
[tool.hatch.version]
|
|
227
227
|
# WHY: Version derived from git tags at build time, no manual source file edits.
|
|
228
228
|
source = "vcs"
|
|
229
|
-
fallback-version = "0.
|
|
229
|
+
fallback-version = "0.3.0" # Used when no git tags present (fresh clone, CI).
|
|
230
230
|
tag-pattern = "^(?:v)?(?P<version>\\d+\\.\\d+\\.\\d+)$"
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.2.0'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 2, 0)
|
|
21
|
+
__version__ = version = '0.3.0'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 3, 0)
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""src/datafun_streaming/storage/duckdb_sql.py.
|
|
2
|
+
|
|
3
|
+
Pure SQL string builders for DuckDB streaming tables.
|
|
4
|
+
|
|
5
|
+
These functions build SQL statements as strings.
|
|
6
|
+
They require no database connection and have no side effects.
|
|
7
|
+
Table and field names are interpolated into the SQL as-is, so callers must pass trusted constant names, never raw user input.
|
|
8
|
+
|
|
9
|
+
Author: Denise Case
|
|
10
|
+
Date: 2026-05
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
# === EXPORTS ===
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"build_create_table_sql",
|
|
17
|
+
"build_clear_table_sql",
|
|
18
|
+
"build_insert_sql",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# === DEFINE SQL BUILDER FUNCTIONS ===
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def build_create_table_sql(table_name: str, fieldnames: list[str]) -> str:
    """Build a DuckDB CREATE TABLE IF NOT EXISTS statement.

    All columns are declared as VARCHAR.
    Use this to initialize tables before writing records.

    The table name and field names are placed into the SQL text exactly as
    given (no quoting or escaping), so pass only trusted identifiers.

    Arguments:
        table_name: The table to create.
        fieldnames: The field names to include as VARCHAR columns.
            Must contain at least one name.

    Returns:
        A CREATE TABLE IF NOT EXISTS SQL string.

    Raises:
        ValueError: If fieldnames is empty.

    Example:
        sql = build_create_table_sql("sales", ["order_id", "region_id"])
        connection.execute(sql)
    """
    if not fieldnames:
        # WHY: An empty list would render "... table_name ()", a statement
        # with no columns that is not valid SQL. Fail here, where the cause
        # is obvious, instead of later at execute time.
        raise ValueError("fieldnames must contain at least one field name")

    columns = ", ".join(f"{field} VARCHAR" for field in fieldnames)
    return f"CREATE TABLE IF NOT EXISTS {table_name} ({columns})"  # noqa: S608 - caller responsible for validated table name
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def build_clear_table_sql(table_name: str) -> str:
    """Return the SQL that removes every row from a DuckDB table.

    Intended for resetting a table at the start of a consumer run.
    The table name is placed into the SQL text exactly as given.

    Arguments:
        table_name: The table to clear.

    Returns:
        A DELETE FROM SQL string with no WHERE clause.

    Example:
        sql = build_clear_table_sql("sales")
        connection.execute(sql)
    """
    statement = "DELETE FROM {}".format(table_name)  # noqa: S608 - caller responsible for validated table name
    return statement
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def build_insert_sql(table_name: str, fieldnames: list[str]) -> str:
    """Build a DuckDB INSERT statement with ? parameter placeholders.

    The returned SQL expects one ? per field, passed as a list of values
    to connection.execute(). Only the values are parameterized: the table
    name and field names are placed into the SQL text exactly as given,
    so pass only trusted identifiers.

    Arguments:
        table_name: The table to insert into.
        fieldnames: The field names to insert. Must contain at least
            one name.

    Returns:
        An INSERT INTO SQL string with ? placeholders.

    Raises:
        ValueError: If fieldnames is empty.

    Example:
        sql = build_insert_sql("sales", ["order_id", "region_id"])
        connection.execute(sql, ["S001", "US-MO"])
    """
    if not fieldnames:
        # WHY: An empty list would render "... () VALUES ()", which is not
        # a valid INSERT statement. Fail here instead of at execute time.
        raise ValueError("fieldnames must contain at least one field name")

    columns = ", ".join(fieldnames)
    # One "?" per field, in the same order as the column list.
    placeholders = ", ".join("?" for _ in fieldnames)
    return f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"  # noqa: S608 - caller responsible for validated table name
|
{datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/storage/duckdb_utils.py
RENAMED
|
@@ -45,16 +45,6 @@ _DUCKDB_TYPE_MAP: dict[type, str] = {
|
|
|
45
45
|
bool: "BOOLEAN",
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
-
# === DECLARE SQL-SAFE TABLE NAMES ===
|
|
49
|
-
|
|
50
|
-
_ALLOWED_TABLE_NAMES: frozenset[str] = frozenset(
|
|
51
|
-
{
|
|
52
|
-
"valid_table",
|
|
53
|
-
"rejected_table",
|
|
54
|
-
}
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
|
|
58
48
|
# === DEFINE FUNCTIONS ===
|
|
59
49
|
|
|
60
50
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/errors.py
RENAMED
|
File without changes
|
|
File without changes
|
{datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/types.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_admin_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_settings.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/stats/stats_utils.py
RENAMED
|
File without changes
|
{datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/storage/__init__.py
RENAMED
|
File without changes
|
{datafun_streaming-0.2.0 → datafun_streaming-0.3.0}/src/datafun_streaming/visualization/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|