datafun-streaming 0.1.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/CHANGELOG.md +32 -3
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/PKG-INFO +2 -67
- datafun_streaming-0.3.0/README.md +64 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/pyproject.toml +2 -2
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/_version.py +2 -2
- datafun_streaming-0.3.0/src/datafun_streaming/storage/duckdb_sql.py +83 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/storage/duckdb_utils.py +0 -10
- datafun_streaming-0.1.0/README.md +0 -129
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/.gitignore +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/LICENSE +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/__init__.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/core/__init__.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/core/types.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/__init__.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/errors.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/reference.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/types.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/validation_utils.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/io/__init__.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/io/errors.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/io/io_utils.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/__init__.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/errors.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_admin_utils.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_connection_utils.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_consumer_utils.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_producer_utils.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_settings.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/py.typed +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/stats/__init__.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/stats/stats_utils.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/storage/__init__.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/visualization/__init__.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/visualization/chart_utils.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/tests/__init__.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/tests/test_core_types.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/tests/test_data_validation_reference.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/tests/test_data_validation_types.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/tests/test_data_validation_utils.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/tests/test_duckdb_utils.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/tests/test_io_utils.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/tests/test_kafka_error_messages.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/tests/test_kafka_settings.py +0 -0
- {datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/tests/test_stats_utils.py +0 -0
|
@@ -13,6 +13,34 @@ and this project adheres to **[Semantic Versioning](https://semver.org/spec/v2.0
|
|
|
13
13
|
|
|
14
14
|
---
|
|
15
15
|
|
|
16
|
+
## [0.3.0] - 2026-05-08
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
|
|
20
|
+
- `datafun_streaming.storage.duckdb_sql` - pure SQL string builder functions
|
|
21
|
+
(`build_create_table_sql`, `build_clear_table_sql`, `build_insert_sql`)
|
|
22
|
+
with no database connection required, fully testable in isolation
|
|
23
|
+
- Tests for all three SQL builder functions in `tests/test_duckdb_sql.py`
|
|
24
|
+
|
|
25
|
+
### Changed
|
|
26
|
+
|
|
27
|
+
- `upsert_row` now requires caller-supplied `allowed_tables: frozenset[str]`
|
|
28
|
+
parameter - removes the module-level placeholder allowlist and gives
|
|
29
|
+
callers full control over which tables are permitted
|
|
30
|
+
- Removed `_ALLOWED_TABLE_NAMES` placeholder constant from `duckdb_utils.py`
|
|
31
|
+
- pytest `minversion` updated to `9.0`
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## [0.2.0] - 2026-05-08
|
|
36
|
+
|
|
37
|
+
### Changed
|
|
38
|
+
|
|
39
|
+
- updated README.md
|
|
40
|
+
- added `npx markdownlint-cli2 --fix` to pre-commit
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
16
44
|
## [0.1.0] - 2026-05-08
|
|
17
45
|
|
|
18
46
|
### Added
|
|
@@ -62,7 +90,7 @@ Follow these steps exactly when creating a new release.
|
|
|
62
90
|
1.2. CHANGELOG.md: add section, move unreleased entries, update links
|
|
63
91
|
1.3. pyproject.toml - update [tool.hatch.version].fallback-version (near the end)
|
|
64
92
|
|
|
65
|
-
### Task 2.
|
|
93
|
+
### Task 2. Validate
|
|
66
94
|
|
|
67
95
|
```shell
|
|
68
96
|
uv sync --extra dev --extra docs --upgrade
|
|
@@ -72,7 +100,6 @@ uv run python -m pyright
|
|
|
72
100
|
uv run python -m pytest
|
|
73
101
|
uv run python -m zensical build
|
|
74
102
|
uvx validate-pyproject[all] pyproject.toml
|
|
75
|
-
|
|
76
103
|
uv build
|
|
77
104
|
uv run python -m twine check dist/\*
|
|
78
105
|
```
|
|
@@ -101,7 +128,9 @@ git push origin :refs/tags/vX.Z.Y
|
|
|
101
128
|
|
|
102
129
|
## Links
|
|
103
130
|
|
|
104
|
-
[Unreleased]: https://github.com/denisecase/datafun-streaming/compare/v0.
|
|
131
|
+
[Unreleased]: https://github.com/denisecase/datafun-streaming/compare/v0.3.0...HEAD
|
|
132
|
+
[0.3.0]: https://github.com/denisecase/datafun-streaming/releases/tag/v0.3.0
|
|
133
|
+
[0.2.0]: https://github.com/denisecase/datafun-streaming/releases/tag/v0.2.0
|
|
105
134
|
[0.1.0]: https://github.com/denisecase/datafun-streaming/releases/tag/v0.1.0
|
|
106
135
|
|
|
107
136
|
<!-- markdownlint-enable MD024 -->
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datafun-streaming
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Utilities for streaming data analytics with Kafka and DuckDB.
|
|
5
5
|
Project-URL: Homepage, https://github.com/denisecase/datafun-streaming
|
|
6
6
|
Project-URL: Repository, https://github.com/denisecase/datafun-streaming
|
|
@@ -43,7 +43,7 @@ Description-Content-Type: text/markdown
|
|
|
43
43
|
[](https://pypi.org/project/datafun-streaming/)
|
|
44
44
|
[](https://denisecase.github.io/datafun-streaming/)
|
|
45
45
|
[](https://github.com/denisecase/datafun-streaming)
|
|
46
|
-
[](https://github.com/denisecase/datafun-streaming/blob/main/pyproject.toml)
|
|
47
47
|
[](LICENSE)
|
|
48
48
|
|
|
49
49
|
[](https://github.com/denisecase/datafun-streaming/actions/workflows/ci-python-zensical.yml)
|
|
@@ -101,68 +101,3 @@ git push -u origin main
|
|
|
101
101
|
```
|
|
102
102
|
|
|
103
103
|
</details>
|
|
104
|
-
|
|
105
|
-
## Notes
|
|
106
|
-
|
|
107
|
-
- Use the **UP ARROW** and **DOWN ARROW** in the terminal to scroll through past commands.
|
|
108
|
-
- Use `CTRL+f` to find (and replace) text within a file.
|
|
109
|
-
- You do not need to add to or modify `tests/`. They are provided for example only.
|
|
110
|
-
- Many files are silent helpers. Explore as you like, but nothing is required.
|
|
111
|
-
- You do NOT need to understand everything; understanding builds naturally over time.
|
|
112
|
-
|
|
113
|
-
## Troubleshooting `>>>` or `...`
|
|
114
|
-
|
|
115
|
-
If you see something like this in your terminal: `>>>` or `...`
|
|
116
|
-
You accidentally started Python interactive mode.
|
|
117
|
-
It happens.
|
|
118
|
-
Press `Ctrl+c` (both keys together) or `Ctrl+Z` then `Enter` on Windows.
|
|
119
|
-
|
|
120
|
-
## Example Output
|
|
121
|
-
|
|
122
|
-
```shell
|
|
123
|
-
| INFO | P01 | ========================
|
|
124
|
-
| INFO | P01 | START main()
|
|
125
|
-
| INFO | P01 | ========================
|
|
126
|
-
| INFO | P01 | ROOT_DIR = .
|
|
127
|
-
| INFO | P01 | DATA_DIR = data
|
|
128
|
-
| INFO | P01 | OUTPUT_CSV = data\sales.csv
|
|
129
|
-
| INFO | P01 | Streaming 3 sales to C:\Repos\streaming\datafun-streaming\data\sales.csv ...
|
|
130
|
-
| INFO | P01 | Watch each sale arrive. Press CTRL+C to stop early.
|
|
131
|
-
|
|
132
|
-
| INFO | P01 | (1, 81.87, 'Backpack', 'East')
|
|
133
|
-
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
134
|
-
| INFO | P01 | Returning the str to the calling function.
|
|
135
|
-
| INFO | P01 |
|
|
136
|
-
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
137
|
-
Count of sales : 1
|
|
138
|
-
Minimum sale : $81.87
|
|
139
|
-
Maximum sale : $81.87
|
|
140
|
-
Average sale : $81.87
|
|
141
|
-
Standard deviation: $0.00
|
|
142
|
-
|
|
143
|
-
| INFO | P01 | (2, 101.58, 'Water Bottle', 'North')
|
|
144
|
-
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
145
|
-
| INFO | P01 | Returning the str to the calling function.
|
|
146
|
-
| INFO | P01 |
|
|
147
|
-
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
148
|
-
Count of sales : 2
|
|
149
|
-
Minimum sale : $81.87
|
|
150
|
-
Maximum sale : $101.58
|
|
151
|
-
Average sale : $91.72
|
|
152
|
-
Standard deviation: $13.94
|
|
153
|
-
|
|
154
|
-
| INFO | P01 | (3, 27.15, 'Running Shoes', 'East')
|
|
155
|
-
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
156
|
-
| INFO | P01 | Returning the str to the calling function.
|
|
157
|
-
| INFO | P01 |
|
|
158
|
-
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
159
|
-
Count of sales : 3
|
|
160
|
-
Minimum sale : $27.15
|
|
161
|
-
Maximum sale : $101.58
|
|
162
|
-
Average sale : $70.20
|
|
163
|
-
Standard deviation: $38.56
|
|
164
|
-
|
|
165
|
-
| INFO | P01 | ========================
|
|
166
|
-
| INFO | P01 | Producer executed successfully!
|
|
167
|
-
| INFO | P01 | ========================
|
|
168
|
-
```
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# datafun-streaming
|
|
2
|
+
|
|
3
|
+
[](https://github.com/denisecase)
|
|
4
|
+
[](https://pypi.org/project/datafun-streaming/)
|
|
5
|
+
[](https://denisecase.github.io/datafun-streaming/)
|
|
6
|
+
[](https://github.com/denisecase/datafun-streaming)
|
|
7
|
+
[](https://github.com/denisecase/datafun-streaming/blob/main/pyproject.toml)
|
|
8
|
+
[](LICENSE)
|
|
9
|
+
|
|
10
|
+
[](https://github.com/denisecase/datafun-streaming/actions/workflows/ci-python-zensical.yml)
|
|
11
|
+
[](https://github.com/denisecase/datafun-streaming/actions/workflows/deploy-zensical.yml)
|
|
12
|
+
[](https://github.com/denisecase/datafun-streaming/actions/workflows/links.yml)
|
|
13
|
+
|
|
14
|
+
> Shared Python utilities for Kafka, DuckDB, validation, stats, and visualization
|
|
15
|
+
> across streaming data analytics projects.
|
|
16
|
+
|
|
17
|
+
## Command Reference
|
|
18
|
+
|
|
19
|
+
<details>
|
|
20
|
+
<summary>Show command reference</summary>
|
|
21
|
+
|
|
22
|
+
### In a machine terminal
|
|
23
|
+
|
|
24
|
+
Open a machine terminal where you want the project:
|
|
25
|
+
|
|
26
|
+
```shell
|
|
27
|
+
git clone https://github.com/denisecase/datafun-streaming
|
|
28
|
+
|
|
29
|
+
cd datafun-streaming
|
|
30
|
+
code .
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### In a VS Code terminal
|
|
34
|
+
|
|
35
|
+
```shell
|
|
36
|
+
# reset uv cache only after suspected cache corruption or strange dependency errors
|
|
37
|
+
# uv cache clean
|
|
38
|
+
|
|
39
|
+
uv self update
|
|
40
|
+
uv python pin 3.14
|
|
41
|
+
uv sync --extra dev --extra docs --upgrade
|
|
42
|
+
|
|
43
|
+
uvx pre-commit install
|
|
44
|
+
|
|
45
|
+
git add -A
|
|
46
|
+
uvx pre-commit run --all-files
|
|
47
|
+
# repeat if changes were made
|
|
48
|
+
git add -A
|
|
49
|
+
uvx pre-commit run --all-files
|
|
50
|
+
|
|
51
|
+
# do chores
|
|
52
|
+
uv run python -m ruff format .
|
|
53
|
+
uv run python -m ruff check . --fix
|
|
54
|
+
uv run python -m pyright
|
|
55
|
+
uv run python -m pytest
|
|
56
|
+
uv run python -m zensical build
|
|
57
|
+
|
|
58
|
+
# save progress
|
|
59
|
+
git add -A
|
|
60
|
+
git commit -m "update"
|
|
61
|
+
git push -u origin main
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
</details>
|
|
@@ -110,7 +110,7 @@ reportCallIssue = "none"
|
|
|
110
110
|
|
|
111
111
|
[tool.pytest.ini_options]
|
|
112
112
|
# WHY: Consistent test discovery and coverage visibility.
|
|
113
|
-
minversion = "
|
|
113
|
+
minversion = "9.0"
|
|
114
114
|
testpaths = ["tests"]
|
|
115
115
|
addopts = "--cov=datafun_streaming --cov-report=term-missing --cov-fail-under=50"
|
|
116
116
|
|
|
@@ -226,5 +226,5 @@ packages = ["src/datafun_streaming"] # REQ.PACKAGES: Discovery rooted at src/.
|
|
|
226
226
|
[tool.hatch.version]
|
|
227
227
|
# WHY: Version derived from git tags at build time, no manual source file edits.
|
|
228
228
|
source = "vcs"
|
|
229
|
-
fallback-version = "0.
|
|
229
|
+
fallback-version = "0.3.0" # Used when no git tags present (fresh clone, CI).
|
|
230
230
|
tag-pattern = "^(?:v)?(?P<version>\\d+\\.\\d+\\.\\d+)$"
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.
|
|
22
|
-
__version_tuple__ = version_tuple = (0,
|
|
21
|
+
__version__ = version = '0.3.0'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 3, 0)
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""src/datafun_streaming/storage/duckdb_sql.py.
|
|
2
|
+
|
|
3
|
+
Pure SQL string builders for DuckDB streaming tables.
|
|
4
|
+
|
|
5
|
+
These functions build SQL statements as strings.
|
|
6
|
+
They require no database connection and have no side effects.
|
|
7
|
+
All use module-level constant table names, never raw user input.
|
|
8
|
+
|
|
9
|
+
Author: Denise Case
|
|
10
|
+
Date: 2026-05
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
# === EXPORTS ===
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"build_create_table_sql",
|
|
17
|
+
"build_clear_table_sql",
|
|
18
|
+
"build_insert_sql",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# === DEFINE SQL BUILDER FUNCTIONS ===
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def build_create_table_sql(table_name: str, fieldnames: list[str]) -> str:
    """Return a CREATE TABLE IF NOT EXISTS statement for DuckDB.

    Every column is declared as VARCHAR. Intended for initializing
    streaming tables before any records are written.

    Arguments:
        table_name: The table to create (caller must pre-validate).
        fieldnames: The field names to declare as VARCHAR columns.

    Returns:
        A CREATE TABLE IF NOT EXISTS SQL string.

    Example:
        sql = build_create_table_sql("sales", ["order_id", "region_id"])
        connection.execute(sql)
    """
    column_defs = ", ".join([f"{name} VARCHAR" for name in fieldnames])
    return f"CREATE TABLE IF NOT EXISTS {table_name} ({column_defs})"  # noqa: S608 - caller responsible for validated table name
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def build_clear_table_sql(table_name: str) -> str:
    """Return a DELETE statement that removes every row from a table.

    Typically run at the start of a consumer session so the table
    begins empty.

    Arguments:
        table_name: The table to clear (caller must pre-validate).

    Returns:
        A DELETE FROM SQL string.

    Example:
        sql = build_clear_table_sql("sales")
        connection.execute(sql)
    """
    return "DELETE FROM " + table_name  # noqa: S608 - caller responsible for validated table name
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def build_insert_sql(table_name: str, fieldnames: list[str]) -> str:
    """Return an INSERT statement using ? parameter placeholders.

    The produced SQL carries one ? per field; callers pass the matching
    values as a list to connection.execute().

    Arguments:
        table_name: The table to insert into (caller must pre-validate).
        fieldnames: The field names to insert.

    Returns:
        An INSERT INTO SQL string with ? placeholders.

    Example:
        sql = build_insert_sql("sales", ["order_id", "region_id"])
        connection.execute(sql, ["S001", "US-MO"])
    """
    column_list = ", ".join(fieldnames)
    slots = ", ".join(["?"] * len(fieldnames))
    return f"INSERT INTO {table_name} ({column_list}) VALUES ({slots})"  # noqa: S608 - caller responsible for validated table name
|
{datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/storage/duckdb_utils.py
RENAMED
|
@@ -45,16 +45,6 @@ _DUCKDB_TYPE_MAP: dict[type, str] = {
|
|
|
45
45
|
bool: "BOOLEAN",
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
-
# === DECLARE SQL-SAFE TABLE NAMES ===
|
|
49
|
-
|
|
50
|
-
_ALLOWED_TABLE_NAMES: frozenset[str] = frozenset(
|
|
51
|
-
{
|
|
52
|
-
"valid_table",
|
|
53
|
-
"rejected_table",
|
|
54
|
-
}
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
|
|
58
48
|
# === DEFINE FUNCTIONS ===
|
|
59
49
|
|
|
60
50
|
|
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
# datafun-streaming
|
|
2
|
-
|
|
3
|
-
[](https://github.com/denisecase)
|
|
4
|
-
[](https://pypi.org/project/datafun-streaming/)
|
|
5
|
-
[](https://denisecase.github.io/datafun-streaming/)
|
|
6
|
-
[](https://github.com/denisecase/datafun-streaming)
|
|
7
|
-
[](https://github.com/denisecase/datafun-streaming/blob/main/pyproject.toml)
|
|
8
|
-
[](LICENSE)
|
|
9
|
-
|
|
10
|
-
[](https://github.com/denisecase/datafun-streaming/actions/workflows/ci-python-zensical.yml)
|
|
11
|
-
[](https://github.com/denisecase/datafun-streaming/actions/workflows/deploy-zensical.yml)
|
|
12
|
-
[](https://github.com/denisecase/datafun-streaming/actions/workflows/links.yml)
|
|
13
|
-
|
|
14
|
-
> Shared Python utilities for Kafka, DuckDB, validation, stats, and visualization
|
|
15
|
-
> across streaming data analytics projects.
|
|
16
|
-
|
|
17
|
-
## Command Reference
|
|
18
|
-
|
|
19
|
-
<details>
|
|
20
|
-
<summary>Show command reference</summary>
|
|
21
|
-
|
|
22
|
-
### In a machine terminal
|
|
23
|
-
|
|
24
|
-
Open a machine terminal where you want the project:
|
|
25
|
-
|
|
26
|
-
```shell
|
|
27
|
-
git clone https://github.com/denisecase/datafun-streaming
|
|
28
|
-
|
|
29
|
-
cd datafun-streaming
|
|
30
|
-
code .
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
### In a VS Code terminal
|
|
34
|
-
|
|
35
|
-
```shell
|
|
36
|
-
# reset uv cache only after suspected cache corruption or strange dependency errors
|
|
37
|
-
# uv cache clean
|
|
38
|
-
|
|
39
|
-
uv self update
|
|
40
|
-
uv python pin 3.14
|
|
41
|
-
uv sync --extra dev --extra docs --upgrade
|
|
42
|
-
|
|
43
|
-
uvx pre-commit install
|
|
44
|
-
|
|
45
|
-
git add -A
|
|
46
|
-
uvx pre-commit run --all-files
|
|
47
|
-
# repeat if changes were made
|
|
48
|
-
git add -A
|
|
49
|
-
uvx pre-commit run --all-files
|
|
50
|
-
|
|
51
|
-
# do chores
|
|
52
|
-
uv run python -m ruff format .
|
|
53
|
-
uv run python -m ruff check . --fix
|
|
54
|
-
uv run python -m pyright
|
|
55
|
-
uv run python -m pytest
|
|
56
|
-
uv run python -m zensical build
|
|
57
|
-
|
|
58
|
-
# save progress
|
|
59
|
-
git add -A
|
|
60
|
-
git commit -m "update"
|
|
61
|
-
git push -u origin main
|
|
62
|
-
```
|
|
63
|
-
|
|
64
|
-
</details>
|
|
65
|
-
|
|
66
|
-
## Notes
|
|
67
|
-
|
|
68
|
-
- Use the **UP ARROW** and **DOWN ARROW** in the terminal to scroll through past commands.
|
|
69
|
-
- Use `CTRL+f` to find (and replace) text within a file.
|
|
70
|
-
- You do not need to add to or modify `tests/`. They are provided for example only.
|
|
71
|
-
- Many files are silent helpers. Explore as you like, but nothing is required.
|
|
72
|
-
- You do NOT need to understand everything; understanding builds naturally over time.
|
|
73
|
-
|
|
74
|
-
## Troubleshooting `>>>` or `...`
|
|
75
|
-
|
|
76
|
-
If you see something like this in your terminal: `>>>` or `...`
|
|
77
|
-
You accidentally started Python interactive mode.
|
|
78
|
-
It happens.
|
|
79
|
-
Press `Ctrl+c` (both keys together) or `Ctrl+Z` then `Enter` on Windows.
|
|
80
|
-
|
|
81
|
-
## Example Output
|
|
82
|
-
|
|
83
|
-
```shell
|
|
84
|
-
| INFO | P01 | ========================
|
|
85
|
-
| INFO | P01 | START main()
|
|
86
|
-
| INFO | P01 | ========================
|
|
87
|
-
| INFO | P01 | ROOT_DIR = .
|
|
88
|
-
| INFO | P01 | DATA_DIR = data
|
|
89
|
-
| INFO | P01 | OUTPUT_CSV = data\sales.csv
|
|
90
|
-
| INFO | P01 | Streaming 3 sales to C:\Repos\streaming\datafun-streaming\data\sales.csv ...
|
|
91
|
-
| INFO | P01 | Watch each sale arrive. Press CTRL+C to stop early.
|
|
92
|
-
|
|
93
|
-
| INFO | P01 | (1, 81.87, 'Backpack', 'East')
|
|
94
|
-
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
95
|
-
| INFO | P01 | Returning the str to the calling function.
|
|
96
|
-
| INFO | P01 |
|
|
97
|
-
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
98
|
-
Count of sales : 1
|
|
99
|
-
Minimum sale : $81.87
|
|
100
|
-
Maximum sale : $81.87
|
|
101
|
-
Average sale : $81.87
|
|
102
|
-
Standard deviation: $0.00
|
|
103
|
-
|
|
104
|
-
| INFO | P01 | (2, 101.58, 'Water Bottle', 'North')
|
|
105
|
-
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
106
|
-
| INFO | P01 | Returning the str to the calling function.
|
|
107
|
-
| INFO | P01 |
|
|
108
|
-
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
109
|
-
Count of sales : 2
|
|
110
|
-
Minimum sale : $81.87
|
|
111
|
-
Maximum sale : $101.58
|
|
112
|
-
Average sale : $91.72
|
|
113
|
-
Standard deviation: $13.94
|
|
114
|
-
|
|
115
|
-
| INFO | P01 | (3, 27.15, 'Running Shoes', 'East')
|
|
116
|
-
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
117
|
-
| INFO | P01 | Returning the str to the calling function.
|
|
118
|
-
| INFO | P01 |
|
|
119
|
-
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
120
|
-
Count of sales : 3
|
|
121
|
-
Minimum sale : $27.15
|
|
122
|
-
Maximum sale : $101.58
|
|
123
|
-
Average sale : $70.20
|
|
124
|
-
Standard deviation: $38.56
|
|
125
|
-
|
|
126
|
-
| INFO | P01 | ========================
|
|
127
|
-
| INFO | P01 | Producer executed successfully!
|
|
128
|
-
| INFO | P01 | ========================
|
|
129
|
-
```
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/errors.py
RENAMED
|
File without changes
|
|
File without changes
|
{datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/data_validation/types.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_admin_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/kafka/kafka_settings.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/stats/stats_utils.py
RENAMED
|
File without changes
|
{datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/storage/__init__.py
RENAMED
|
File without changes
|
{datafun_streaming-0.1.0 → datafun_streaming-0.3.0}/src/datafun_streaming/visualization/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|