datafun-streaming 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datafun_streaming-0.1.0/.gitignore +110 -0
- datafun_streaming-0.1.0/CHANGELOG.md +107 -0
- datafun_streaming-0.1.0/LICENSE +21 -0
- datafun_streaming-0.1.0/PKG-INFO +168 -0
- datafun_streaming-0.1.0/README.md +129 -0
- datafun_streaming-0.1.0/pyproject.toml +230 -0
- datafun_streaming-0.1.0/src/datafun_streaming/__init__.py +1 -0
- datafun_streaming-0.1.0/src/datafun_streaming/_version.py +24 -0
- datafun_streaming-0.1.0/src/datafun_streaming/core/__init__.py +1 -0
- datafun_streaming-0.1.0/src/datafun_streaming/core/types.py +16 -0
- datafun_streaming-0.1.0/src/datafun_streaming/data_validation/__init__.py +1 -0
- datafun_streaming-0.1.0/src/datafun_streaming/data_validation/errors.py +24 -0
- datafun_streaming-0.1.0/src/datafun_streaming/data_validation/reference.py +63 -0
- datafun_streaming-0.1.0/src/datafun_streaming/data_validation/types.py +42 -0
- datafun_streaming-0.1.0/src/datafun_streaming/data_validation/validation_utils.py +143 -0
- datafun_streaming-0.1.0/src/datafun_streaming/io/__init__.py +1 -0
- datafun_streaming-0.1.0/src/datafun_streaming/io/errors.py +50 -0
- datafun_streaming-0.1.0/src/datafun_streaming/io/io_utils.py +109 -0
- datafun_streaming-0.1.0/src/datafun_streaming/kafka/__init__.py +1 -0
- datafun_streaming-0.1.0/src/datafun_streaming/kafka/errors.py +150 -0
- datafun_streaming-0.1.0/src/datafun_streaming/kafka/kafka_admin_utils.py +211 -0
- datafun_streaming-0.1.0/src/datafun_streaming/kafka/kafka_connection_utils.py +46 -0
- datafun_streaming-0.1.0/src/datafun_streaming/kafka/kafka_consumer_utils.py +62 -0
- datafun_streaming-0.1.0/src/datafun_streaming/kafka/kafka_producer_utils.py +96 -0
- datafun_streaming-0.1.0/src/datafun_streaming/kafka/kafka_settings.py +79 -0
- datafun_streaming-0.1.0/src/datafun_streaming/py.typed +0 -0
- datafun_streaming-0.1.0/src/datafun_streaming/stats/__init__.py +1 -0
- datafun_streaming-0.1.0/src/datafun_streaming/stats/stats_utils.py +110 -0
- datafun_streaming-0.1.0/src/datafun_streaming/storage/__init__.py +1 -0
- datafun_streaming-0.1.0/src/datafun_streaming/storage/duckdb_utils.py +244 -0
- datafun_streaming-0.1.0/src/datafun_streaming/visualization/__init__.py +1 -0
- datafun_streaming-0.1.0/src/datafun_streaming/visualization/chart_utils.py +150 -0
- datafun_streaming-0.1.0/tests/__init__.py +1 -0
- datafun_streaming-0.1.0/tests/test_core_types.py +14 -0
- datafun_streaming-0.1.0/tests/test_data_validation_reference.py +68 -0
- datafun_streaming-0.1.0/tests/test_data_validation_types.py +41 -0
- datafun_streaming-0.1.0/tests/test_data_validation_utils.py +129 -0
- datafun_streaming-0.1.0/tests/test_duckdb_utils.py +99 -0
- datafun_streaming-0.1.0/tests/test_io_utils.py +104 -0
- datafun_streaming-0.1.0/tests/test_kafka_error_messages.py +60 -0
- datafun_streaming-0.1.0/tests/test_kafka_settings.py +77 -0
- datafun_streaming-0.1.0/tests/test_stats_utils.py +76 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# ============================================================
|
|
2
|
+
# .gitignore (Keep unnecessary files out of the repository)
|
|
3
|
+
# ============================================================
|
|
4
|
+
|
|
5
|
+
# REQ.UNIVERSAL: All professional GitHub project repositories MUST include .gitignore.
|
|
6
|
+
# WHY: Prevent committing generated artifacts, local state, secrets, and OS-specific files.
|
|
7
|
+
# ALT: Repository may customize ignores, but MUST preserve universal safety rules.
|
|
8
|
+
# CUSTOM: Logs may be temporarily committed for verification; keep ignored for
|
|
9
|
+
# production use and security.
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# === Universal (all projects, all languages) ===
|
|
13
|
+
|
|
14
|
+
# WHY: Logs are useful during debugging and verification.
|
|
15
|
+
# ALT: Comment if logs must be inspected or validated.
|
|
16
|
+
*.log
|
|
17
|
+
logs/
|
|
18
|
+
PRIVATE_NOTES.md
|
|
19
|
+
PRIVATE-NOTES.md
|
|
20
|
+
|
|
21
|
+
# WHY: Temporary and swap files are machine-local noise and create meaningless diffs.
|
|
22
|
+
*.swo
|
|
23
|
+
*.swp
|
|
24
|
+
*.tmp
|
|
25
|
+
*~
|
|
26
|
+
|
|
27
|
+
# === VS Code (special case) ===
|
|
28
|
+
|
|
29
|
+
# WHY: Ignore editor state while allowing a shared baseline configuration.
|
|
30
|
+
.vscode/
|
|
31
|
+
|
|
32
|
+
# WHY: Commit recommended extensions (opt-in) for consistent development experience.
|
|
33
|
+
# NOTE: Share recommendations, not personal editor styles or preferences.
|
|
34
|
+
# Re-include .vscode/ so the allowlisted files below can be tracked.
# (gitignore has no inline comments; a mid-line "#" is part of the pattern.)
!.vscode/
|
|
35
|
+
!.vscode/extensions.json
|
|
36
|
+
!.vscode/settings.json
|
|
37
|
+
|
|
38
|
+
# === OS-specific files (macOS / Windows) ===
|
|
39
|
+
|
|
40
|
+
# WHY: OS-generated metadata files should never be tracked.
|
|
41
|
+
.AppleDouble
|
|
42
|
+
.DS_Store
|
|
43
|
+
.LSOverride
|
|
44
|
+
Icon\r
|
|
45
|
+
._*
|
|
46
|
+
.Spotlight-V100/
|
|
47
|
+
.Trashes
|
|
48
|
+
desktop.ini
|
|
49
|
+
ehthumbs.db
|
|
50
|
+
Thumbs.db
|
|
51
|
+
|
|
52
|
+
# === Editors / IDEs (non-VS Code) ===
|
|
53
|
+
|
|
54
|
+
# WHY: IDE metadata is machine-local and should not be tracked.
|
|
55
|
+
*.code-workspace
|
|
56
|
+
.idea/
|
|
57
|
+
|
|
58
|
+
# === Environment variables and secrets ===
|
|
59
|
+
|
|
60
|
+
# WHY: Never commit credentials or environment-specific configuration.
|
|
61
|
+
.env
|
|
62
|
+
.env.*
|
|
63
|
+
*.env
|
|
64
|
+
|
|
65
|
+
# === Documentation build output ===
|
|
66
|
+
|
|
67
|
+
# WHY: Static site build output is generated.
|
|
68
|
+
site/
|
|
69
|
+
|
|
70
|
+
# === Generic caches ===
|
|
71
|
+
|
|
72
|
+
# WHY: Generic caches are machine-local and should not be tracked.
|
|
73
|
+
.cache/
|
|
74
|
+
|
|
75
|
+
# === Python ===
|
|
76
|
+
|
|
77
|
+
# WHY: Virtual environments are machine-local and reproducible.
|
|
78
|
+
.venv/
|
|
79
|
+
venv/
|
|
80
|
+
|
|
81
|
+
# REQ.PYTHON: Do NOT git ignore uv.lock. Commit it and use it in CI/CD pipelines.
|
|
82
|
+
|
|
83
|
+
# WHY: Python version when using scm matches any repo depth and any package name
|
|
84
|
+
**/src/**/_version.py
|
|
85
|
+
|
|
86
|
+
# WHY: Python bytecode is generated.
|
|
87
|
+
__pycache__/
|
|
88
|
+
*.pyc
|
|
89
|
+
*.pyd
|
|
90
|
+
*.pyo
|
|
91
|
+
|
|
92
|
+
# WHY: Build and packaging artifacts are generated.
|
|
93
|
+
.eggs/
|
|
94
|
+
build/
|
|
95
|
+
dist/
|
|
96
|
+
*.egg
|
|
97
|
+
*.egg-info/
|
|
98
|
+
*.whl
|
|
99
|
+
|
|
100
|
+
# WHY: Tooling caches should not be tracked.
|
|
101
|
+
.coverage
|
|
102
|
+
.coverage.*
|
|
103
|
+
.mypy_cache/
|
|
104
|
+
.pytest_cache/
|
|
105
|
+
.pytype/
|
|
106
|
+
.ruff_cache/
|
|
107
|
+
.tox/
|
|
108
|
+
|
|
109
|
+
# WHY: Notebook checkpoint state is generated.
|
|
110
|
+
.ipynb_checkpoints/
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
<!-- markdownlint-disable MD024 -->
|
|
4
|
+
|
|
5
|
+
All notable changes to this project will be documented in this file.
|
|
6
|
+
|
|
7
|
+
The format is based on **[Keep a Changelog](https://keepachangelog.com/en/1.1.0/)**
|
|
8
|
+
and this project adheres to **[Semantic Versioning](https://semver.org/spec/v2.0.0.html)**.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## [Unreleased]
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## [0.1.0] - 2026-05-08
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
|
|
20
|
+
- `datafun_streaming.core` - shared type aliases
|
|
21
|
+
(`DataRecordDict`, `DataRecordDictList`) used across all subpackages
|
|
22
|
+
- `datafun_streaming.data_validation` - field-level validators
|
|
23
|
+
(`validate_required_fields`, `validate_boolean_text`, `validate_datetime`,
|
|
24
|
+
`validate_positive_integer`), `ValidationResult` dataclass, `make_lookup_set`,
|
|
25
|
+
`validate_reference_records`, and `add_validation_errors`
|
|
26
|
+
- `datafun_streaming.io` - CSV and JSON file I/O utilities (`read_csv_rows`, `read_csv_as_lookup`,
|
|
27
|
+
`append_csv_row`, `row_to_json`, `row_from_json`, `format_message_for_log`)
|
|
28
|
+
- `datafun_streaming.kafka` - Kafka producer, consumer, admin, and connection utilities built on
|
|
29
|
+
`confluent-kafka`; `KafkaSettings` frozen dataclass with `.from_env()` loader
|
|
30
|
+
- `datafun_streaming.stats` - `RunningStats` dataclass for incremental count,
|
|
31
|
+
total, mean, minimum, and maximum without storing message history
|
|
32
|
+
- `datafun_streaming.storage` - DuckDB utilities with schema inference,
|
|
33
|
+
allowlist-validated `upsert_row`, parameterized `query_db`, and `safe_table_name` injection guard
|
|
34
|
+
- `datafun_streaming.visualization` - `StreamingChart` dataclass and Plotly helpers
|
|
35
|
+
(`init_chart`, `update_chart`, `save_chart`) for live data visualization
|
|
36
|
+
- Full test suite covering all pure-Python subpackages
|
|
37
|
+
(validation, IO, stats, storage, Kafka settings, Kafka error messages)
|
|
38
|
+
- GitHub Actions CI workflow (pre-commit, pyright, pytest, zensical docs build)
|
|
39
|
+
- GitHub Actions release workflow (PyPI Trusted Publishing via OIDC, GitHub Pages docs deploy)
|
|
40
|
+
- `pyproject.toml` with hatchling + hatch-vcs, ruff strict config, pyright basic config,
|
|
41
|
+
and pytest coverage
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Notes on versioning and releases
|
|
46
|
+
|
|
47
|
+
- We use **SemVer**:
|
|
48
|
+
- **MAJOR** - breaking changes
|
|
49
|
+
- **MINOR** - backward-compatible additions
|
|
50
|
+
- **PATCH** - fixes, documentation, tooling
|
|
51
|
+
- Versions are driven by git tags. Tag `vX.Y.Z` to release.
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Release Procedure (Required)
|
|
56
|
+
|
|
57
|
+
Follow these steps exactly when creating a new release.
|
|
58
|
+
|
|
59
|
+
### Task 1. Update release metadata (manual edits)
|
|
60
|
+
|
|
61
|
+
1.1. `CITATION.cff` - update `version` and `date-released`
|
|
62
|
+
1.2. CHANGELOG.md: add section, move unreleased entries, update links
|
|
63
|
+
1.3. pyproject.toml - update [tool.hatch.version].fallback-version (near the end)
|
|
64
|
+
|
|
65
|
+
### Task 2. Sync Version and Validate
|
|
66
|
+
|
|
67
|
+
```shell
|
|
68
|
+
uv sync --extra dev --extra docs --upgrade
|
|
69
|
+
git add -A
|
|
70
|
+
uvx pre-commit run --all-files
|
|
71
|
+
uv run python -m pyright
|
|
72
|
+
uv run python -m pytest
|
|
73
|
+
uv run python -m zensical build
|
|
74
|
+
uvx validate-pyproject[all] pyproject.toml
|
|
75
|
+
|
|
76
|
+
uv build
|
|
77
|
+
uv run python -m twine check dist/*
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Task 3. Commit, tag, push
|
|
81
|
+
|
|
82
|
+
```shell
|
|
83
|
+
git add -A
|
|
84
|
+
git commit -m "Prep X.Y.Z"
|
|
85
|
+
git push -u origin main
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Verify actions run on GitHub. After success:
|
|
89
|
+
|
|
90
|
+
```shell
|
|
91
|
+
git tag vX.Y.Z -m "X.Y.Z"
|
|
92
|
+
git push origin vX.Y.Z
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Only As Needed (delete a tag)
|
|
96
|
+
|
|
97
|
+
```shell
|
|
98
|
+
git tag -d vX.Y.Z
|
|
99
|
+
git push origin :refs/tags/vX.Y.Z
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Links
|
|
103
|
+
|
|
104
|
+
[Unreleased]: https://github.com/denisecase/datafun-streaming/compare/v0.1.0...HEAD
|
|
105
|
+
[0.1.0]: https://github.com/denisecase/datafun-streaming/releases/tag/v0.1.0
|
|
106
|
+
|
|
107
|
+
<!-- markdownlint-enable MD024 -->
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Denise Case
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datafun-streaming
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Utilities for streaming data analytics with Kafka and DuckDB.
|
|
5
|
+
Project-URL: Homepage, https://github.com/denisecase/datafun-streaming
|
|
6
|
+
Project-URL: Repository, https://github.com/denisecase/datafun-streaming
|
|
7
|
+
Project-URL: Documentation, https://denisecase.github.io/datafun-streaming/
|
|
8
|
+
Project-URL: Issues, https://github.com/denisecase/datafun-streaming/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/denisecase/datafun-streaming/blob/main/CHANGELOG.md
|
|
10
|
+
Author-email: Denise Case <dcase@nwmissouri.edu>
|
|
11
|
+
License-Expression: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: confluent-kafka,data-analytics,data-validation,running-statistics,streaming-data
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.14
|
|
23
|
+
Requires-Dist: confluent-kafka
|
|
24
|
+
Requires-Dist: datafun-toolkit
|
|
25
|
+
Requires-Dist: duckdb
|
|
26
|
+
Requires-Dist: plotly
|
|
27
|
+
Requires-Dist: python-dotenv
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pre-commit; extra == 'dev'
|
|
30
|
+
Requires-Dist: pyright; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
33
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
34
|
+
Requires-Dist: twine; extra == 'dev'
|
|
35
|
+
Provides-Extra: docs
|
|
36
|
+
Requires-Dist: mkdocstrings[python]; extra == 'docs'
|
|
37
|
+
Requires-Dist: zensical; extra == 'docs'
|
|
38
|
+
Description-Content-Type: text/markdown
|
|
39
|
+
|
|
40
|
+
# datafun-streaming
|
|
41
|
+
|
|
42
|
+
[](https://github.com/denisecase)
|
|
43
|
+
[](https://pypi.org/project/datafun-streaming/)
|
|
44
|
+
[](https://denisecase.github.io/datafun-streaming/)
|
|
45
|
+
[](https://github.com/denisecase/datafun-streaming)
|
|
46
|
+
[](https://github.com/denisecase/datafun-streaming/blob/main/pyproject.toml)
|
|
47
|
+
[](LICENSE)
|
|
48
|
+
|
|
49
|
+
[](https://github.com/denisecase/datafun-streaming/actions/workflows/ci-python-zensical.yml)
|
|
50
|
+
[](https://github.com/denisecase/datafun-streaming/actions/workflows/deploy-zensical.yml)
|
|
51
|
+
[](https://github.com/denisecase/datafun-streaming/actions/workflows/links.yml)
|
|
52
|
+
|
|
53
|
+
> Shared Python utilities for Kafka, DuckDB, validation, stats, and visualization
|
|
54
|
+
> across streaming data analytics projects.
|
|
55
|
+
|
|
56
|
+
## Command Reference
|
|
57
|
+
|
|
58
|
+
<details>
|
|
59
|
+
<summary>Show command reference</summary>
|
|
60
|
+
|
|
61
|
+
### In a machine terminal
|
|
62
|
+
|
|
63
|
+
Open a machine terminal where you want the project:
|
|
64
|
+
|
|
65
|
+
```shell
|
|
66
|
+
git clone https://github.com/denisecase/datafun-streaming
|
|
67
|
+
|
|
68
|
+
cd datafun-streaming
|
|
69
|
+
code .
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### In a VS Code terminal
|
|
73
|
+
|
|
74
|
+
```shell
|
|
75
|
+
# reset uv cache only after suspected cache corruption or strange dependency errors
|
|
76
|
+
# uv cache clean
|
|
77
|
+
|
|
78
|
+
uv self update
|
|
79
|
+
uv python pin 3.14
|
|
80
|
+
uv sync --extra dev --extra docs --upgrade
|
|
81
|
+
|
|
82
|
+
uvx pre-commit install
|
|
83
|
+
|
|
84
|
+
git add -A
|
|
85
|
+
uvx pre-commit run --all-files
|
|
86
|
+
# repeat if changes were made
|
|
87
|
+
git add -A
|
|
88
|
+
uvx pre-commit run --all-files
|
|
89
|
+
|
|
90
|
+
# do chores
|
|
91
|
+
uv run python -m ruff format .
|
|
92
|
+
uv run python -m ruff check . --fix
|
|
93
|
+
uv run python -m pyright
|
|
94
|
+
uv run python -m pytest
|
|
95
|
+
uv run python -m zensical build
|
|
96
|
+
|
|
97
|
+
# save progress
|
|
98
|
+
git add -A
|
|
99
|
+
git commit -m "update"
|
|
100
|
+
git push -u origin main
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
</details>
|
|
104
|
+
|
|
105
|
+
## Notes
|
|
106
|
+
|
|
107
|
+
- Use the **UP ARROW** and **DOWN ARROW** in the terminal to scroll through past commands.
|
|
108
|
+
- Use `CTRL+f` to find (and replace) text within a file.
|
|
109
|
+
- You do not need to add to or modify `tests/`. They are provided for example only.
|
|
110
|
+
- Many files are silent helpers. Explore as you like, but nothing is required.
|
|
111
|
+
- You do NOT need to understand everything; understanding builds naturally over time.
|
|
112
|
+
|
|
113
|
+
## Troubleshooting `>>>` or `...`
|
|
114
|
+
|
|
115
|
+
If you see something like this in your terminal: `>>>` or `...`
|
|
116
|
+
You accidentally started Python interactive mode.
|
|
117
|
+
It happens.
|
|
118
|
+
Press `Ctrl+c` (both keys together) or `Ctrl+Z` then `Enter` on Windows.
|
|
119
|
+
|
|
120
|
+
## Example Output
|
|
121
|
+
|
|
122
|
+
```shell
|
|
123
|
+
| INFO | P01 | ========================
|
|
124
|
+
| INFO | P01 | START main()
|
|
125
|
+
| INFO | P01 | ========================
|
|
126
|
+
| INFO | P01 | ROOT_DIR = .
|
|
127
|
+
| INFO | P01 | DATA_DIR = data
|
|
128
|
+
| INFO | P01 | OUTPUT_CSV = data\sales.csv
|
|
129
|
+
| INFO | P01 | Streaming 3 sales to C:\Repos\streaming\datafun-streaming\data\sales.csv ...
|
|
130
|
+
| INFO | P01 | Watch each sale arrive. Press CTRL+C to stop early.
|
|
131
|
+
|
|
132
|
+
| INFO | P01 | (1, 81.87, 'Backpack', 'East')
|
|
133
|
+
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
134
|
+
| INFO | P01 | Returning the str to the calling function.
|
|
135
|
+
| INFO | P01 |
|
|
136
|
+
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
137
|
+
Count of sales : 1
|
|
138
|
+
Minimum sale : $81.87
|
|
139
|
+
Maximum sale : $81.87
|
|
140
|
+
Average sale : $81.87
|
|
141
|
+
Standard deviation: $0.00
|
|
142
|
+
|
|
143
|
+
| INFO | P01 | (2, 101.58, 'Water Bottle', 'North')
|
|
144
|
+
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
145
|
+
| INFO | P01 | Returning the str to the calling function.
|
|
146
|
+
| INFO | P01 |
|
|
147
|
+
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
148
|
+
Count of sales : 2
|
|
149
|
+
Minimum sale : $81.87
|
|
150
|
+
Maximum sale : $101.58
|
|
151
|
+
Average sale : $91.72
|
|
152
|
+
Standard deviation: $13.94
|
|
153
|
+
|
|
154
|
+
| INFO | P01 | (3, 27.15, 'Running Shoes', 'East')
|
|
155
|
+
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
156
|
+
| INFO | P01 | Returning the str to the calling function.
|
|
157
|
+
| INFO | P01 |
|
|
158
|
+
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
159
|
+
Count of sales : 3
|
|
160
|
+
Minimum sale : $27.15
|
|
161
|
+
Maximum sale : $101.58
|
|
162
|
+
Average sale : $70.20
|
|
163
|
+
Standard deviation: $38.56
|
|
164
|
+
|
|
165
|
+
| INFO | P01 | ========================
|
|
166
|
+
| INFO | P01 | Producer executed successfully!
|
|
167
|
+
| INFO | P01 | ========================
|
|
168
|
+
```
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# datafun-streaming
|
|
2
|
+
|
|
3
|
+
[](https://github.com/denisecase)
|
|
4
|
+
[](https://pypi.org/project/datafun-streaming/)
|
|
5
|
+
[](https://denisecase.github.io/datafun-streaming/)
|
|
6
|
+
[](https://github.com/denisecase/datafun-streaming)
|
|
7
|
+
[](https://github.com/denisecase/datafun-streaming/blob/main/pyproject.toml)
|
|
8
|
+
[](LICENSE)
|
|
9
|
+
|
|
10
|
+
[](https://github.com/denisecase/datafun-streaming/actions/workflows/ci-python-zensical.yml)
|
|
11
|
+
[](https://github.com/denisecase/datafun-streaming/actions/workflows/deploy-zensical.yml)
|
|
12
|
+
[](https://github.com/denisecase/datafun-streaming/actions/workflows/links.yml)
|
|
13
|
+
|
|
14
|
+
> Shared Python utilities for Kafka, DuckDB, validation, stats, and visualization
|
|
15
|
+
> across streaming data analytics projects.
|
|
16
|
+
|
|
17
|
+
## Command Reference
|
|
18
|
+
|
|
19
|
+
<details>
|
|
20
|
+
<summary>Show command reference</summary>
|
|
21
|
+
|
|
22
|
+
### In a machine terminal
|
|
23
|
+
|
|
24
|
+
Open a machine terminal where you want the project:
|
|
25
|
+
|
|
26
|
+
```shell
|
|
27
|
+
git clone https://github.com/denisecase/datafun-streaming
|
|
28
|
+
|
|
29
|
+
cd datafun-streaming
|
|
30
|
+
code .
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### In a VS Code terminal
|
|
34
|
+
|
|
35
|
+
```shell
|
|
36
|
+
# reset uv cache only after suspected cache corruption or strange dependency errors
|
|
37
|
+
# uv cache clean
|
|
38
|
+
|
|
39
|
+
uv self update
|
|
40
|
+
uv python pin 3.14
|
|
41
|
+
uv sync --extra dev --extra docs --upgrade
|
|
42
|
+
|
|
43
|
+
uvx pre-commit install
|
|
44
|
+
|
|
45
|
+
git add -A
|
|
46
|
+
uvx pre-commit run --all-files
|
|
47
|
+
# repeat if changes were made
|
|
48
|
+
git add -A
|
|
49
|
+
uvx pre-commit run --all-files
|
|
50
|
+
|
|
51
|
+
# do chores
|
|
52
|
+
uv run python -m ruff format .
|
|
53
|
+
uv run python -m ruff check . --fix
|
|
54
|
+
uv run python -m pyright
|
|
55
|
+
uv run python -m pytest
|
|
56
|
+
uv run python -m zensical build
|
|
57
|
+
|
|
58
|
+
# save progress
|
|
59
|
+
git add -A
|
|
60
|
+
git commit -m "update"
|
|
61
|
+
git push -u origin main
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
</details>
|
|
65
|
+
|
|
66
|
+
## Notes
|
|
67
|
+
|
|
68
|
+
- Use the **UP ARROW** and **DOWN ARROW** in the terminal to scroll through past commands.
|
|
69
|
+
- Use `CTRL+f` to find (and replace) text within a file.
|
|
70
|
+
- You do not need to add to or modify `tests/`. They are provided for example only.
|
|
71
|
+
- Many files are silent helpers. Explore as you like, but nothing is required.
|
|
72
|
+
- You do NOT need to understand everything; understanding builds naturally over time.
|
|
73
|
+
|
|
74
|
+
## Troubleshooting `>>>` or `...`
|
|
75
|
+
|
|
76
|
+
If you see something like this in your terminal: `>>>` or `...`
|
|
77
|
+
You accidentally started Python interactive mode.
|
|
78
|
+
It happens.
|
|
79
|
+
Press `Ctrl+c` (both keys together) or `Ctrl+Z` then `Enter` on Windows.
|
|
80
|
+
|
|
81
|
+
## Example Output
|
|
82
|
+
|
|
83
|
+
```shell
|
|
84
|
+
| INFO | P01 | ========================
|
|
85
|
+
| INFO | P01 | START main()
|
|
86
|
+
| INFO | P01 | ========================
|
|
87
|
+
| INFO | P01 | ROOT_DIR = .
|
|
88
|
+
| INFO | P01 | DATA_DIR = data
|
|
89
|
+
| INFO | P01 | OUTPUT_CSV = data\sales.csv
|
|
90
|
+
| INFO | P01 | Streaming 3 sales to C:\Repos\streaming\datafun-streaming\data\sales.csv ...
|
|
91
|
+
| INFO | P01 | Watch each sale arrive. Press CTRL+C to stop early.
|
|
92
|
+
|
|
93
|
+
| INFO | P01 | (1, 81.87, 'Backpack', 'East')
|
|
94
|
+
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
95
|
+
| INFO | P01 | Returning the str to the calling function.
|
|
96
|
+
| INFO | P01 |
|
|
97
|
+
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
98
|
+
Count of sales : 1
|
|
99
|
+
Minimum sale : $81.87
|
|
100
|
+
Maximum sale : $81.87
|
|
101
|
+
Average sale : $81.87
|
|
102
|
+
Standard deviation: $0.00
|
|
103
|
+
|
|
104
|
+
| INFO | P01 | (2, 101.58, 'Water Bottle', 'North')
|
|
105
|
+
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
106
|
+
| INFO | P01 | Returning the str to the calling function.
|
|
107
|
+
| INFO | P01 |
|
|
108
|
+
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
109
|
+
Count of sales : 2
|
|
110
|
+
Minimum sale : $81.87
|
|
111
|
+
Maximum sale : $101.58
|
|
112
|
+
Average sale : $91.72
|
|
113
|
+
Standard deviation: $13.94
|
|
114
|
+
|
|
115
|
+
| INFO | P01 | (3, 27.15, 'Running Shoes', 'East')
|
|
116
|
+
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
117
|
+
| INFO | P01 | Returning the str to the calling function.
|
|
118
|
+
| INFO | P01 |
|
|
119
|
+
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
120
|
+
Count of sales : 3
|
|
121
|
+
Minimum sale : $27.15
|
|
122
|
+
Maximum sale : $101.58
|
|
123
|
+
Average sale : $70.20
|
|
124
|
+
Standard deviation: $38.56
|
|
125
|
+
|
|
126
|
+
| INFO | P01 | ========================
|
|
127
|
+
| INFO | P01 | Producer executed successfully!
|
|
128
|
+
| INFO | P01 | ========================
|
|
129
|
+
```
|