datafun-streaming 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datafun_streaming/__init__.py +1 -0
- datafun_streaming/_version.py +24 -0
- datafun_streaming/core/__init__.py +1 -0
- datafun_streaming/core/types.py +16 -0
- datafun_streaming/data_validation/__init__.py +1 -0
- datafun_streaming/data_validation/errors.py +24 -0
- datafun_streaming/data_validation/reference.py +63 -0
- datafun_streaming/data_validation/types.py +42 -0
- datafun_streaming/data_validation/validation_utils.py +143 -0
- datafun_streaming/io/__init__.py +1 -0
- datafun_streaming/io/errors.py +50 -0
- datafun_streaming/io/io_utils.py +109 -0
- datafun_streaming/kafka/__init__.py +1 -0
- datafun_streaming/kafka/errors.py +150 -0
- datafun_streaming/kafka/kafka_admin_utils.py +211 -0
- datafun_streaming/kafka/kafka_connection_utils.py +46 -0
- datafun_streaming/kafka/kafka_consumer_utils.py +62 -0
- datafun_streaming/kafka/kafka_producer_utils.py +96 -0
- datafun_streaming/kafka/kafka_settings.py +79 -0
- datafun_streaming/py.typed +0 -0
- datafun_streaming/stats/__init__.py +1 -0
- datafun_streaming/stats/stats_utils.py +110 -0
- datafun_streaming/storage/__init__.py +1 -0
- datafun_streaming/storage/duckdb_utils.py +244 -0
- datafun_streaming/visualization/__init__.py +1 -0
- datafun_streaming/visualization/chart_utils.py +150 -0
- datafun_streaming-0.1.0.dist-info/METADATA +168 -0
- datafun_streaming-0.1.0.dist-info/RECORD +30 -0
- datafun_streaming-0.1.0.dist-info/WHEEL +4 -0
- datafun_streaming-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""src/datafun_streaming/storage/duckdb_utils.py.
|
|
2
|
+
|
|
3
|
+
DuckDB utilities for streaming data.
|
|
4
|
+
|
|
5
|
+
Provides functions to initialize, write to, query, and close
|
|
6
|
+
a DuckDB database from a streaming consumer.
|
|
7
|
+
|
|
8
|
+
This is domain-agnostic: it works with any table name and any row dict.
|
|
9
|
+
Tables are created automatically from the first row received.
|
|
10
|
+
Schema is inferred from Python value types.
|
|
11
|
+
|
|
12
|
+
Author: Denise Case
|
|
13
|
+
Date: 2026-05
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
# === IMPORTS ===
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
import duckdb
|
|
23
|
+
|
|
24
|
+
# === DECLARE EXPORTS
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"connect_to_database",
|
|
28
|
+
"init_db",
|
|
29
|
+
"close_db",
|
|
30
|
+
"upsert_row",
|
|
31
|
+
"query_db",
|
|
32
|
+
"safe_table_name",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
# === CONFIGURE LOGGER ===
|
|
36
|
+
|
|
37
|
+
LOG = logging.getLogger(__name__)
|
|
38
|
+
|
|
39
|
+
# === TYPE MAP: Python types to DuckDB column types ===
|
|
40
|
+
|
|
41
|
+
# Maps a Python value type to the DuckDB column type used when a table
# schema is inferred from the first row (see _ensure_table and
# _infer_column_type). Types not listed here fall back to VARCHAR.
_DUCKDB_TYPE_MAP: dict[type, str] = {
    str: "VARCHAR",
    int: "INTEGER",
    float: "DOUBLE",
    bool: "BOOLEAN",
}
|
|
47
|
+
|
|
48
|
+
# === DECLARE SQL-SAFE TABLE NAMES ===
|
|
49
|
+
|
|
50
|
+
# Default allowlist of SQL-safe table names. SQL identifiers cannot be
# parameterized, so table names are checked against an allowlist
# (see safe_table_name) before being interpolated into SQL text.
_ALLOWED_TABLE_NAMES: frozenset[str] = frozenset(
    {
        "valid_table",
        "rejected_table",
    }
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# === DEFINE FUNCTIONS ===
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def connect_to_database(database_file_path: Path) -> duckdb.DuckDBPyConnection:
    """Open a connection to the DuckDB database file.

    Arguments:
        database_file_path: Path to the DuckDB database file.

    Returns:
        An open DuckDB connection.
    """
    # duckdb.connect expects a string, not a Path.
    db_target = str(database_file_path)
    return duckdb.connect(db_target)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def close_db(conn: duckdb.DuckDBPyConnection) -> None:
    """Shut down an open DuckDB connection.

    Arguments:
        conn: An open DuckDB connection.

    Returns:
        None.
    """
    conn.close()
    LOG.debug("DuckDB connection closed.")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def init_db(path: Path) -> duckdb.DuckDBPyConnection:
    """Open or create a DuckDB database at the given path.

    If the file already exists it is opened and reused.
    If it does not exist it is created, along with any
    missing parent directories.

    Arguments:
        path: File path for the DuckDB database (e.g. data/output/sales.duckdb).

    Returns:
        An open DuckDB connection.
    """
    # Ensure the parent directory exists so duckdb.connect can create the file.
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = duckdb.connect(str(path))
    # Lazy %-style args: the message is only formatted when DEBUG is enabled.
    LOG.debug("DuckDB opened: %s", path)
    return conn
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def safe_table_name(table_name: str, allowed: frozenset[str]) -> str:
    """Validate table_name against an allowlist and return it unchanged.

    SQL identifiers (table names, column names) cannot use parameterized
    query placeholders, so this allowlist check is the guard against
    accidental injection when a caller passes an unexpected string.

    Arguments:
        table_name: The table name to validate.
        allowed: A frozenset of allowed table names.

    Returns:
        The validated table name, unchanged.

    Raises:
        ValueError: If table_name is not in the allowed list.
    """
    # Guard clause: the happy path returns immediately.
    if table_name in allowed:
        return table_name
    msg = (
        f"Table name {table_name!r} is not in the allowed list. "
        f"Allowed: {sorted(allowed)}"
    )
    raise ValueError(msg)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def query_db(
    conn: duckdb.DuckDBPyConnection,
    sql: str,
    params: list[Any] | None = None,
) -> list[dict[str, Any]]:
    """Run a SQL query and return each row as a column-name-to-value dict.

    Arguments:
        conn: Open DuckDB connection.
        sql: SQL query string. Use ? for parameter placeholders.
        params: Optional list of parameter values for placeholders.

    Returns:
        A list of row dicts. Empty list if no rows matched.

    Example:
        rows = query_db(conn, "SELECT * FROM sales WHERE region_id = ?", ["US-MO"])
    """
    bound = [] if params is None else params
    cursor = conn.execute(sql, bound)
    # Column names come from the cursor description metadata.
    names = [entry[0] for entry in cursor.description]
    return [
        dict(zip(names, record, strict=False)) for record in cursor.fetchall()
    ]
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def upsert_row(
    conn: duckdb.DuckDBPyConnection,
    *,
    table: str,
    row: dict[str, Any],
    primary_key: str,
    allowed_tables: frozenset[str],  # caller provides this
) -> None:
    """Insert or replace one row in a DuckDB table.

    Creates the table on the first call if it does not exist.
    On subsequent calls with the same primary key value,
    the existing row is replaced with the new values.

    All arguments after conn must be passed as keyword arguments.

    Arguments:
        conn: Open DuckDB connection.
        table: Table name to write to.
        row: The row to insert or replace.
        primary_key: The field name that uniquely identifies each row.
        allowed_tables: A frozenset of allowed table names.

    Returns:
        None.

    Raises:
        ValueError: If the table name is not in allowed_tables, the
            primary key is not a field of the row, or any column name
            is not a valid Python identifier.
    """
    # Validate before use: table name must be in the allowlist,
    # primary key must be a field in the row. Reusing safe_table_name
    # for the key treats the row's own fields as the allowlist.
    safe = safe_table_name(table, allowed_tables)
    safe_pk = safe_table_name(primary_key, frozenset(row.keys()))

    # Column names also end up interpolated into SQL (here and in
    # _ensure_table) and cannot be parameterized, so reject any key
    # that is not a plain identifier.
    for col in row:
        if not col.isidentifier():
            raise ValueError(f"Column name {col!r} is not a valid identifier.")

    # Pass the validated names so _ensure_table never receives raw input.
    _ensure_table(conn, safe, row, safe_pk)

    # DELETE + INSERT emulates an upsert keyed on the primary key.
    pk_value = row[safe_pk]
    conn.execute(
        f"DELETE FROM {safe} WHERE {safe_pk} = ?",  # noqa: S608 - identifiers validated above
        [pk_value],
    )

    cols = ", ".join(row.keys())
    placeholders = ", ".join(["?"] * len(row))
    conn.execute(
        f"INSERT INTO {safe} ({cols}) VALUES ({placeholders})",  # noqa: S608 - identifiers validated above
        list(row.values()),
    )
    # Lazy %-style args: only formatted when DEBUG logging is enabled.
    LOG.debug("Upserted row into %s with primary key %s=%s", safe, safe_pk, pk_value)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# == DEFINE INTERNAL HELPER FUNCTIONS (not exported) ===
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _ensure_table(
    conn: duckdb.DuckDBPyConnection,
    table: str,
    row: dict[str, Any],
    primary_key: str,
) -> None:
    """Create the table if it does not already exist.

    Infers column types from the Python types of the row values.
    The primary key column is marked as PRIMARY KEY.

    Arguments:
        conn: Open DuckDB connection.
        table: Table name to create.
        row: A representative row dict used for schema inference.
        primary_key: The field name to use as the primary key.

    Returns:
        None.
    """
    defs: list[str] = []
    for name, sample in row.items():
        duck_type = _infer_column_type(sample)
        # Only the primary key column gets the PRIMARY KEY constraint.
        suffix = " PRIMARY KEY" if name == primary_key else ""
        defs.append(f"{name} {duck_type}{suffix}")

    conn.execute(f"CREATE TABLE IF NOT EXISTS {table} ({', '.join(defs)})")  # noqa: S608 - identifiers validated via safe_table_name allowlist
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _infer_column_type(value: Any) -> str:
    """Infer a DuckDB column type from a Python value.

    Arguments:
        value: A Python value whose type will be inspected.

    Returns:
        A DuckDB type string (e.g. "VARCHAR", "DOUBLE").
    """
    # Exact-type lookup; anything unmapped is stored as VARCHAR.
    try:
        return _DUCKDB_TYPE_MAP[type(value)]
    except KeyError:
        return "VARCHAR"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Utilities for visualizing streaming data."""
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""src/datafun_streaming/visualization/chart_utils.py.
|
|
2
|
+
|
|
3
|
+
Chart utilities for streaming data.
|
|
4
|
+
|
|
5
|
+
Provides functions to create, update, and save a line chart
|
|
6
|
+
that accumulates data points as messages are consumed.
|
|
7
|
+
|
|
8
|
+
Uses Plotly to generate an interactive HTML chart.
|
|
9
|
+
The chart is updated in memory as each message arrives
|
|
10
|
+
and saved to disk at the end of the consume loop (Section C4).
|
|
11
|
+
|
|
12
|
+
This is domain-agnostic: pass any numeric field and any label.
|
|
13
|
+
The chart does not know what it is charting.
|
|
14
|
+
|
|
15
|
+
Author: Denise Case
|
|
16
|
+
Date: 2026-05
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
# === IMPORTS ===
|
|
20
|
+
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
import plotly.graph_objects as go
|
|
26
|
+
|
|
27
|
+
# === EXPORTS ===
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"StreamingChart",
|
|
31
|
+
"init_chart",
|
|
32
|
+
"update_chart",
|
|
33
|
+
"save_chart",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
# === DEFINE CHART DATA CLASS ===
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class StreamingChart:
    """Mutable state for a single line-chart series.

    Data points accumulate one at a time as messages arrive.
    The series is rendered to HTML when save_chart() is called.

    Attributes:
        title: Chart title shown at the top.
        x_label: Label for the x-axis.
        y_label: Label for the y-axis.
        x_values: Accumulated x-axis values (e.g. message count).
        y_values: Accumulated y-axis values (e.g. running total).
    """

    title: str
    x_label: str
    y_label: str
    x_values: list[int | float | str] = field(default_factory=list)
    y_values: list[float] = field(default_factory=list)

    @property
    def is_empty(self) -> bool:
        """Return True if no data points have been added yet."""
        return not self.x_values
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# === DEFINE CHART FUNCTIONS ===
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def init_chart(
    *,
    title: str,
    x_label: str,
    y_label: str,
) -> StreamingChart:
    """Build a fresh StreamingChart with no data points.

    Arguments:
        title: Chart title.
        x_label: Label for the x-axis.
        y_label: Label for the y-axis.

    Returns:
        An empty StreamingChart ready to receive data points.
    """
    chart = StreamingChart(title=title, x_label=x_label, y_label=y_label)
    return chart
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def update_chart(
    chart: StreamingChart,
    row: dict[str, Any],
    *,
    x_field: str = "_kafka_offset",
    y_field: str = "total",
) -> None:
    """Append one (x, y) data point taken from a message row.

    Arguments:
        chart: The StreamingChart to update.
        row: The enriched message row.
        x_field: The row field to use as the x-axis value.
            Defaults to _kafka_offset (message sequence number).
        y_field: The row field to use as the y-axis value.
            Defaults to total (post-tax total price).

    Returns:
        None.
    """
    # A missing x-field falls back to the point's ordinal position;
    # a missing y-field falls back to 0.0.
    next_index = len(chart.x_values)
    chart.x_values.append(row.get(x_field, next_index))
    chart.y_values.append(float(row.get(y_field, 0.0)))
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def save_chart(chart: StreamingChart, path: Path) -> None:
    """Write the chart to disk as an interactive HTML file.

    Arguments:
        chart: The StreamingChart to render.
        path: Output file path. Must end in .html.

    Returns:
        None.
    """
    # Nothing to render: skip file creation entirely.
    if chart.is_empty:
        return

    trace = go.Scatter(
        x=chart.x_values,
        y=chart.y_values,
        mode="lines+markers",
        name=chart.y_label,
        line={"width": 2},
        marker={"size": 4},
    )

    fig = go.Figure()
    fig.add_trace(trace)
    fig.update_layout(
        title=chart.title,
        xaxis_title=chart.x_label,
        yaxis_title=chart.y_label,
        hovermode="x unified",
        template="plotly_white",
    )

    path.parent.mkdir(parents=True, exist_ok=True)
    fig.write_html(str(path))
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datafun-streaming
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Utilities for streaming data analytics with Kafka and DuckDB.
|
|
5
|
+
Project-URL: Homepage, https://github.com/denisecase/datafun-streaming
|
|
6
|
+
Project-URL: Repository, https://github.com/denisecase/datafun-streaming
|
|
7
|
+
Project-URL: Documentation, https://denisecase.github.io/datafun-streaming/
|
|
8
|
+
Project-URL: Issues, https://github.com/denisecase/datafun-streaming/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/denisecase/datafun-streaming/blob/main/CHANGELOG.md
|
|
10
|
+
Author-email: Denise Case <dcase@nwmissouri.edu>
|
|
11
|
+
License-Expression: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: confluent-kafka,data-analytics,data-validation,running-statistics,streaming-data
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.14
|
|
23
|
+
Requires-Dist: confluent-kafka
|
|
24
|
+
Requires-Dist: datafun-toolkit
|
|
25
|
+
Requires-Dist: duckdb
|
|
26
|
+
Requires-Dist: plotly
|
|
27
|
+
Requires-Dist: python-dotenv
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pre-commit; extra == 'dev'
|
|
30
|
+
Requires-Dist: pyright; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
33
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
34
|
+
Requires-Dist: twine; extra == 'dev'
|
|
35
|
+
Provides-Extra: docs
|
|
36
|
+
Requires-Dist: mkdocstrings[python]; extra == 'docs'
|
|
37
|
+
Requires-Dist: zensical; extra == 'docs'
|
|
38
|
+
Description-Content-Type: text/markdown
|
|
39
|
+
|
|
40
|
+
# datafun-streaming
|
|
41
|
+
|
|
42
|
+
[](https://github.com/denisecase)
|
|
43
|
+
[](https://pypi.org/project/datafun-streaming/)
|
|
44
|
+
[](https://denisecase.github.io/datafun-streaming/)
|
|
45
|
+
[](https://github.com/denisecase/datafun-streaming)
|
|
46
|
+
[](https://github.com/denisecase/datafun-streaming/blob/main/pyproject.toml)
|
|
47
|
+
[](LICENSE)
|
|
48
|
+
|
|
49
|
+
[](https://github.com/denisecase/datafun-streaming/actions/workflows/ci-python-zensical.yml)
|
|
50
|
+
[](https://github.com/denisecase/datafun-streaming/actions/workflows/deploy-zensical.yml)
|
|
51
|
+
[](https://github.com/denisecase/datafun-streaming/actions/workflows/links.yml)
|
|
52
|
+
|
|
53
|
+
> Shared Python utilities for Kafka, DuckDB, validation, stats, and visualization
|
|
54
|
+
> across streaming data analytics projects.
|
|
55
|
+
|
|
56
|
+
## Command Reference
|
|
57
|
+
|
|
58
|
+
<details>
|
|
59
|
+
<summary>Show command reference</summary>
|
|
60
|
+
|
|
61
|
+
### In a machine terminal
|
|
62
|
+
|
|
63
|
+
Open a machine terminal where you want the project:
|
|
64
|
+
|
|
65
|
+
```shell
|
|
66
|
+
git clone https://github.com/denisecase/datafun-streaming
|
|
67
|
+
|
|
68
|
+
cd datafun-streaming
|
|
69
|
+
code .
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### In a VS Code terminal
|
|
73
|
+
|
|
74
|
+
```shell
|
|
75
|
+
# reset uv cache only after suspected cache corruption or strange dependency errors
|
|
76
|
+
# uv cache clean
|
|
77
|
+
|
|
78
|
+
uv self update
|
|
79
|
+
uv python pin 3.14
|
|
80
|
+
uv sync --extra dev --extra docs --upgrade
|
|
81
|
+
|
|
82
|
+
uvx pre-commit install
|
|
83
|
+
|
|
84
|
+
git add -A
|
|
85
|
+
uvx pre-commit run --all-files
|
|
86
|
+
# repeat if changes were made
|
|
87
|
+
git add -A
|
|
88
|
+
uvx pre-commit run --all-files
|
|
89
|
+
|
|
90
|
+
# do chores
|
|
91
|
+
uv run python -m ruff format .
|
|
92
|
+
uv run python -m ruff check . --fix
|
|
93
|
+
uv run python -m pyright
|
|
94
|
+
uv run python -m pytest
|
|
95
|
+
uv run python -m zensical build
|
|
96
|
+
|
|
97
|
+
# save progress
|
|
98
|
+
git add -A
|
|
99
|
+
git commit -m "update"
|
|
100
|
+
git push -u origin main
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
</details>
|
|
104
|
+
|
|
105
|
+
## Notes
|
|
106
|
+
|
|
107
|
+
- Use the **UP ARROW** and **DOWN ARROW** in the terminal to scroll through past commands.
|
|
108
|
+
- Use `CTRL+f` to find (and replace) text within a file.
|
|
109
|
+
- You do not need to add to or modify `tests/`. They are provided for example only.
|
|
110
|
+
- Many files are silent helpers. Explore as you like, but nothing is required.
|
|
111
|
+
- You do not need to understand everything; understanding builds naturally over time.
|
|
112
|
+
|
|
113
|
+
## Troubleshooting `>>>` or `...`
|
|
114
|
+
|
|
115
|
+
If you see something like this in your terminal: `>>>` or `...`
|
|
116
|
+
You accidentally started Python interactive mode.
|
|
117
|
+
It happens.
|
|
118
|
+
Press `Ctrl+c` (both keys together) or `Ctrl+Z` then `Enter` on Windows.
|
|
119
|
+
|
|
120
|
+
## Example Output
|
|
121
|
+
|
|
122
|
+
```shell
|
|
123
|
+
| INFO | P01 | ========================
|
|
124
|
+
| INFO | P01 | START main()
|
|
125
|
+
| INFO | P01 | ========================
|
|
126
|
+
| INFO | P01 | ROOT_DIR = .
|
|
127
|
+
| INFO | P01 | DATA_DIR = data
|
|
128
|
+
| INFO | P01 | OUTPUT_CSV = data\sales.csv
|
|
129
|
+
| INFO | P01 | Streaming 3 sales to C:\Repos\streaming\datafun-streaming\data\sales.csv ...
|
|
130
|
+
| INFO | P01 | Watch each sale arrive. Press CTRL+C to stop early.
|
|
131
|
+
|
|
132
|
+
| INFO | P01 | (1, 81.87, 'Backpack', 'East')
|
|
133
|
+
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
134
|
+
| INFO | P01 | Returning the str to the calling function.
|
|
135
|
+
| INFO | P01 |
|
|
136
|
+
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
137
|
+
Count of sales : 1
|
|
138
|
+
Minimum sale : $81.87
|
|
139
|
+
Maximum sale : $81.87
|
|
140
|
+
Average sale : $81.87
|
|
141
|
+
Standard deviation: $0.00
|
|
142
|
+
|
|
143
|
+
| INFO | P01 | (2, 101.58, 'Water Bottle', 'North')
|
|
144
|
+
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
145
|
+
| INFO | P01 | Returning the str to the calling function.
|
|
146
|
+
| INFO | P01 |
|
|
147
|
+
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
148
|
+
Count of sales : 2
|
|
149
|
+
Minimum sale : $81.87
|
|
150
|
+
Maximum sale : $101.58
|
|
151
|
+
Average sale : $91.72
|
|
152
|
+
Standard deviation: $13.94
|
|
153
|
+
|
|
154
|
+
| INFO | P01 | (3, 27.15, 'Running Shoes', 'East')
|
|
155
|
+
| INFO | P01 | Generated formatted multi-line SUMMARY string.
|
|
156
|
+
| INFO | P01 | Returning the str to the calling function.
|
|
157
|
+
| INFO | P01 |
|
|
158
|
+
Descriptive Statistics for Streaming Sales Amounts ($):
|
|
159
|
+
Count of sales : 3
|
|
160
|
+
Minimum sale : $27.15
|
|
161
|
+
Maximum sale : $101.58
|
|
162
|
+
Average sale : $70.20
|
|
163
|
+
Standard deviation: $38.56
|
|
164
|
+
|
|
165
|
+
| INFO | P01 | ========================
|
|
166
|
+
| INFO | P01 | Producer executed successfully!
|
|
167
|
+
| INFO | P01 | ========================
|
|
168
|
+
```
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
datafun_streaming/__init__.py,sha256=fWc236jQhyvJ_PJQdxZtZ5n750F5Toj4qIMX6__xHgg,83
|
|
2
|
+
datafun_streaming/_version.py,sha256=n_5vdJsPNu7wZ57LGuRL585uvll-hiuvZUBWzdG0RQU,520
|
|
3
|
+
datafun_streaming/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
datafun_streaming/core/__init__.py,sha256=q_nZswj8VsvIVjHUKRCI4RE0djGcuf9Gtkodcmz-Z8g,25
|
|
5
|
+
datafun_streaming/core/types.py,sha256=HDdIFHEheXcihYJqmuAcfGjJiihn0y1bpsBAhOh7v0M,419
|
|
6
|
+
datafun_streaming/data_validation/__init__.py,sha256=4T0-MWVJSJ3NoVbaqe7pVYeZvhkWyLeaSvuqvISNjfU,84
|
|
7
|
+
datafun_streaming/data_validation/errors.py,sha256=zepbDg8YoPYUaPddqDfkuOaukD6YXRT49cxgYWAoT8s,758
|
|
8
|
+
datafun_streaming/data_validation/reference.py,sha256=BpEYMQxFvcIm7pzeu8iE8TS8pB-F7scLjovtK1cK8xE,1914
|
|
9
|
+
datafun_streaming/data_validation/types.py,sha256=OJL35s0TLl3w-8BYJhaLrXACaCjT229nzqkxU1kNgFk,921
|
|
10
|
+
datafun_streaming/data_validation/validation_utils.py,sha256=dU6SPNEld2ConD9L_N0ahVrf5sHmdSehIXhhNB8n_Eg,3889
|
|
11
|
+
datafun_streaming/io/__init__.py,sha256=T5cpnhwM00xH-kAQen_rQjVtY-Q_jpbnEbXUl2wz524,63
|
|
12
|
+
datafun_streaming/io/errors.py,sha256=FtrooZTpv3ppzuQbLbSEEWs2cfNiTaNs4CRE-rGyfS8,1187
|
|
13
|
+
datafun_streaming/io/io_utils.py,sha256=sL4oDfwHhSfRKoXuHcII4MrFAVxwWy1W28yvc9AFu_o,2882
|
|
14
|
+
datafun_streaming/kafka/__init__.py,sha256=xbhP9Kt6Gu6rVewpG9TyLflpEymPrK2u92aUWxo_xCw,65
|
|
15
|
+
datafun_streaming/kafka/errors.py,sha256=nAQU5-hNHlFlhB8VMaU0E1SmEiWLV-suEkTvyJa45LY,4511
|
|
16
|
+
datafun_streaming/kafka/kafka_admin_utils.py,sha256=WEHlAvzpLIAjpQDYrHH_ZozVkdMbn2x1r3-Z2vTrxBQ,6183
|
|
17
|
+
datafun_streaming/kafka/kafka_connection_utils.py,sha256=XhO9SU2_HVPaABJgqEky3U-6JG3mdeRbfIPf_mqZQQc,1297
|
|
18
|
+
datafun_streaming/kafka/kafka_consumer_utils.py,sha256=rDynWwpL4fOaHZGI46jdxNOLPU5ZFIrYGuhC0oVpI54,1456
|
|
19
|
+
datafun_streaming/kafka/kafka_producer_utils.py,sha256=6MAIUpGd8qtPIJbtKA1DIdfSbLC9nBb8ntmbB7unik0,2489
|
|
20
|
+
datafun_streaming/kafka/kafka_settings.py,sha256=f4rX8G7OSCcFUnG8qLQ7QRkaalLKUeHgq3HO9Jh5z7A,2108
|
|
21
|
+
datafun_streaming/stats/__init__.py,sha256=njpyz4wW4oDhx_XYP-ETRoSPPhzf8EQqqh9FdK500tA,57
|
|
22
|
+
datafun_streaming/stats/stats_utils.py,sha256=uUHQqsUogof4_Kubm4sZLZBaw35yfnBq18jA-SfmK8g,2928
|
|
23
|
+
datafun_streaming/storage/__init__.py,sha256=Iqv6QCzUnEkYCR7Kf1edqU5jP9Oa6mh3HqmmTLI93No,47
|
|
24
|
+
datafun_streaming/storage/duckdb_utils.py,sha256=3es14fNzXsRT5K7IxHjfB2csK3Hu3-K-Xu0PkmgrR5o,6787
|
|
25
|
+
datafun_streaming/visualization/__init__.py,sha256=mBLBuDKyEI1DqzvqbQ5F0Cu7kzfF9LQ2ChfpJQRLkiA,48
|
|
26
|
+
datafun_streaming/visualization/chart_utils.py,sha256=q1jMh0835uM1LzorNWvSXpi4fUsz5ikUrEi9D-CUkFk,3676
|
|
27
|
+
datafun_streaming-0.1.0.dist-info/METADATA,sha256=V8hlav5pf51c3Yfsg3rdhYVXBUaMygqZGeBeY43EfXE,6427
|
|
28
|
+
datafun_streaming-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
29
|
+
datafun_streaming-0.1.0.dist-info/licenses/LICENSE,sha256=Xs6IcjtQ_6V_-e6sSH0xOsGwgZ1zwlqOz12Tx4VMXvU,1068
|
|
30
|
+
datafun_streaming-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Denise Case
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|