prismiq 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prismiq/__init__.py +543 -0
- prismiq/api.py +1889 -0
- prismiq/auth.py +108 -0
- prismiq/cache.py +527 -0
- prismiq/calculated_field_processor.py +231 -0
- prismiq/calculated_fields.py +819 -0
- prismiq/dashboard_store.py +1219 -0
- prismiq/dashboards.py +374 -0
- prismiq/dates.py +247 -0
- prismiq/engine.py +1315 -0
- prismiq/executor.py +345 -0
- prismiq/filter_merge.py +397 -0
- prismiq/formatting.py +298 -0
- prismiq/logging.py +489 -0
- prismiq/metrics.py +536 -0
- prismiq/middleware.py +346 -0
- prismiq/permissions.py +87 -0
- prismiq/persistence/__init__.py +45 -0
- prismiq/persistence/models.py +208 -0
- prismiq/persistence/postgres_store.py +1119 -0
- prismiq/persistence/saved_query_store.py +336 -0
- prismiq/persistence/schema.sql +95 -0
- prismiq/persistence/setup.py +222 -0
- prismiq/persistence/tables.py +76 -0
- prismiq/pins.py +72 -0
- prismiq/py.typed +0 -0
- prismiq/query.py +1233 -0
- prismiq/schema.py +333 -0
- prismiq/schema_config.py +354 -0
- prismiq/sql_utils.py +147 -0
- prismiq/sql_validator.py +219 -0
- prismiq/sqlalchemy_builder.py +577 -0
- prismiq/timeseries.py +410 -0
- prismiq/transforms.py +471 -0
- prismiq/trends.py +573 -0
- prismiq/types.py +688 -0
- prismiq-0.1.0.dist-info/METADATA +109 -0
- prismiq-0.1.0.dist-info/RECORD +39 -0
- prismiq-0.1.0.dist-info/WHEEL +4 -0
prismiq/timeseries.py
ADDED
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
"""Time series bucketing utilities for Prismiq analytics.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for grouping data by time intervals,
|
|
4
|
+
generating time buckets, and filling missing data points.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from datetime import datetime, timedelta
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from pydantic import BaseModel, ConfigDict
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TimeInterval(str, Enum):
    """Granularities that time series data can be bucketed by.

    Members also subclass ``str``, so each one compares equal to its
    lowercase value (for example ``TimeInterval.DAY == "day"``).
    """

    # Sub-day granularities.
    MINUTE = "minute"
    HOUR = "hour"

    # Calendar granularities, finest to coarsest.
    DAY = "day"
    WEEK = "week"
    MONTH = "month"
    QUARTER = "quarter"
    YEAR = "year"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TimeBucket(BaseModel):
    """One contiguous slice of time, with a display label.

    The model is validated in pydantic strict mode.
    """

    model_config = ConfigDict(strict=True)

    start: datetime
    """First instant covered by the bucket (inclusive)."""

    end: datetime
    """End of the bucket.

    ``generate_time_buckets`` sets this to one microsecond before the
    start of the following bucket.
    """

    label: str
    """Display text for the bucket, e.g. 'Jan 2024'."""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_date_trunc_sql(interval: TimeInterval, column: str) -> str:
    """Build a PostgreSQL ``date_trunc`` expression for a column.

    Args:
        interval: Granularity to truncate to; its ``value`` becomes the
            first ``date_trunc`` argument.
        column: Column name. It is emitted as one double-quoted
            identifier, with embedded double quotes doubled.

    Returns:
        SQL expression like: date_trunc('day', "order_date")

    Example:
        >>> get_date_trunc_sql(TimeInterval.DAY, "order_date")
        'date_trunc(\\'day\\', "order_date")'
    """
    # Doubling embedded quotes keeps the whole name inside one quoted
    # identifier, so it cannot terminate the identifier early.
    identifier = '"' + column.replace('"', '""') + '"'
    return f"date_trunc('{interval.value}', {identifier})"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_interval_format(interval: TimeInterval) -> str:
    """Look up the date format pattern associated with an interval.

    Args:
        interval: Time interval.

    Returns:
        Pattern intended for datetime.strftime(). The quarter pattern
        contains a ``%q`` marker that callers must post-process.

    Raises:
        KeyError: If ``interval`` is not one of the known intervals.

    Example:
        >>> get_interval_format(TimeInterval.DAY)
        '%Y-%m-%d'
    """
    patterns_by_interval = {
        TimeInterval.MINUTE: "%Y-%m-%d %H:%M",
        TimeInterval.HOUR: "%Y-%m-%d %H:00",
        TimeInterval.DAY: "%Y-%m-%d",
        TimeInterval.WEEK: "%Y-W%W",
        TimeInterval.MONTH: "%Y-%m",
        # "%q" is only a placeholder; the quarter number has to be
        # substituted by the caller afterwards.
        TimeInterval.QUARTER: "%Y-Q%q",
        TimeInterval.YEAR: "%Y",
    }
    return patterns_by_interval[interval]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _format_bucket_label(dt: datetime, interval: TimeInterval) -> str:
    """Format a datetime as a human-readable bucket label.

    Args:
        dt: Datetime to format (normally the start of a bucket).
        interval: Time interval that decides the label style.

    Returns:
        Human-readable label, for example ``"Jan 05, 13:00"`` (hour),
        ``"Week 1, 2025"`` (week), ``"Jan 2024"`` (month) or
        ``"Q1 2024"`` (quarter). Unknown intervals fall back to
        ``dt.isoformat()``.
    """
    if interval == TimeInterval.MINUTE:
        return dt.strftime("%b %d, %H:%M")

    if interval == TimeInterval.HOUR:
        return dt.strftime("%b %d, %H:00")

    if interval == TimeInterval.DAY:
        return dt.strftime("%b %d")

    if interval == TimeInterval.WEEK:
        # An ISO week number only makes sense next to the ISO year. Around
        # New Year the two differ from the calendar year: Monday 2024-12-30
        # belongs to ISO week 1 of 2025. Using dt.year here would label it
        # "Week 1, 2024" and collide with the real first week of 2024.
        iso_year, iso_week, _ = dt.isocalendar()
        return f"Week {iso_week}, {iso_year}"

    if interval == TimeInterval.MONTH:
        return dt.strftime("%b %Y")

    if interval == TimeInterval.QUARTER:
        quarter = (dt.month - 1) // 3 + 1
        return f"Q{quarter} {dt.year}"

    if interval == TimeInterval.YEAR:
        return str(dt.year)

    # Fallback for anything that is not a known interval.
    return dt.isoformat()
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _truncate_datetime(dt: datetime, interval: TimeInterval) -> datetime:
    """Snap a datetime down to the first instant of its interval.

    Args:
        dt: Datetime to truncate. Any tzinfo is dropped without shifting
            the wall-clock fields.
        interval: Interval to truncate to.

    Returns:
        Naive datetime at the start of the interval containing ``dt``.
        Weeks start on Monday. For an unrecognised interval the
        (tz-stripped) input is returned unchanged.
    """
    naive = dt if dt.tzinfo is None else dt.replace(tzinfo=None)

    # Sub-day intervals only clear the finer-grained clock fields.
    if interval == TimeInterval.MINUTE:
        return naive.replace(second=0, microsecond=0)
    if interval == TimeInterval.HOUR:
        return naive.replace(minute=0, second=0, microsecond=0)

    # Every remaining interval starts at midnight of some day.
    midnight = naive.replace(hour=0, minute=0, second=0, microsecond=0)

    if interval == TimeInterval.DAY:
        return midnight
    if interval == TimeInterval.WEEK:
        # weekday() is 0 for Monday, so this steps back to the Monday.
        return midnight - timedelta(days=midnight.weekday())
    if interval == TimeInterval.MONTH:
        return midnight.replace(day=1)
    if interval == TimeInterval.QUARTER:
        first_month_of_quarter = 3 * ((midnight.month - 1) // 3) + 1
        return midnight.replace(month=first_month_of_quarter, day=1)
    if interval == TimeInterval.YEAR:
        return midnight.replace(month=1, day=1)

    return naive
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _get_next_bucket_start(dt: datetime, interval: TimeInterval) -> datetime:
    """Advance a bucket start by exactly one interval.

    Args:
        dt: Current bucket start.
        interval: Time interval.

    Returns:
        Start of the next bucket. Unknown intervals advance by one day.

    Note:
        Month, quarter and year steps go through ``datetime.replace``,
        which raises ``ValueError`` if the day of month does not exist in
        the target month/year. Bucket starts aligned to the first of the
        month never hit this.
    """
    # Intervals of constant length are plain timedelta additions.
    fixed_steps = (
        (TimeInterval.MINUTE, timedelta(minutes=1)),
        (TimeInterval.HOUR, timedelta(hours=1)),
        (TimeInterval.DAY, timedelta(days=1)),
        (TimeInterval.WEEK, timedelta(weeks=1)),
    )
    for unit, step in fixed_steps:
        if interval == unit:
            return dt + step

    if interval == TimeInterval.MONTH:
        months_ahead = 1
    elif interval == TimeInterval.QUARTER:
        months_ahead = 3
    elif interval == TimeInterval.YEAR:
        return dt.replace(year=dt.year + 1)
    else:
        return dt + timedelta(days=1)

    # Work with a zero-based month so divmod yields the year carry directly.
    year_carry, month_index = divmod(dt.month - 1 + months_ahead, 12)
    return dt.replace(year=dt.year + year_carry, month=month_index + 1)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _get_bucket_end(bucket_start: datetime, interval: TimeInterval) -> datetime:
    """Return the final instant that still belongs to a bucket.

    Args:
        bucket_start: Start of the bucket.
        interval: Time interval.

    Returns:
        The datetime one microsecond before the following bucket starts.
    """
    smallest_step = timedelta(microseconds=1)
    return _get_next_bucket_start(bucket_start, interval) - smallest_step
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def generate_time_buckets(
    start: datetime,
    end: datetime,
    interval: TimeInterval,
) -> list[TimeBucket]:
    """Enumerate every bucket that overlaps the range ``start``..``end``.

    Args:
        start: Start datetime (inclusive). It is truncated down to the
            beginning of its interval, so the first bucket may begin
            before ``start``.
        end: End datetime (inclusive).
        interval: Time interval for bucketing.

    Returns:
        List of TimeBucket objects in chronological order; empty when the
        truncated start lies after ``end``. Timezone info on either bound
        is dropped without conversion, so the buckets are naive.

    Example:
        >>> from datetime import datetime
        >>> start = datetime(2024, 1, 1)
        >>> end = datetime(2024, 1, 3)
        >>> buckets = generate_time_buckets(start, end, TimeInterval.DAY)
        >>> len(buckets)
        3
    """
    # Work on naive datetimes so both bounds compare consistently.
    range_start = start.replace(tzinfo=None) if start.tzinfo is not None else start
    range_end = end.replace(tzinfo=None) if end.tzinfo is not None else end

    collected: list[TimeBucket] = []
    cursor = _truncate_datetime(range_start, interval)

    while cursor <= range_end:
        following = _get_next_bucket_start(cursor, interval)
        collected.append(
            TimeBucket(
                start=cursor,
                # Last microsecond before the following bucket begins.
                end=following - timedelta(microseconds=1),
                label=_format_bucket_label(cursor, interval),
            )
        )
        cursor = following

    return collected
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def _infer_bucket_interval(span: timedelta) -> TimeInterval:
    """Guess the bucketing interval from the length of one bucket.

    Args:
        span: Distance between two consecutive bucket starts, or the
            duration of a single bucket.

    Returns:
        The smallest interval whose upper bound covers ``span``; anything
        longer than 100 days is treated as a year.
    """
    # Upper bounds are generous (32 / 100 days) so that every month and
    # quarter length falls in the right slot.
    upper_bounds = (
        (timedelta(minutes=1), TimeInterval.MINUTE),
        (timedelta(hours=1), TimeInterval.HOUR),
        (timedelta(days=1), TimeInterval.DAY),
        (timedelta(weeks=1), TimeInterval.WEEK),
        (timedelta(days=32), TimeInterval.MONTH),
        (timedelta(days=100), TimeInterval.QUARTER),
    )
    for limit, candidate in upper_bounds:
        if span <= limit:
            return candidate
    return TimeInterval.YEAR


def fill_missing_buckets(
    data: list[dict[str, Any]],
    date_column: str,
    buckets: list[TimeBucket],
    fill_value: Any = 0,
) -> list[dict[str, Any]]:
    """Fill missing time buckets with default values.

    Takes query result data and returns one or more rows per bucket, in
    bucket order. Buckets that have data keep their original rows; buckets
    without data get a synthetic row.

    Args:
        data: List of row dictionaries from query result.
        date_column: Name of the date/datetime column.
        buckets: List of time buckets to ensure coverage.
        fill_value: Value used in synthetic rows for columns whose value
            in the first data row is an ``int`` or ``float`` (default: 0).
            All other columns are set to ``None``.

    Returns:
        List of row dictionaries with missing buckets filled. If
        ``buckets`` is empty, ``data`` is returned unchanged. If ``data``
        is empty, each bucket yields a row holding only the date column.
        Rows whose date is ``None``, or whose truncated date matches no
        bucket start, are not included in the result.

    Example:
        >>> data = [
        ...     {"date": datetime(2024, 1, 1), "sales": 100},
        ...     {"date": datetime(2024, 1, 3), "sales": 150},
        ... ]
        >>> buckets = generate_time_buckets(
        ...     datetime(2024, 1, 1), datetime(2024, 1, 3), TimeInterval.DAY
        ... )
        >>> filled = fill_missing_buckets(data, "date", buckets)
        >>> len(filled)  # Now includes Jan 2
        3
    """
    if not buckets:
        return data

    if not data:
        # No data, create empty rows for all buckets
        return [{date_column: bucket.start} for bucket in buckets]

    # The interval is not passed in, so recover it from the bucket spacing
    # (or from the bucket's own duration when there is only one).
    if len(buckets) >= 2:
        span = buckets[1].start - buckets[0].start
    else:
        span = buckets[0].end - buckets[0].start
    interval = _infer_bucket_interval(span)

    # Group the existing rows by the start of the bucket they fall into.
    rows_by_bucket: dict[datetime, list[dict[str, Any]]] = {}
    for row in data:
        date_val = row.get(date_column)
        if date_val is None:
            continue

        # Plain dates have no "hour"; promote them to midnight datetimes.
        if hasattr(date_val, "hour"):
            dt = date_val
        else:
            dt = datetime.combine(date_val, datetime.min.time())

        # Bucket starts are naive, so drop any timezone before matching.
        if getattr(dt, "tzinfo", None) is not None:
            dt = dt.replace(tzinfo=None)

        rows_by_bucket.setdefault(_truncate_datetime(dt, interval), []).append(row)

    # The first row serves as the template for synthetic rows. The default
    # per column is decided once here instead of once per missing bucket.
    # NOTE(review): only int/float (which includes bool) count as numeric;
    # Decimal values get None. Confirm this is intended.
    column_defaults: dict[str, Any] = {
        col: fill_value if isinstance(sample, (int, float)) else None
        for col, sample in data[0].items()
    }

    result: list[dict[str, Any]] = []
    for bucket in buckets:
        existing_rows = rows_by_bucket.get(bucket.start)
        if existing_rows:
            result.extend(existing_rows)
            continue

        filled_row = {
            col: bucket.start if col == date_column else default
            for col, default in column_defaults.items()
        }
        # If the template row lacked the date column, still stamp the row so
        # the filler can be attributed to its bucket (this matches what the
        # empty-data branch above produces).
        filled_row.setdefault(date_column, bucket.start)
        result.append(filled_row)

    return result
|