prismiq 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
prismiq/trends.py ADDED
@@ -0,0 +1,573 @@
1
+ """Trend calculation utilities for Prismiq analytics.
2
+
3
+ This module provides utilities for calculating trends, period-over-
4
+ period comparisons, and moving averages.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import contextlib
10
+ from collections import defaultdict
11
+ from datetime import date, datetime, timedelta
12
+ from enum import Enum
13
+ from typing import Any
14
+
15
+ from pydantic import BaseModel, ConfigDict
16
+
17
+ from prismiq.types import QueryResult
18
+
19
+
20
class TrendDirection(str, Enum):
    """Which way a value moved relative to its comparison value.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # Value increased.
    UP = "up"
    # Value decreased.
    DOWN = "down"
    # No change, or no comparison was possible.
    FLAT = "flat"
26
+
27
+
28
class TrendResult(BaseModel):
    """Outcome of comparing a current value against an earlier one.

    The model is configured with ``strict=True``. The three optional
    fields are ``None`` when there was nothing to compare against.
    """

    model_config = ConfigDict(strict=True)

    current_value: float
    """Value measured for the current period."""

    previous_value: float | None
    """Value of the comparison period, or None when no comparison is available."""

    absolute_change: float | None
    """Signed difference: current minus previous."""

    percent_change: float | None
    """Relative change in percent: (current - previous) / previous * 100."""

    direction: TrendDirection
    """Whether the value went up, down, or stayed flat."""
47
+
48
+
49
class ComparisonPeriod(str, Enum):
    """Which earlier window the current period is compared against.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # Window of the same length ending the day before the current one starts.
    PREVIOUS_PERIOD = "previous_period"
    # Same calendar dates, one year earlier.
    PREVIOUS_YEAR = "previous_year"
    # Same calendar dates, one month earlier.
    PREVIOUS_MONTH = "previous_month"
    # Same window shifted back seven days.
    PREVIOUS_WEEK = "previous_week"
56
+
57
+
58
def calculate_trend(
    current: float | None,
    previous: float | None,
    threshold: float = 0.001,
) -> TrendResult:
    """Compare two values and classify the movement between them.

    Args:
        current: Current period value. ``None`` is treated as ``0.0``.
        previous: Previous period value. ``None`` means there is nothing
            to compare against.
        threshold: Flat-band cutoff expressed as a ratio, not a percent:
            the default ``0.001`` means changes below 0.1% are "flat".

    Returns:
        TrendResult holding the absolute change, the percent change and
        the direction. When ``previous`` is ``None`` the change fields are
        ``None`` and the direction is FLAT.

    Example:
        >>> result = calculate_trend(150, 100)
        >>> result.percent_change
        50.0
        >>> result.direction is TrendDirection.UP
        True
    """
    # A missing current value is reported as zero rather than rejected.
    current_value = 0.0 if current is None else current

    # Without a baseline no change can be computed.
    if previous is None:
        return TrendResult(
            current_value=current_value,
            previous_value=None,
            absolute_change=None,
            percent_change=None,
            direction=TrendDirection.FLAT,
        )

    delta = current_value - previous

    if previous != 0:
        # abs() keeps the sign of the percentage equal to the sign of delta
        # even when the baseline is negative.
        pct = (delta / abs(previous)) * 100
    elif current_value == 0:
        pct = 0.0
    elif current_value > 0:
        # Growth from a zero baseline is reported as a nominal +100%.
        pct = 100.0
    else:
        # Decline from a zero baseline is reported as a nominal -100%.
        pct = -100.0

    # pct is in percent while threshold is a ratio, hence the scaling.
    if abs(pct) < threshold * 100:
        trend_direction = TrendDirection.FLAT
    else:
        trend_direction = TrendDirection.UP if pct > 0 else TrendDirection.DOWN

    return TrendResult(
        current_value=current_value,
        previous_value=previous,
        absolute_change=delta,
        percent_change=pct,
        direction=trend_direction,
    )
122
+
123
+
124
def _shift_back_one_year(day: date) -> date:
    """Return the same calendar day one year earlier (Feb 29 becomes Feb 28)."""
    try:
        return day.replace(year=day.year - 1)
    except ValueError:
        # Feb 29 does not exist in the target year; clamp to Feb 28.
        return day.replace(year=day.year - 1, day=28)


def _shift_back_one_month(day: date) -> date:
    """Return the same day-of-month one month earlier, clamped to month end."""
    if day.month == 1:
        # December has 31 days, so the day number always fits.
        return day.replace(year=day.year - 1, month=12)
    try:
        return day.replace(month=day.month - 1)
    except ValueError:
        # The day number does not exist in the previous month (e.g. Mar 31).
        # The day before the 1st of this month is the previous month's end.
        return day.replace(day=1) - timedelta(days=1)


def _get_comparison_date_range(
    current_start: date,
    current_end: date,
    comparison: ComparisonPeriod,
) -> tuple[date, date]:
    """Calculate the comparison period date range.

    The start and end dates are shifted independently, so a clamp applied
    to one of them (Feb 29, or a day missing from a shorter month) never
    alters the other.

    Args:
        current_start: Start of current period (inclusive).
        current_end: End of current period (inclusive).
        comparison: Type of comparison.

    Returns:
        Tuple of (previous_start, previous_end), both inclusive.
    """
    if comparison == ComparisonPeriod.PREVIOUS_YEAR:
        return _shift_back_one_year(current_start), _shift_back_one_year(current_end)

    if comparison == ComparisonPeriod.PREVIOUS_MONTH:
        return _shift_back_one_month(current_start), _shift_back_one_month(current_end)

    if comparison == ComparisonPeriod.PREVIOUS_WEEK:
        one_week = timedelta(weeks=1)
        return current_start - one_week, current_end - one_week

    # PREVIOUS_PERIOD, and the fallback for any other value: a window of the
    # same inclusive length that ends the day before the current one starts.
    period_length = (current_end - current_start).days + 1
    prev_end = current_start - timedelta(days=1)
    prev_start = prev_end - timedelta(days=period_length - 1)
    return prev_start, prev_end
201
+
202
+
203
def calculate_period_comparison(
    result: QueryResult,
    date_column: str,
    value_column: str,
    comparison: ComparisonPeriod,
    current_start: date,
    current_end: date,
) -> TrendResult:
    """Calculate trend comparing current period to comparison period.

    Values are summed per period. Rows are skipped when the date or value
    is ``None``, when the date is neither a ``date`` nor a ``datetime``,
    or when the value cannot be converted to ``float``.

    Args:
        result: Query result containing date and value columns.
        date_column: Name of the date column.
        value_column: Name of the value column.
        comparison: Type of period comparison.
        current_start: Start of current period (inclusive).
        current_end: End of current period (inclusive).

    Returns:
        TrendResult with period comparison. A comparison-period sum of
        exactly zero is reported as "no comparison available"
        (``previous_value=None``).

    Raises:
        ValueError: If ``date_column`` or ``value_column`` is not in
            ``result.columns``.
    """
    if not result.rows:
        return TrendResult(
            current_value=0.0,
            previous_value=None,
            absolute_change=None,
            percent_change=None,
            direction=TrendDirection.FLAT,
        )

    try:
        date_idx = result.columns.index(date_column)
        value_idx = result.columns.index(value_column)
    except ValueError as e:
        raise ValueError(f"Column not found: {e}") from e

    prev_start, prev_end = _get_comparison_date_range(current_start, current_end, comparison)

    current_sum = 0.0
    previous_sum = 0.0

    for row in result.rows:
        raw_date = row[date_idx]
        raw_value = row[value_idx]

        if raw_date is None or raw_value is None:
            continue

        # datetime is a subclass of date, so it has to be tested first.
        if isinstance(raw_date, datetime):
            row_date = raw_date.date()
        elif isinstance(raw_date, date):
            row_date = raw_date
        else:
            continue

        try:
            amount = float(raw_value)
        except (ValueError, TypeError):
            continue

        if current_start <= row_date <= current_end:
            current_sum += amount
        # Tested independently of the check above: the two windows can
        # overlap (e.g. PREVIOUS_WEEK with a period longer than 7 days), and
        # a row inside both must count towards both sums.
        if prev_start <= row_date <= prev_end:
            previous_sum += amount

    # A zero previous sum is treated as "no baseline" rather than passed on
    # as a zero baseline.
    return calculate_trend(current_sum, previous_sum if previous_sum != 0 else None)
272
+
273
+
274
def add_trend_column(
    result: QueryResult,
    value_column: str,
    order_column: str,
    group_column: str | None = None,
) -> QueryResult:
    """Add columns for trend calculation to each row.

    Adds: {value_column}_prev, {value_column}_change, {value_column}_pct_change

    Rows are visited in ``order_column`` order (rows whose order value is
    ``None`` come first) and each row is compared with the most recent
    numeric value seen in the same group. Output rows keep the input order.

    Args:
        result: Query result to process.
        value_column: Column containing values.
        order_column: Column to order by (for determining previous row).
        group_column: Column to group by (calculate trends within groups).

    Returns:
        New QueryResult with trend columns added. Change columns are
        ``None`` when the row has no numeric value or no earlier value in
        its group.

    Raises:
        ValueError: If a named column is not in ``result.columns``.
    """
    # The output schema is the same for empty and non-empty results.
    out_columns = [
        *result.columns,
        f"{value_column}_prev",
        f"{value_column}_change",
        f"{value_column}_pct_change",
    ]
    out_types = [*result.column_types, "numeric", "numeric", "numeric"]

    if not result.rows:
        return QueryResult(
            columns=out_columns,
            column_types=out_types,
            rows=[],
            row_count=0,
            truncated=False,
            execution_time_ms=0,
        )

    try:
        value_idx = result.columns.index(value_column)
        order_idx = result.columns.index(order_column)
    except ValueError as e:
        raise ValueError(f"Column not found: {e}") from e

    group_idx = None
    if group_column is not None:
        try:
            group_idx = result.columns.index(group_column)
        except ValueError as e:
            raise ValueError(f"Group column not found: {e}") from e

    def order_key(item: tuple[int, Any]) -> tuple[bool, Any]:
        # None sorts first. Substituting 0 for None (or for "") would raise
        # TypeError as soon as the column holds dates or strings.
        order_value = item[1][order_idx]
        return (order_value is not None, order_value)

    # Keep each row's original position so the output order is unchanged.
    indexed_rows = sorted(enumerate(result.rows), key=order_key)

    last_value_by_group: dict[Any, float] = {}
    row_trends: dict[int, tuple[float | None, float | None, float | None]] = {}

    for original_idx, row in indexed_rows:
        group_key = row[group_idx] if group_idx is not None else "__all__"
        previous = last_value_by_group.get(group_key)

        raw_value = row[value_idx]
        try:
            current_float = float(raw_value) if raw_value is not None else None
        except (ValueError, TypeError):
            current_float = None

        change: float | None = None
        pct_change: float | None = None
        if current_float is not None and previous is not None:
            change = current_float - previous
            if previous != 0:
                pct_change = (change / abs(previous)) * 100
            elif current_float > 0:
                # Nominal +/-100% when moving away from a zero baseline.
                pct_change = 100.0
            elif current_float < 0:
                pct_change = -100.0
            else:
                pct_change = 0.0

        row_trends[original_idx] = (previous, change, pct_change)

        # Rows without a numeric value do not reset the group's baseline.
        if current_float is not None:
            last_value_by_group[group_key] = current_float

    output_rows: list[list[Any]] = [
        [*row, *row_trends[i]] for i, row in enumerate(result.rows)
    ]

    return QueryResult(
        columns=out_columns,
        column_types=out_types,
        rows=output_rows,
        row_count=result.row_count,
        truncated=result.truncated,
        execution_time_ms=0,
    )
378
+
379
+
380
def calculate_moving_average(
    result: QueryResult,
    value_column: str,
    window: int = 7,
    order_column: str | None = None,
) -> QueryResult:
    """Add a moving average column.

    The new column is named ``{value_column}_ma{window}``. Until ``window``
    numeric values have been seen, the average covers the values seen so
    far. Rows whose value is ``None`` or not convertible to ``float`` add
    nothing to the window and receive the average of the values before
    them (``None`` if there are none yet). Output rows keep the input order.

    Args:
        result: Query result to process.
        value_column: Column containing values.
        window: Number of periods for the moving average; must be >= 1.
        order_column: Column to order by (uses existing order if None).
            Rows whose order value is ``None`` come first.

    Returns:
        New QueryResult with moving average column added.

    Raises:
        ValueError: If ``window`` is less than 1, or a named column is not
            in ``result.columns``.
    """
    # A zero or negative window would make the slice below select the
    # wrong values or divide by zero.
    if window < 1:
        raise ValueError(f"window must be at least 1, got {window}")

    out_columns = [*result.columns, f"{value_column}_ma{window}"]
    out_types = [*result.column_types, "numeric"]

    if not result.rows:
        return QueryResult(
            columns=out_columns,
            column_types=out_types,
            rows=[],
            row_count=0,
            truncated=False,
            execution_time_ms=0,
        )

    try:
        value_idx = result.columns.index(value_column)
    except ValueError as e:
        raise ValueError(f"Column '{value_column}' not found") from e

    # Keep each row's original position so the output order is unchanged.
    indexed_rows = list(enumerate(result.rows))
    if order_column is not None:
        try:
            order_idx = result.columns.index(order_column)
        except ValueError as e:
            raise ValueError(f"Order column '{order_column}' not found") from e
        # None sorts first. Substituting 0 for None would raise TypeError
        # as soon as the column holds dates or strings.
        indexed_rows.sort(
            key=lambda item: (item[1][order_idx] is not None, item[1][order_idx])
        )

    values: list[float] = []
    row_averages: dict[int, float | None] = {}

    for original_idx, row in indexed_rows:
        val = row[value_idx]

        if val is not None:
            with contextlib.suppress(ValueError, TypeError):
                values.append(float(val))

        if values:
            # The slice returns every value while fewer than `window` have
            # been seen, which gives the partial-window average.
            recent = values[-window:]
            row_averages[original_idx] = sum(recent) / len(recent)
        else:
            row_averages[original_idx] = None

    output_rows: list[list[Any]] = [
        [*row, row_averages.get(i)] for i, row in enumerate(result.rows)
    ]

    return QueryResult(
        columns=out_columns,
        column_types=out_types,
        rows=output_rows,
        row_count=result.row_count,
        truncated=result.truncated,
        execution_time_ms=0,
    )
459
+
460
+
461
def _coerce_to_date(value: Any) -> date | None:
    """Return ``value`` as a ``date``, or None if it is not a date/datetime."""
    # datetime is a subclass of date, so it has to be tested first.
    if isinstance(value, datetime):
        return value.date()
    if isinstance(value, date):
        return value
    return None


def calculate_year_over_year(
    result: QueryResult,
    date_column: str,
    value_column: str,
) -> QueryResult:
    """Add year-over-year comparison columns.

    Adds: {value_column}_prev_year, {value_column}_yoy_change, {value_column}_yoy_pct

    Each row is matched with the row that has the same month and day in
    the preceding year. If several rows share a date, the last one
    supplies the prior-year value.

    Args:
        result: Query result to process.
        date_column: Column containing dates.
        value_column: Column containing values.

    Returns:
        New QueryResult with YoY comparison columns. The added columns are
        ``None`` for rows without a usable date or a prior-year match.

    Raises:
        ValueError: If ``date_column`` or ``value_column`` is not in
            ``result.columns``.
    """
    out_columns = [
        *result.columns,
        f"{value_column}_prev_year",
        f"{value_column}_yoy_change",
        f"{value_column}_yoy_pct",
    ]
    out_types = [*result.column_types, "numeric", "numeric", "numeric"]

    if not result.rows:
        return QueryResult(
            columns=out_columns,
            column_types=out_types,
            rows=[],
            row_count=0,
            truncated=False,
            execution_time_ms=0,
        )

    try:
        date_idx = result.columns.index(date_column)
        value_idx = result.columns.index(value_column)
    except ValueError as e:
        raise ValueError(f"Column not found: {e}") from e

    # First pass: index every numeric value as year -> (month, day) -> value.
    year_data: dict[int, dict[tuple[int, int], float]] = defaultdict(dict)
    for row in result.rows:
        raw_value = row[value_idx]
        row_date = _coerce_to_date(row[date_idx])
        if row_date is None or raw_value is None:
            continue
        try:
            year_data[row_date.year][(row_date.month, row_date.day)] = float(raw_value)
        except (ValueError, TypeError):
            continue

    # Second pass: look up each row's counterpart one year earlier.
    output_rows: list[list[Any]] = []
    for row in result.rows:
        raw_value = row[value_idx]
        row_date = _coerce_to_date(row[date_idx])

        prev_year_val: float | None = None
        yoy_change: float | None = None
        yoy_pct: float | None = None

        if row_date is not None:
            # .get() avoids creating empty entries in the defaultdict.
            prev_year_val = year_data.get(row_date.year - 1, {}).get(
                (row_date.month, row_date.day)
            )

        if prev_year_val is not None and raw_value is not None:
            try:
                current_float = float(raw_value)
            except (ValueError, TypeError):
                # Non-numeric current value: keep the prior-year value but
                # leave the change columns empty.
                pass
            else:
                yoy_change = current_float - prev_year_val
                if prev_year_val != 0:
                    yoy_pct = (yoy_change / abs(prev_year_val)) * 100
                elif current_float > 0:
                    # Nominal +/-100% when moving away from a zero baseline,
                    # matching calculate_trend and add_trend_column.
                    yoy_pct = 100.0
                elif current_float < 0:
                    yoy_pct = -100.0
                else:
                    yoy_pct = 0.0

        output_rows.append([*row, prev_year_val, yoy_change, yoy_pct])

    return QueryResult(
        columns=out_columns,
        column_types=out_types,
        rows=output_rows,
        row_count=result.row_count,
        truncated=result.truncated,
        execution_time_ms=0,
    )