chrono-correlator 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chrono_correlator-0.1.0/PKG-INFO +86 -0
- chrono_correlator-0.1.0/README.md +57 -0
- chrono_correlator-0.1.0/chrono_correlator/__init__.py +17 -0
- chrono_correlator-0.1.0/chrono_correlator/correlator.py +53 -0
- chrono_correlator-0.1.0/chrono_correlator/engine.py +36 -0
- chrono_correlator-0.1.0/chrono_correlator/models.py +39 -0
- chrono_correlator-0.1.0/chrono_correlator/narrator.py +57 -0
- chrono_correlator-0.1.0/chrono_correlator/tests/__init__.py +0 -0
- chrono_correlator-0.1.0/chrono_correlator/tests/test_correlator.py +54 -0
- chrono_correlator-0.1.0/chrono_correlator/tests/test_engine.py +61 -0
- chrono_correlator-0.1.0/chrono_correlator.egg-info/PKG-INFO +86 -0
- chrono_correlator-0.1.0/chrono_correlator.egg-info/SOURCES.txt +15 -0
- chrono_correlator-0.1.0/chrono_correlator.egg-info/dependency_links.txt +1 -0
- chrono_correlator-0.1.0/chrono_correlator.egg-info/requires.txt +8 -0
- chrono_correlator-0.1.0/chrono_correlator.egg-info/top_level.txt +1 -0
- chrono_correlator-0.1.0/pyproject.toml +42 -0
- chrono_correlator-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chrono-correlator
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Statistical correlation between time-series and discrete events with optional LLM narration
|
|
5
|
+
Author-email: Raúl Gallardo <g3ov3r@gmail.com>
|
|
6
|
+
License-Expression: GPL-3.0-only
|
|
7
|
+
Project-URL: Homepage, https://github.com/Raulcadiz/chrono-correlator
|
|
8
|
+
Project-URL: Repository, https://github.com/Raulcadiz/chrono-correlator
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/Raulcadiz/chrono-correlator/issues
|
|
10
|
+
Keywords: statistics,time-series,correlation,mann-whitney,llm,health,monitoring
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: scipy>=1.11
|
|
23
|
+
Requires-Dist: numpy>=1.24
|
|
24
|
+
Requires-Dist: groq>=0.4.0
|
|
25
|
+
Requires-Dist: anthropic>=0.20.0
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
29
|
+
|
|
30
|
+
# chrono-correlator
|
|
31
|
+
|
|
32
|
+
A generic statistical engine that correlates time-series data with discrete events using the Mann-Whitney U test, and narrates statistically significant signals (p < 0.05) with an LLM only when the alert level is yellow or red.
|
|
33
|
+
|
|
34
|
+
## Install
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install chrono-correlator
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Quick start
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from datetime import datetime, timedelta
|
|
44
|
+
from chrono_correlator import Event, Metric, evaluate, narrate
|
|
45
|
+
|
|
46
|
+
base = datetime(2024, 1, 1)
|
|
47
|
+
|
|
48
|
+
events = [
|
|
49
|
+
Event(timestamp=base + timedelta(days=d), label="migraine")
|
|
50
|
+
for d in [10, 20, 30]
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
timestamps = [base + timedelta(hours=h) for h in range(800)]
|
|
54
|
+
values = [55.0] * 800
|
|
55
|
+
for day in [10, 20, 30]:
|
|
56
|
+
for h in range(48):
|
|
57
|
+
idx = day * 24 - 48 + h
|
|
58
|
+
if 0 <= idx < 800:
|
|
59
|
+
values[idx] = 28.0
|
|
60
|
+
|
|
61
|
+
hrv = Metric(name="hrv", timestamps=timestamps, values=values)
|
|
62
|
+
|
|
63
|
+
report = evaluate(events, [hrv])
|
|
64
|
+
print(f"Level: {report.level} — {report.active_signals}/{report.total_signals} signals")
|
|
65
|
+
|
|
66
|
+
if report.level != "green":
|
|
67
|
+
report = narrate(report, provider="groq")
|
|
68
|
+
print(report.narrative)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## How it works
|
|
72
|
+
|
|
73
|
+
- **Statistical core:** For each metric, values in the 48 h before each event are compared against a 28-day baseline using Mann-Whitney U. Effect size is computed as rank-biserial correlation.
|
|
74
|
+
- **Alert level:** Active signals (p < 0.05) are counted across all metrics. 0–2 → green, 3–4 → yellow, 5 or more → red.
|
|
75
|
+
- **LLM narration:** Only triggered on yellow or red. The model receives pre-calculated statistics and is constrained to one factual sentence per signal — no diagnosis, no causal inference.
|
|
76
|
+
|
|
77
|
+
## Use cases
|
|
78
|
+
|
|
79
|
+
- **Health monitoring** — correlate HRV, deep sleep, or skin temperature drops with migraine or crisis events.
|
|
80
|
+
- **Infrastructure** — detect latency or error-rate anomalies preceding service outages.
|
|
81
|
+
- **IPTV / streaming** — link buffering load spikes to subscriber disconnection events.
|
|
82
|
+
- **Energy consumption** — associate power demand patterns with grid stress or equipment failures.
|
|
83
|
+
|
|
84
|
+
## License
|
|
85
|
+
|
|
86
|
+
GPL-3.0 — Raúl Gallardo (g3v3r)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# chrono-correlator
|
|
2
|
+
|
|
3
|
+
A generic statistical engine that correlates time-series data with discrete events using the Mann-Whitney U test, and narrates statistically significant signals (p < 0.05) with an LLM only when the alert level is yellow or red.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install chrono-correlator
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick start
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from datetime import datetime, timedelta
|
|
15
|
+
from chrono_correlator import Event, Metric, evaluate, narrate
|
|
16
|
+
|
|
17
|
+
base = datetime(2024, 1, 1)
|
|
18
|
+
|
|
19
|
+
events = [
|
|
20
|
+
Event(timestamp=base + timedelta(days=d), label="migraine")
|
|
21
|
+
for d in [10, 20, 30]
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
timestamps = [base + timedelta(hours=h) for h in range(800)]
|
|
25
|
+
values = [55.0] * 800
|
|
26
|
+
for day in [10, 20, 30]:
|
|
27
|
+
for h in range(48):
|
|
28
|
+
idx = day * 24 - 48 + h
|
|
29
|
+
if 0 <= idx < 800:
|
|
30
|
+
values[idx] = 28.0
|
|
31
|
+
|
|
32
|
+
hrv = Metric(name="hrv", timestamps=timestamps, values=values)
|
|
33
|
+
|
|
34
|
+
report = evaluate(events, [hrv])
|
|
35
|
+
print(f"Level: {report.level} — {report.active_signals}/{report.total_signals} signals")
|
|
36
|
+
|
|
37
|
+
if report.level != "green":
|
|
38
|
+
report = narrate(report, provider="groq")
|
|
39
|
+
print(report.narrative)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## How it works
|
|
43
|
+
|
|
44
|
+
- **Statistical core:** For each metric, values in the 48 h before each event are compared against a 28-day baseline using Mann-Whitney U. Effect size is computed as rank-biserial correlation.
|
|
45
|
+
- **Alert level:** Active signals (p < 0.05) are counted across all metrics. 0–2 → green, 3–4 → yellow, 5 or more → red.
|
|
46
|
+
- **LLM narration:** Only triggered on yellow or red. The model receives pre-calculated statistics and is constrained to one factual sentence per signal — no diagnosis, no causal inference.
|
|
47
|
+
|
|
48
|
+
## Use cases
|
|
49
|
+
|
|
50
|
+
- **Health monitoring** — correlate HRV, deep sleep, or skin temperature drops with migraine or crisis events.
|
|
51
|
+
- **Infrastructure** — detect latency or error-rate anomalies preceding service outages.
|
|
52
|
+
- **IPTV / streaming** — link buffering load spikes to subscriber disconnection events.
|
|
53
|
+
- **Energy consumption** — associate power demand patterns with grid stress or equipment failures.
|
|
54
|
+
|
|
55
|
+
## License
|
|
56
|
+
|
|
57
|
+
GPL-3.0 — Raúl Gallardo (g3v3r)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from .correlator import analyze
|
|
2
|
+
from .engine import evaluate
|
|
3
|
+
from .models import AlertReport, CorrelationResult, Event, Metric
|
|
4
|
+
from .narrator import narrate
|
|
5
|
+
|
|
6
|
+
__version__ = "0.1.0"
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"analyze",
|
|
10
|
+
"evaluate",
|
|
11
|
+
"narrate",
|
|
12
|
+
"Event",
|
|
13
|
+
"Metric",
|
|
14
|
+
"CorrelationResult",
|
|
15
|
+
"AlertReport",
|
|
16
|
+
"__version__",
|
|
17
|
+
]
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from datetime import timedelta
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from scipy import stats
|
|
5
|
+
|
|
6
|
+
from .models import CorrelationResult, Event, Metric
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def analyze(
    events: list[Event],
    metric: Metric,
    lookback_hours: int = 48,
    baseline_days: int = 28,
) -> CorrelationResult:
    """Compare a metric's pre-event samples with its baseline samples.

    For every event two half-open time windows are built:

    * pre-event: ``[event - lookback_hours, event)``
    * baseline:  ``[event - baseline_days, event - lookback_hours)``

    The windows of all events are merged, and each metric sample is used at
    most once. A sample that lies in the pre-event window of *any* event is
    never treated as baseline, even if it also falls inside the baseline
    window of a later event. The two groups are then compared with a
    two-sided Mann-Whitney U test.

    Args:
        events: Events whose ``timestamp`` anchors the windows.
        metric: Series providing parallel ``timestamps`` and ``values``.
        lookback_hours: Length of the pre-event window, in hours.
        baseline_days: How far back, in days, the baseline window starts
            (measured from the event).

    Returns:
        A ``CorrelationResult`` with the p-value, the ``p < 0.05`` flag, the
        effect size, both medians, and ``narrative`` set to ``None``.

    Raises:
        ValueError: If either group ends up with fewer than 3 samples.
    """
    # Work in POSIX seconds so window membership is plain float comparison.
    # NOTE(review): datetime.timestamp() interprets naive datetimes as local
    # time; confirm callers use one consistent convention for events/metrics.
    ts = np.array([t.timestamp() for t in metric.timestamps])
    vals = np.array(metric.values)

    lookback_seconds = lookback_hours * 3600
    baseline_seconds = baseline_days * 86400

    # Union masks instead of per-event lists: overlapping windows of nearby
    # events must not duplicate samples in the test.
    in_pre = np.zeros(ts.shape, dtype=bool)
    in_baseline = np.zeros(ts.shape, dtype=bool)

    for event in events:
        t_event = event.timestamp.timestamp()
        t_pre_start = t_event - lookback_seconds
        t_baseline_start = t_event - baseline_seconds

        in_pre |= (ts >= t_pre_start) & (ts < t_event)
        in_baseline |= (ts >= t_baseline_start) & (ts < t_pre_start)

    # A sample preceding one event must not dilute another event's baseline.
    in_baseline &= ~in_pre

    pre_event_values = vals[in_pre]
    baseline_values = vals[in_baseline]
    n1, n2 = len(pre_event_values), len(baseline_values)

    if n1 < 3 or n2 < 3:
        raise ValueError(
            f"Datos insuficientes para '{metric.name}'. "
            f"Pre-evento: {n1}, Baseline: {n2}"
        )

    stat, p_value = stats.mannwhitneyu(
        pre_event_values, baseline_values, alternative="two-sided"
    )
    # SciPy returns U for the first sample (pre-event), so this rank-biserial
    # value is positive when pre-event values tend to be LOWER than baseline
    # and negative when they tend to be higher.
    effect_size = 1 - (2 * stat) / (n1 * n2)

    return CorrelationResult(
        metric_name=metric.name,
        p_value=float(p_value),
        significant=bool(p_value < 0.05),
        effect_size=float(effect_size),
        baseline_median=float(np.median(baseline_values)),
        pre_event_median=float(np.median(pre_event_values)),
        narrative=None,
    )
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from .correlator import analyze
|
|
2
|
+
from .models import AlertReport, CorrelationResult, Event, Metric
|
|
3
|
+
|
|
4
|
+
# Inclusive (min, max) count of significant signals for each alert level.
# Only the upper bounds of "green" and "yellow" are consulted by evaluate();
# any count above the "yellow" maximum is reported as "red".
THRESHOLDS = {"green": (0, 2), "yellow": (3, 4), "red": (5, 7)}


def evaluate(
    events: list[Event],
    metrics: list[Metric],
    lookback_hours: int = 48,
    baseline_days: int = 28,
) -> AlertReport:
    """Analyze every metric against the events and derive an alert level.

    Each metric is passed to ``analyze``. Metrics for which ``analyze``
    raises ``ValueError`` are skipped (best effort) and a warning is logged,
    so they count towards neither ``total_signals`` nor ``active_signals``.

    Args:
        events: Events forwarded to ``analyze``.
        metrics: Metrics to test, one ``analyze`` call each.
        lookback_hours: Forwarded to ``analyze``.
        baseline_days: Forwarded to ``analyze``.

    Returns:
        An ``AlertReport`` whose ``level`` is ``"green"``, ``"yellow"`` or
        ``"red"`` depending on the number of significant results, with
        ``narrative`` set to ``None``.
    """
    # Function-scope import, the same style narrator.py uses for its SDKs.
    import logging

    logger = logging.getLogger(__name__)
    results: list[CorrelationResult] = []

    for metric in metrics:
        try:
            results.append(analyze(events, metric, lookback_hours, baseline_days))
        except ValueError as exc:
            # Keep the best-effort behaviour, but leave a trace of the skip.
            logger.warning("Skipping metric %r: %s", metric.name, exc)

    active = sum(1 for r in results if r.significant)

    green_max = THRESHOLDS["green"][1]
    yellow_max = THRESHOLDS["yellow"][1]

    if active <= green_max:
        level = "green"
    elif active <= yellow_max:
        level = "yellow"
    else:
        level = "red"

    return AlertReport(
        level=level,
        active_signals=active,
        total_signals=len(results),
        results=results,
        narrative=None,
    )
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class Event:
    """A discrete occurrence that metrics are correlated against."""

    # Moment the event happened; anchors the analysis windows.
    timestamp: datetime
    # Short name for the kind of event (e.g. "migraine" in the README).
    label: str
    # Optional free-form annotation; None when not provided.
    notes: Optional[str] = None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class Metric:
    """A named time series given as parallel timestamp/value lists.

    Raises:
        ValueError: On construction, if ``timestamps`` and ``values`` do not
            have the same length.
    """

    # Identifier used in results and error messages.
    name: str
    # Sample times; timestamps[i] belongs to values[i].
    timestamps: list[datetime]
    # Sample values, in the same order as timestamps.
    values: list[float]

    def __post_init__(self) -> None:
        """Reject series whose two lists cannot be paired element-wise."""
        # Fail at construction with a clear message instead of a later,
        # opaque boolean-index error when the series is analysed.
        if len(self.timestamps) != len(self.values):
            raise ValueError(
                f"Metric '{self.name}': timestamps and values must have the "
                f"same length (got {len(self.timestamps)} and "
                f"{len(self.values)})"
            )
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class CorrelationResult:
    """Outcome of testing one metric against a set of events."""

    # Name of the metric that was analysed.
    metric_name: str
    # p-value of the statistical test.
    p_value: float
    # Whether the result counts as an active signal.
    significant: bool
    # Effect size reported alongside the p-value.
    effect_size: float
    # Median of the baseline samples.
    baseline_median: float
    # Median of the pre-event samples.
    pre_event_median: float
    # Optional text describing the result; None until one is attached.
    narrative: Optional[str] = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class AlertReport:
    """Aggregated view over the per-metric correlation results."""

    # Alert level name (the engine emits "green", "yellow" or "red").
    level: str
    # Number of results flagged as significant.
    active_signals: int
    # Number of results included in the report.
    total_signals: int
    # The individual per-metric results.
    results: list[CorrelationResult]
    # Optional combined text for the report; None until one is attached.
    narrative: Optional[str] = None
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from .models import AlertReport, CorrelationResult
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _build_prompt(r: CorrelationResult) -> str:
    """Render the (Spanish) LLM prompt for a single correlation result.

    The prompt lists the already-computed statistics of ``r`` and then the
    writing constraints: one sentence in Spanish, at most 20 words, with an
    allowed and a forbidden vocabulary.
    """
    statistics = [
        "Datos estadísticos CALCULADOS (no los inventes ni los modifiques):\n",
        f"Variable: {r.metric_name}\n",
        f"Mediana baseline: {r.baseline_median}\n",
        f"Mediana pre-evento: {r.pre_event_median}\n",
        f"p-valor Mann-Whitney U: {r.p_value}\n",
        f"Tamaño del efecto (rank-biserial): {r.effect_size}\n",
    ]
    instructions = [
        "Escribe UNA frase en español. Máximo 20 palabras.\n",
        "Usa: patrón detectado, asociación observada.\n",
        "Prohibido: diagnóstico, causa, predice, confirma.",
    ]
    return "".join(statistics + instructions)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _call_groq(prompt: str) -> str:
    """Send ``prompt`` as one user message to Groq and return the reply text.

    The ``groq`` SDK is imported lazily and the API key is read from the
    ``GROQ_API_KEY`` environment variable (``KeyError`` if it is unset).
    The reply is limited to 80 tokens and returned stripped of surrounding
    whitespace.
    """
    from groq import Groq

    api_key = os.environ["GROQ_API_KEY"]
    completion = Groq(api_key=api_key).chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=80,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _call_anthropic(prompt: str) -> str:
    """Send ``prompt`` as one user message to Anthropic and return the reply.

    The ``anthropic`` SDK is imported lazily and the API key is read from the
    ``ANTHROPIC_API_KEY`` environment variable (``KeyError`` if it is unset).
    The reply is limited to 80 tokens; the text of the first content block is
    returned stripped of surrounding whitespace.
    """
    import anthropic

    api_key = os.environ["ANTHROPIC_API_KEY"]
    message = anthropic.Anthropic(api_key=api_key).messages.create(
        model="claude-haiku-4-5-20251001",
        max_tokens=80,
        messages=[{"role": "user", "content": prompt}],
    )
    first_block = message.content[0]
    return first_block.text.strip()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def narrate(report: AlertReport, provider: str = "groq") -> AlertReport:
    """Attach LLM-written sentences to the significant results of a report.

    Green reports are returned untouched and no LLM call is made. Otherwise
    each significant result gets its own ``narrative`` (one LLM call per
    result), and ``report.narrative`` becomes the newline-joined list of
    ``"[metric_name] sentence"`` entries, or ``None`` if there were none.

    The report and its results are modified in place; the same ``report``
    object is returned for convenience.

    Args:
        report: Report to narrate.
        provider: LLM backend to use, ``"groq"`` or ``"anthropic"``.

    Returns:
        The same ``report`` instance.

    Raises:
        ValueError: If ``provider`` is not a supported backend (checked only
            for non-green reports, since green reports never call an LLM).
    """
    if report.level == "green":
        return report

    # Explicit dispatch: an unrecognised name (typo, wrong case, another
    # vendor) must not silently send data to the Anthropic backend.
    if provider == "groq":
        caller = _call_groq
    elif provider == "anthropic":
        caller = _call_anthropic
    else:
        raise ValueError(
            f"Unknown provider {provider!r}; expected 'groq' or 'anthropic'"
        )

    individual_narratives: list[str] = []
    for result in report.results:
        if not result.significant:
            continue
        result.narrative = caller(_build_prompt(result))
        individual_narratives.append(f"[{result.metric_name}] {result.narrative}")

    report.narrative = "\n".join(individual_narratives) if individual_narratives else None
    return report
|
|
File without changes
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from datetime import datetime, timedelta
|
|
3
|
+
|
|
4
|
+
from chrono_correlator.models import Event, Metric
|
|
5
|
+
from chrono_correlator.correlator import analyze
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
BASE = datetime(2024, 1, 1)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _make_flat_metric(name: str, hours: int, value: float = 55.0) -> Metric:
    """Build an hourly metric starting at BASE whose samples all equal ``value``."""
    return Metric(
        name=name,
        timestamps=[BASE + timedelta(hours=offset) for offset in range(hours)],
        values=[value for _ in range(hours)],
    )
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_detects_real_pattern():
    """A dip in the 48 h before each event must be flagged as significant."""
    total_hours = 700
    event_days = [10, 20, 30]

    # Hour offsets (from BASE) that fall inside the 48 h before any event.
    dip_hours = {
        day * 24 - 48 + offset for day in event_days for offset in range(48)
    }
    metric = Metric(
        name="hrv",
        timestamps=[BASE + timedelta(hours=hour) for hour in range(total_hours)],
        values=[30.0 if hour in dip_hours else 55.0 for hour in range(total_hours)],
    )
    events = [
        Event(timestamp=BASE + timedelta(days=day), label="test")
        for day in event_days
    ]

    result = analyze(events, metric)

    assert result.significant is True
    assert result.pre_event_median < result.baseline_median
    assert result.narrative is None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_no_false_positive():
    """A perfectly flat series must not be reported as significant."""
    events = [
        Event(timestamp=BASE + timedelta(days=day), label="test")
        for day in (10, 20, 30)
    ]
    flat = _make_flat_metric("hrv", hours=700, value=55.0)

    outcome = analyze(events, flat)

    assert outcome.significant is False
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_insufficient_data():
    """Two samples are too few for the test, so analyze must raise ValueError."""
    metric = Metric(
        name="hrv",
        timestamps=[BASE + timedelta(hours=offset) for offset in range(2)],
        values=[55.0, 55.0],
    )
    single_event = Event(timestamp=BASE + timedelta(days=1), label="test")

    with pytest.raises(ValueError, match="Datos insuficientes"):
        analyze([single_event], metric)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from datetime import datetime, timedelta
|
|
2
|
+
|
|
3
|
+
from chrono_correlator.models import Event, Metric
|
|
4
|
+
from chrono_correlator.engine import evaluate
|
|
5
|
+
|
|
6
|
+
BASE = datetime(2024, 1, 1)
|
|
7
|
+
EVENT_DAYS = [10, 20, 30]
|
|
8
|
+
EVENTS = [Event(timestamp=BASE + timedelta(days=d), label="test") for d in EVENT_DAYS]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _flat_metric(name: str, hours: int = 700, value: float = 55.0) -> Metric:
    """Build an hourly metric starting at BASE whose samples all equal ``value``."""
    hourly = [BASE + timedelta(hours=offset) for offset in range(hours)]
    constant = [value for _ in range(hours)]
    return Metric(name=name, timestamps=hourly, values=constant)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _pattern_metric(name: str, hours: int = 700) -> Metric:
    """Build an hourly metric that drops from 55.0 to 30.0 before each event.

    The drop covers the 48 hourly samples preceding every day in EVENT_DAYS
    (as far as they fall inside the generated range).
    """
    dipped_hours = {
        day * 24 - 48 + offset for day in EVENT_DAYS for offset in range(48)
    }
    return Metric(
        name=name,
        timestamps=[BASE + timedelta(hours=hour) for hour in range(hours)],
        values=[30.0 if hour in dipped_hours else 55.0 for hour in range(hours)],
    )
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_green_level():
    """Seven flat metrics produce a green report without a narrative."""
    flat_metrics = [_flat_metric(f"m{index}") for index in range(7)]

    outcome = evaluate(EVENTS, flat_metrics)

    assert outcome.level == "green"
    assert outcome.narrative is None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_yellow_level():
    """Three patterned metrics among seven yield a yellow report."""
    patterned = [_pattern_metric(f"m{index}") for index in range(3)]
    flat = [_flat_metric(f"flat{index}") for index in range(4)]

    outcome = evaluate(EVENTS, patterned + flat)

    assert outcome.level == "yellow"
    assert outcome.active_signals == 3
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_red_level():
    """Five patterned metrics among seven yield a red report."""
    patterned = [_pattern_metric(f"m{index}") for index in range(5)]
    flat = [_flat_metric(f"flat{index}") for index in range(2)]

    outcome = evaluate(EVENTS, patterned + flat)

    assert outcome.level == "red"
    assert outcome.active_signals >= 5
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_skips_insufficient_metrics():
    """A metric with too few samples is left out of the report totals."""
    two_samples = [BASE + timedelta(hours=offset) for offset in range(2)]
    tiny = Metric(name="tiny", timestamps=two_samples, values=[55.0, 55.0])

    outcome = evaluate(EVENTS, [tiny, _flat_metric("good")])

    assert outcome.total_signals == 1
    assert outcome.level == "green"
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chrono-correlator
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Statistical correlation between time-series and discrete events with optional LLM narration
|
|
5
|
+
Author-email: Raúl Gallardo <g3ov3r@gmail.com>
|
|
6
|
+
License-Expression: GPL-3.0-only
|
|
7
|
+
Project-URL: Homepage, https://github.com/Raulcadiz/chrono-correlator
|
|
8
|
+
Project-URL: Repository, https://github.com/Raulcadiz/chrono-correlator
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/Raulcadiz/chrono-correlator/issues
|
|
10
|
+
Keywords: statistics,time-series,correlation,mann-whitney,llm,health,monitoring
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: scipy>=1.11
|
|
23
|
+
Requires-Dist: numpy>=1.24
|
|
24
|
+
Requires-Dist: groq>=0.4.0
|
|
25
|
+
Requires-Dist: anthropic>=0.20.0
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
29
|
+
|
|
30
|
+
# chrono-correlator
|
|
31
|
+
|
|
32
|
+
A generic statistical engine that correlates time-series data with discrete events using the Mann-Whitney U test, and narrates statistically significant signals (p < 0.05) with an LLM only when the alert level is yellow or red.
|
|
33
|
+
|
|
34
|
+
## Install
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install chrono-correlator
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Quick start
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from datetime import datetime, timedelta
|
|
44
|
+
from chrono_correlator import Event, Metric, evaluate, narrate
|
|
45
|
+
|
|
46
|
+
base = datetime(2024, 1, 1)
|
|
47
|
+
|
|
48
|
+
events = [
|
|
49
|
+
Event(timestamp=base + timedelta(days=d), label="migraine")
|
|
50
|
+
for d in [10, 20, 30]
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
timestamps = [base + timedelta(hours=h) for h in range(800)]
|
|
54
|
+
values = [55.0] * 800
|
|
55
|
+
for day in [10, 20, 30]:
|
|
56
|
+
for h in range(48):
|
|
57
|
+
idx = day * 24 - 48 + h
|
|
58
|
+
if 0 <= idx < 800:
|
|
59
|
+
values[idx] = 28.0
|
|
60
|
+
|
|
61
|
+
hrv = Metric(name="hrv", timestamps=timestamps, values=values)
|
|
62
|
+
|
|
63
|
+
report = evaluate(events, [hrv])
|
|
64
|
+
print(f"Level: {report.level} — {report.active_signals}/{report.total_signals} signals")
|
|
65
|
+
|
|
66
|
+
if report.level != "green":
|
|
67
|
+
report = narrate(report, provider="groq")
|
|
68
|
+
print(report.narrative)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## How it works
|
|
72
|
+
|
|
73
|
+
- **Statistical core:** For each metric, values in the 48 h before each event are compared against a 28-day baseline using Mann-Whitney U. Effect size is computed as rank-biserial correlation.
|
|
74
|
+
- **Alert level:** Active signals (p < 0.05) are counted across all metrics. 0–2 → green, 3–4 → yellow, 5 or more → red.
|
|
75
|
+
- **LLM narration:** Only triggered on yellow or red. The model receives pre-calculated statistics and is constrained to one factual sentence per signal — no diagnosis, no causal inference.
|
|
76
|
+
|
|
77
|
+
## Use cases
|
|
78
|
+
|
|
79
|
+
- **Health monitoring** — correlate HRV, deep sleep, or skin temperature drops with migraine or crisis events.
|
|
80
|
+
- **Infrastructure** — detect latency or error-rate anomalies preceding service outages.
|
|
81
|
+
- **IPTV / streaming** — link buffering load spikes to subscriber disconnection events.
|
|
82
|
+
- **Energy consumption** — associate power demand patterns with grid stress or equipment failures.
|
|
83
|
+
|
|
84
|
+
## License
|
|
85
|
+
|
|
86
|
+
GPL-3.0 — Raúl Gallardo (g3v3r)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
chrono_correlator/__init__.py
|
|
4
|
+
chrono_correlator/correlator.py
|
|
5
|
+
chrono_correlator/engine.py
|
|
6
|
+
chrono_correlator/models.py
|
|
7
|
+
chrono_correlator/narrator.py
|
|
8
|
+
chrono_correlator.egg-info/PKG-INFO
|
|
9
|
+
chrono_correlator.egg-info/SOURCES.txt
|
|
10
|
+
chrono_correlator.egg-info/dependency_links.txt
|
|
11
|
+
chrono_correlator.egg-info/requires.txt
|
|
12
|
+
chrono_correlator.egg-info/top_level.txt
|
|
13
|
+
chrono_correlator/tests/__init__.py
|
|
14
|
+
chrono_correlator/tests/test_correlator.py
|
|
15
|
+
chrono_correlator/tests/test_engine.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
chrono_correlator
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "chrono-correlator"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Statistical correlation between time-series and discrete events with optional LLM narration"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "GPL-3.0-only"
|
|
11
|
+
authors = [{ name = "Raúl Gallardo", email = "g3ov3r@gmail.com" }]
|
|
12
|
+
keywords = ["statistics", "time-series", "correlation", "mann-whitney", "llm", "health", "monitoring"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
22
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
23
|
+
]
|
|
24
|
+
requires-python = ">=3.10"
|
|
25
|
+
dependencies = [
|
|
26
|
+
"scipy>=1.11",
|
|
27
|
+
"numpy>=1.24",
|
|
28
|
+
"groq>=0.4.0",
|
|
29
|
+
"anthropic>=0.20.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
dev = ["pytest", "pytest-cov"]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/Raulcadiz/chrono-correlator"
|
|
37
|
+
Repository = "https://github.com/Raulcadiz/chrono-correlator"
|
|
38
|
+
"Bug Tracker" = "https://github.com/Raulcadiz/chrono-correlator/issues"
|
|
39
|
+
|
|
40
|
+
[tool.setuptools.packages.find]
|
|
41
|
+
where = ["."]
|
|
42
|
+
include = ["chrono_correlator*"]
|