depthfusion 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- depthfusion/__init__.py +0 -0
- depthfusion/analytics/__init__.py +15 -0
- depthfusion/analytics/aggregation.py +336 -0
- depthfusion/analytics/budget.py +358 -0
- depthfusion/analytics/collector.py +160 -0
- depthfusion/analytics/model_stats.py +352 -0
- depthfusion/analytics/recommender.py +294 -0
- depthfusion/analytics/router.py +342 -0
- depthfusion/analytics/store.py +73 -0
- depthfusion/analyzer/__init__.py +0 -0
- depthfusion/analyzer/compatibility.py +348 -0
- depthfusion/analyzer/installer.py +179 -0
- depthfusion/analyzer/recommender.py +35 -0
- depthfusion/analyzer/scanner.py +127 -0
- depthfusion/api/__init__.py +0 -0
- depthfusion/api/admin_console.py +795 -0
- depthfusion/api/auth.py +164 -0
- depthfusion/api/events.py +313 -0
- depthfusion/api/query.py +417 -0
- depthfusion/api/rest.py +1028 -0
- depthfusion/api/role_admin.py +254 -0
- depthfusion/audit/__init__.py +16 -0
- depthfusion/audit/log.py +353 -0
- depthfusion/authz/__init__.py +52 -0
- depthfusion/authz/capability_check.py +191 -0
- depthfusion/authz/classification.py +201 -0
- depthfusion/authz/export_audit.py +525 -0
- depthfusion/authz/export_controls.py +217 -0
- depthfusion/authz/frontmatter.py +177 -0
- depthfusion/authz/label_mapping.py +263 -0
- depthfusion/authz/policy_engine.py +605 -0
- depthfusion/authz/policy_snapshot.py +340 -0
- depthfusion/authz/roles.py +327 -0
- depthfusion/backends/__init__.py +29 -0
- depthfusion/backends/base.py +139 -0
- depthfusion/backends/chain.py +266 -0
- depthfusion/backends/factory.py +267 -0
- depthfusion/backends/gemma.py +302 -0
- depthfusion/backends/haiku.py +297 -0
- depthfusion/backends/local_embedding.py +202 -0
- depthfusion/backends/null.py +65 -0
- depthfusion/backends/openrouter.py +132 -0
- depthfusion/cache/__init__.py +110 -0
- depthfusion/cache/activity_signals.py +322 -0
- depthfusion/cache/admission.py +278 -0
- depthfusion/cache/hit_rate.py +288 -0
- depthfusion/cache/lease_lifecycle.py +888 -0
- depthfusion/cache/manager.py +371 -0
- depthfusion/cache/models.py +102 -0
- depthfusion/cache/prefetch_scheduler.py +254 -0
- depthfusion/capture/__init__.py +0 -0
- depthfusion/capture/_metrics.py +66 -0
- depthfusion/capture/auto_learn.py +515 -0
- depthfusion/capture/compressor.py +141 -0
- depthfusion/capture/decay.py +266 -0
- depthfusion/capture/decision_extractor.py +394 -0
- depthfusion/capture/dedup.py +404 -0
- depthfusion/capture/event_hook.py +62 -0
- depthfusion/capture/negative_extractor.py +369 -0
- depthfusion/capture/pruner.py +344 -0
- depthfusion/cli/__init__.py +1 -0
- depthfusion/cli/devices.py +143 -0
- depthfusion/cli/migrate.py +438 -0
- depthfusion/cli/roles.py +303 -0
- depthfusion/cognitive/__init__.py +0 -0
- depthfusion/cognitive/consolidator.py +60 -0
- depthfusion/cognitive/contradiction.py +95 -0
- depthfusion/cognitive/scorer.py +56 -0
- depthfusion/connectors/__init__.py +19 -0
- depthfusion/connectors/sharepoint.py +756 -0
- depthfusion/connectors/sharepoint_scheduler.py +149 -0
- depthfusion/connectors/sharepoint_scope.py +186 -0
- depthfusion/connectors/sharepoint_state.py +128 -0
- depthfusion/core/__init__.py +0 -0
- depthfusion/core/config.py +256 -0
- depthfusion/core/event_store.py +658 -0
- depthfusion/core/feedback.py +317 -0
- depthfusion/core/file_locking.py +266 -0
- depthfusion/core/hit_tracker.py +123 -0
- depthfusion/core/memory.py +59 -0
- depthfusion/core/memory_object.py +187 -0
- depthfusion/core/project_context.py +122 -0
- depthfusion/core/project_ingest.py +204 -0
- depthfusion/core/project_registry.py +59 -0
- depthfusion/core/research.py +181 -0
- depthfusion/core/scoring.py +82 -0
- depthfusion/core/types.py +216 -0
- depthfusion/fusion/__init__.py +0 -0
- depthfusion/fusion/block_retrieval.py +192 -0
- depthfusion/fusion/chunk_state_compression.py +190 -0
- depthfusion/fusion/gates.py +460 -0
- depthfusion/fusion/materialisation_policy.py +210 -0
- depthfusion/fusion/reranker.py +68 -0
- depthfusion/fusion/rrf.py +47 -0
- depthfusion/fusion/selective_fusion_weighter.py +316 -0
- depthfusion/fusion/weighted.py +118 -0
- depthfusion/graph/__init__.py +1 -0
- depthfusion/graph/builder.py +102 -0
- depthfusion/graph/dedup.py +165 -0
- depthfusion/graph/extractor.py +234 -0
- depthfusion/graph/linker.py +339 -0
- depthfusion/graph/scope.py +43 -0
- depthfusion/graph/store.py +610 -0
- depthfusion/graph/traverser.py +196 -0
- depthfusion/graph/types.py +91 -0
- depthfusion/hooks/__init__.py +0 -0
- depthfusion/hooks/git_post_commit.py +249 -0
- depthfusion/hooks/post_tool_use.py +296 -0
- depthfusion/hooks/session_start.py +162 -0
- depthfusion/identity/__init__.py +59 -0
- depthfusion/identity/device_keychain.py +451 -0
- depthfusion/identity/device_lease.py +222 -0
- depthfusion/identity/device_registry.py +239 -0
- depthfusion/identity/errors.py +55 -0
- depthfusion/identity/fastapi_deps.py +117 -0
- depthfusion/identity/jwks_cache.py +159 -0
- depthfusion/identity/legacy_shim.py +204 -0
- depthfusion/identity/models.py +81 -0
- depthfusion/identity/oidc_client.py +483 -0
- depthfusion/identity/principal_store.py +156 -0
- depthfusion/identity/service_account.py +204 -0
- depthfusion/identity/token_validator.py +232 -0
- depthfusion/ingest/__init__.py +33 -0
- depthfusion/ingest/chunking.py +149 -0
- depthfusion/ingest/models.py +46 -0
- depthfusion/ingest/parser.py +294 -0
- depthfusion/ingest/pipeline.py +256 -0
- depthfusion/install/__init__.py +0 -0
- depthfusion/install/dep_checker.py +111 -0
- depthfusion/install/gpu_probe.py +238 -0
- depthfusion/install/install.py +1153 -0
- depthfusion/install/migrate.py +68 -0
- depthfusion/install/smoke.py +248 -0
- depthfusion/install/ui_server.py +399 -0
- depthfusion/mcp/__init__.py +0 -0
- depthfusion/mcp/authz.py +195 -0
- depthfusion/mcp/cognitive_tools.py +79 -0
- depthfusion/mcp/http_server.py +257 -0
- depthfusion/mcp/server.py +418 -0
- depthfusion/mcp/skillforge_client.py +86 -0
- depthfusion/mcp/tools/__init__.py +23 -0
- depthfusion/mcp/tools/_registry.py +603 -0
- depthfusion/mcp/tools/_shared.py +718 -0
- depthfusion/mcp/tools/_state.py +139 -0
- depthfusion/mcp/tools/analytics_tools.py +9 -0
- depthfusion/mcp/tools/bridge.py +90 -0
- depthfusion/mcp/tools/capture.py +364 -0
- depthfusion/mcp/tools/decisions.py +206 -0
- depthfusion/mcp/tools/graph.py +509 -0
- depthfusion/mcp/tools/model_stats_tool.py +24 -0
- depthfusion/mcp/tools/project.py +250 -0
- depthfusion/mcp/tools/recall.py +219 -0
- depthfusion/mcp/tools/recommender_tools.py +60 -0
- depthfusion/mcp/tools/system.py +85 -0
- depthfusion/mcp/tools/telemetry.py +341 -0
- depthfusion/mcp/tools/telemetry_tools.py +43 -0
- depthfusion/metrics/__init__.py +0 -0
- depthfusion/metrics/aggregator.py +341 -0
- depthfusion/metrics/collector.py +522 -0
- depthfusion/migrations/__init__.py +11 -0
- depthfusion/parsers/__init__.py +69 -0
- depthfusion/parsers/base.py +49 -0
- depthfusion/parsers/chatgpt.py +107 -0
- depthfusion/parsers/deepseek.py +103 -0
- depthfusion/parsers/documents/__init__.py +107 -0
- depthfusion/parsers/documents/base.py +354 -0
- depthfusion/parsers/documents/docx.py +122 -0
- depthfusion/parsers/documents/generic.py +173 -0
- depthfusion/parsers/documents/ocr.py +191 -0
- depthfusion/parsers/documents/pdf.py +113 -0
- depthfusion/parsers/documents/pptx.py +103 -0
- depthfusion/parsers/documents/xlsx.py +150 -0
- depthfusion/parsers/gemini.py +82 -0
- depthfusion/parsers/generic.py +138 -0
- depthfusion/recursive/__init__.py +0 -0
- depthfusion/recursive/client.py +346 -0
- depthfusion/recursive/sandbox.py +78 -0
- depthfusion/recursive/sidecar.py +79 -0
- depthfusion/recursive/strategies.py +45 -0
- depthfusion/recursive/trajectory.py +40 -0
- depthfusion/retrieval/__init__.py +17 -0
- depthfusion/retrieval/acl_verifier.py +204 -0
- depthfusion/retrieval/bm25.py +130 -0
- depthfusion/retrieval/hnsw_store.py +509 -0
- depthfusion/retrieval/hybrid.py +942 -0
- depthfusion/retrieval/reranker.py +99 -0
- depthfusion/router/__init__.py +0 -0
- depthfusion/router/bus.py +302 -0
- depthfusion/router/cost_estimator.py +83 -0
- depthfusion/router/dispatcher.py +49 -0
- depthfusion/router/publisher.py +35 -0
- depthfusion/router/subscriber.py +17 -0
- depthfusion/session/__init__.py +0 -0
- depthfusion/session/compactor.py +91 -0
- depthfusion/session/loader.py +84 -0
- depthfusion/session/scorer.py +69 -0
- depthfusion/session/tagger.py +169 -0
- depthfusion/storage/__init__.py +3 -0
- depthfusion/storage/event_log.py +92 -0
- depthfusion/storage/file_index.py +318 -0
- depthfusion/storage/memory_store.py +306 -0
- depthfusion/storage/telemetry_store.py +336 -0
- depthfusion/storage/tier_manager.py +66 -0
- depthfusion/storage/vector_store.py +238 -0
- depthfusion/sync/__init__.py +4 -0
- depthfusion/sync/engine.py +497 -0
- depthfusion/sync/router.py +177 -0
- depthfusion/telemetry/__init__.py +2 -0
- depthfusion/telemetry/recorder.py +142 -0
- depthfusion/telemetry/schema.py +65 -0
- depthfusion/utils/__init__.py +0 -0
- depthfusion/utils/expression_eval.py +257 -0
- depthfusion/utils/mode.py +31 -0
- depthfusion-2.0.0.dist-info/METADATA +80 -0
- depthfusion-2.0.0.dist-info/RECORD +219 -0
- depthfusion-2.0.0.dist-info/WHEEL +5 -0
- depthfusion-2.0.0.dist-info/entry_points.txt +2 -0
- depthfusion-2.0.0.dist-info/licenses/LICENSE +21 -0
- depthfusion-2.0.0.dist-info/top_level.txt +1 -0
depthfusion/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Business Intelligence Layer — E-55 analytics foundation.
|
|
2
|
+
|
|
3
|
+
Exposes:
|
|
4
|
+
- AnalyticsCollector: record usage events (search, ingest, sync) with
|
|
5
|
+
principal_id + timestamp into a SQLite analytics table.
|
|
6
|
+
- AggregationService: compute daily/weekly rollups from the events table.
|
|
7
|
+
- analytics_router: FastAPI router mounting GET /v2/analytics/summary.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from .aggregation import AggregationService
|
|
12
|
+
from .collector import AnalyticsCollector
|
|
13
|
+
from .router import analytics_router
|
|
14
|
+
|
|
15
|
+
__all__ = ["AnalyticsCollector", "AggregationService", "analytics_router"]
|
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
"""AggregationService — daily/weekly rollups of usage events (E-55).
|
|
2
|
+
|
|
3
|
+
Rollups are stored in the ``analytics_rollups`` table so the summary
|
|
4
|
+
endpoint can serve pre-computed counts without a full table scan.
|
|
5
|
+
|
|
6
|
+
``compute_rollups()`` is idempotent: re-running it overwrites existing
|
|
7
|
+
rollup rows for the same (principal_id, event_type, period, period_start)
|
|
8
|
+
via ``INSERT OR REPLACE``.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
from contextlib import closing
|
|
14
|
+
from datetime import date, datetime, timedelta, timezone
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from .store import _connect, init_db
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)

#: Rollup granularities that :meth:`AggregationService.compute_rollups`
#: will compute; any other period name passed to it is skipped.
SUPPORTED_PERIODS = frozenset(("daily", "weekly"))

#: Allowlist of ``analytics_events`` columns that may be used as a facet
#: (group-by) dimension.  ``principal_id`` is deliberately absent: it is the
#: ACL scope of every query, never a facet, so no caller can group across
#: principals.
SUPPORTED_FACETS = frozenset(("event_type",))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _period_bounds(period: str, reference: date) -> tuple[date, date]:
|
|
31
|
+
"""Return (start, end_inclusive) for a period ending on *reference*.
|
|
32
|
+
|
|
33
|
+
``daily`` → single day: (reference, reference)
|
|
34
|
+
``weekly`` → 7-day window ending on reference: (reference-6d, reference)
|
|
35
|
+
"""
|
|
36
|
+
if period == "daily":
|
|
37
|
+
return reference, reference
|
|
38
|
+
# weekly
|
|
39
|
+
return reference - timedelta(days=6), reference
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class AggregationService:
|
|
43
|
+
"""Computes and stores pre-aggregated usage rollups.
|
|
44
|
+
|
|
45
|
+
Parameters
|
|
46
|
+
----------
|
|
47
|
+
db_path:
|
|
48
|
+
Same SQLite database used by :class:`~.collector.AnalyticsCollector`.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
def __init__(self, db_path: Path) -> None:
|
|
52
|
+
self._db_path = Path(db_path)
|
|
53
|
+
init_db(self._db_path)
|
|
54
|
+
|
|
55
|
+
# ------------------------------------------------------------------
|
|
56
|
+
# Public API
|
|
57
|
+
# ------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
def compute_rollups(
|
|
60
|
+
self,
|
|
61
|
+
*,
|
|
62
|
+
reference_date: date | None = None,
|
|
63
|
+
periods: tuple[str, ...] = ("daily", "weekly"),
|
|
64
|
+
) -> int:
|
|
65
|
+
"""Compute rollups for *reference_date* and write them to the DB.
|
|
66
|
+
|
|
67
|
+
Returns the total number of rollup rows written/updated.
|
|
68
|
+
|
|
69
|
+
Parameters
|
|
70
|
+
----------
|
|
71
|
+
reference_date:
|
|
72
|
+
The "today" anchor for period calculation; defaults to
|
|
73
|
+
``date.today()`` in UTC.
|
|
74
|
+
periods:
|
|
75
|
+
Which granularities to compute. Defaults to both.
|
|
76
|
+
"""
|
|
77
|
+
if reference_date is None:
|
|
78
|
+
reference_date = datetime.now(tz=timezone.utc).date()
|
|
79
|
+
|
|
80
|
+
computed_at = datetime.now(tz=timezone.utc).isoformat()
|
|
81
|
+
rows_written = 0
|
|
82
|
+
|
|
83
|
+
try:
|
|
84
|
+
with closing(_connect(self._db_path)) as conn:
|
|
85
|
+
# Enumerate distinct principals
|
|
86
|
+
principal_rows = conn.execute(
|
|
87
|
+
"SELECT DISTINCT principal_id FROM analytics_events"
|
|
88
|
+
).fetchall()
|
|
89
|
+
principals = [r[0] for r in principal_rows]
|
|
90
|
+
|
|
91
|
+
for principal_id in principals:
|
|
92
|
+
for period in periods:
|
|
93
|
+
if period not in SUPPORTED_PERIODS:
|
|
94
|
+
continue
|
|
95
|
+
start, end = _period_bounds(period, reference_date)
|
|
96
|
+
start_ts = datetime(
|
|
97
|
+
start.year, start.month, start.day, tzinfo=timezone.utc
|
|
98
|
+
).isoformat()
|
|
99
|
+
end_ts = datetime(
|
|
100
|
+
end.year, end.month, end.day, 23, 59, 59, tzinfo=timezone.utc
|
|
101
|
+
).isoformat()
|
|
102
|
+
|
|
103
|
+
# Count per event_type for this principal + window
|
|
104
|
+
counts = conn.execute(
|
|
105
|
+
"SELECT event_type, COUNT(*) as cnt"
|
|
106
|
+
" FROM analytics_events"
|
|
107
|
+
" WHERE principal_id = ?"
|
|
108
|
+
" AND recorded_at >= ? AND recorded_at <= ?"
|
|
109
|
+
" GROUP BY event_type",
|
|
110
|
+
(principal_id, start_ts, end_ts),
|
|
111
|
+
).fetchall()
|
|
112
|
+
|
|
113
|
+
for row in counts:
|
|
114
|
+
event_type, count = row[0], row[1]
|
|
115
|
+
conn.execute(
|
|
116
|
+
"INSERT OR REPLACE INTO analytics_rollups"
|
|
117
|
+
" (principal_id, event_type, period,"
|
|
118
|
+
" period_start, count, computed_at)"
|
|
119
|
+
" VALUES (?, ?, ?, ?, ?, ?)",
|
|
120
|
+
(
|
|
121
|
+
principal_id,
|
|
122
|
+
event_type,
|
|
123
|
+
period,
|
|
124
|
+
start.isoformat(),
|
|
125
|
+
count,
|
|
126
|
+
computed_at,
|
|
127
|
+
),
|
|
128
|
+
)
|
|
129
|
+
rows_written += 1
|
|
130
|
+
|
|
131
|
+
conn.commit()
|
|
132
|
+
except Exception: # noqa: BLE001
|
|
133
|
+
logger.exception("analytics: rollup computation failed")
|
|
134
|
+
|
|
135
|
+
return rows_written
|
|
136
|
+
|
|
137
|
+
def summary(
|
|
138
|
+
self,
|
|
139
|
+
*,
|
|
140
|
+
principal_id: str,
|
|
141
|
+
period_days: int = 7,
|
|
142
|
+
reference_date: date | None = None,
|
|
143
|
+
) -> dict:
|
|
144
|
+
"""Return an aggregated usage summary for *principal_id*.
|
|
145
|
+
|
|
146
|
+
Computes counts directly from ``analytics_events`` (not the
|
|
147
|
+
rollup table) so the endpoint always reflects real-time data
|
|
148
|
+
even before :meth:`compute_rollups` has been called.
|
|
149
|
+
|
|
150
|
+
Parameters
|
|
151
|
+
----------
|
|
152
|
+
principal_id:
|
|
153
|
+
The principal whose metrics are being summarised.
|
|
154
|
+
period_days:
|
|
155
|
+
How many days to look back (1 = today only, 7 = last 7 days).
|
|
156
|
+
reference_date:
|
|
157
|
+
Anchor date; defaults to today in UTC.
|
|
158
|
+
|
|
159
|
+
Returns
|
|
160
|
+
-------
|
|
161
|
+
dict with keys:
|
|
162
|
+
``principal_id``, ``period_days``, ``period_start``,
|
|
163
|
+
``period_end``, ``total_events``, ``by_event_type``
|
|
164
|
+
"""
|
|
165
|
+
if reference_date is None:
|
|
166
|
+
reference_date = datetime.now(tz=timezone.utc).date()
|
|
167
|
+
|
|
168
|
+
start_date = reference_date - timedelta(days=period_days - 1)
|
|
169
|
+
start_ts = datetime(
|
|
170
|
+
start_date.year, start_date.month, start_date.day, tzinfo=timezone.utc
|
|
171
|
+
).isoformat()
|
|
172
|
+
end_ts = datetime(
|
|
173
|
+
reference_date.year, reference_date.month, reference_date.day,
|
|
174
|
+
23, 59, 59, tzinfo=timezone.utc,
|
|
175
|
+
).isoformat()
|
|
176
|
+
|
|
177
|
+
by_type: dict[str, int] = {}
|
|
178
|
+
total = 0
|
|
179
|
+
|
|
180
|
+
try:
|
|
181
|
+
with closing(_connect(self._db_path)) as conn:
|
|
182
|
+
rows = conn.execute(
|
|
183
|
+
"SELECT event_type, COUNT(*) as cnt"
|
|
184
|
+
" FROM analytics_events"
|
|
185
|
+
" WHERE principal_id = ?"
|
|
186
|
+
" AND recorded_at >= ? AND recorded_at <= ?"
|
|
187
|
+
" GROUP BY event_type",
|
|
188
|
+
(principal_id, start_ts, end_ts),
|
|
189
|
+
).fetchall()
|
|
190
|
+
|
|
191
|
+
for row in rows:
|
|
192
|
+
by_type[row[0]] = int(row[1])
|
|
193
|
+
total += int(row[1])
|
|
194
|
+
|
|
195
|
+
except Exception: # noqa: BLE001
|
|
196
|
+
logger.exception(
|
|
197
|
+
"analytics: summary query failed for principal=%r", principal_id
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
return {
|
|
201
|
+
"principal_id": principal_id,
|
|
202
|
+
"period_days": period_days,
|
|
203
|
+
"period_start": start_date.isoformat(),
|
|
204
|
+
"period_end": reference_date.isoformat(),
|
|
205
|
+
"total_events": total,
|
|
206
|
+
"by_event_type": by_type,
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
def facets(
|
|
210
|
+
self,
|
|
211
|
+
*,
|
|
212
|
+
principal_id: str,
|
|
213
|
+
facet: str = "event_type",
|
|
214
|
+
period_days: int = 7,
|
|
215
|
+
reference_date: date | None = None,
|
|
216
|
+
) -> dict:
|
|
217
|
+
"""Return faceted counts for *principal_id* grouped by *facet*.
|
|
218
|
+
|
|
219
|
+
Unlike :meth:`summary` (which always groups by ``event_type``), this
|
|
220
|
+
method exercises the composite ``(principal_id, recorded_at,
|
|
221
|
+
event_type)`` index added in T-622, returning a generic facet
|
|
222
|
+
breakdown that BI dashboards can chart.
|
|
223
|
+
|
|
224
|
+
ACL invariant: the result is always scoped to *principal_id*; the
|
|
225
|
+
facet dimension can never be ``principal_id`` (see
|
|
226
|
+
:data:`SUPPORTED_FACETS`), so a caller can never group across
|
|
227
|
+
principals.
|
|
228
|
+
|
|
229
|
+
Parameters
|
|
230
|
+
----------
|
|
231
|
+
principal_id:
|
|
232
|
+
The principal whose events are being faceted (ACL scope).
|
|
233
|
+
facet:
|
|
234
|
+
The column to group by. Must be in :data:`SUPPORTED_FACETS`.
|
|
235
|
+
period_days:
|
|
236
|
+
Look-back window in days.
|
|
237
|
+
reference_date:
|
|
238
|
+
Anchor date; defaults to today in UTC.
|
|
239
|
+
|
|
240
|
+
Returns
|
|
241
|
+
-------
|
|
242
|
+
dict with keys ``principal_id``, ``facet``, ``period_days``,
|
|
243
|
+
``period_start``, ``period_end``, ``total``, ``buckets``.
|
|
244
|
+
|
|
245
|
+
Raises
|
|
246
|
+
------
|
|
247
|
+
ValueError
|
|
248
|
+
If *facet* is not a supported facet dimension. This is the
|
|
249
|
+
allowlist that prevents SQL injection via the column name (the
|
|
250
|
+
value is interpolated into the SQL, so it must never come from
|
|
251
|
+
untrusted input directly).
|
|
252
|
+
"""
|
|
253
|
+
if facet not in SUPPORTED_FACETS:
|
|
254
|
+
raise ValueError(
|
|
255
|
+
f"Unsupported facet {facet!r}; allowed: {sorted(SUPPORTED_FACETS)}"
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
if reference_date is None:
|
|
259
|
+
reference_date = datetime.now(tz=timezone.utc).date()
|
|
260
|
+
|
|
261
|
+
start_date = reference_date - timedelta(days=period_days - 1)
|
|
262
|
+
start_ts = datetime(
|
|
263
|
+
start_date.year, start_date.month, start_date.day, tzinfo=timezone.utc
|
|
264
|
+
).isoformat()
|
|
265
|
+
end_ts = datetime(
|
|
266
|
+
reference_date.year, reference_date.month, reference_date.day,
|
|
267
|
+
23, 59, 59, tzinfo=timezone.utc,
|
|
268
|
+
).isoformat()
|
|
269
|
+
|
|
270
|
+
buckets: dict[str, int] = {}
|
|
271
|
+
total = 0
|
|
272
|
+
|
|
273
|
+
try:
|
|
274
|
+
with closing(_connect(self._db_path)) as conn:
|
|
275
|
+
# ``facet`` is validated against SUPPORTED_FACETS above, so the
|
|
276
|
+
# interpolation here is safe (allowlist, not user input).
|
|
277
|
+
rows = conn.execute(
|
|
278
|
+
f"SELECT {facet} AS bucket, COUNT(*) AS cnt" # noqa: S608
|
|
279
|
+
" FROM analytics_events"
|
|
280
|
+
" WHERE principal_id = ?"
|
|
281
|
+
" AND recorded_at >= ? AND recorded_at <= ?"
|
|
282
|
+
f" GROUP BY {facet}", # noqa: S608
|
|
283
|
+
(principal_id, start_ts, end_ts),
|
|
284
|
+
).fetchall()
|
|
285
|
+
|
|
286
|
+
for row in rows:
|
|
287
|
+
buckets[row[0]] = int(row[1])
|
|
288
|
+
total += int(row[1])
|
|
289
|
+
|
|
290
|
+
except Exception: # noqa: BLE001
|
|
291
|
+
logger.exception(
|
|
292
|
+
"analytics: facet query failed for principal=%r facet=%r",
|
|
293
|
+
principal_id,
|
|
294
|
+
facet,
|
|
295
|
+
)
|
|
296
|
+
|
|
297
|
+
return {
|
|
298
|
+
"principal_id": principal_id,
|
|
299
|
+
"facet": facet,
|
|
300
|
+
"period_days": period_days,
|
|
301
|
+
"period_start": start_date.isoformat(),
|
|
302
|
+
"period_end": reference_date.isoformat(),
|
|
303
|
+
"total": total,
|
|
304
|
+
"buckets": buckets,
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
def explain_facet_query(
|
|
308
|
+
self,
|
|
309
|
+
*,
|
|
310
|
+
principal_id: str = "_probe",
|
|
311
|
+
period_days: int = 7,
|
|
312
|
+
) -> list[str]:
|
|
313
|
+
"""Return the SQLite ``EXPLAIN QUERY PLAN`` rows for the facet query.
|
|
314
|
+
|
|
315
|
+
Used by the performance test to assert the composite facet index
|
|
316
|
+
(T-622) is actually selected by the planner rather than a full scan.
|
|
317
|
+
"""
|
|
318
|
+
reference_date = datetime.now(tz=timezone.utc).date()
|
|
319
|
+
start_date = reference_date - timedelta(days=period_days - 1)
|
|
320
|
+
start_ts = datetime(
|
|
321
|
+
start_date.year, start_date.month, start_date.day, tzinfo=timezone.utc
|
|
322
|
+
).isoformat()
|
|
323
|
+
end_ts = datetime(
|
|
324
|
+
reference_date.year, reference_date.month, reference_date.day,
|
|
325
|
+
23, 59, 59, tzinfo=timezone.utc,
|
|
326
|
+
).isoformat()
|
|
327
|
+
|
|
328
|
+
with closing(_connect(self._db_path)) as conn:
|
|
329
|
+
rows = conn.execute(
|
|
330
|
+
"EXPLAIN QUERY PLAN "
|
|
331
|
+
"SELECT event_type, COUNT(*) FROM analytics_events"
|
|
332
|
+
" WHERE principal_id = ? AND recorded_at >= ? AND recorded_at <= ?"
|
|
333
|
+
" GROUP BY event_type",
|
|
334
|
+
(principal_id, start_ts, end_ts),
|
|
335
|
+
).fetchall()
|
|
336
|
+
return [" ".join(str(c) for c in row) for row in rows]
|