nilalytics 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nilalytics/__init__.py +1 -0
- nilalytics/autocapture.py +114 -0
- nilalytics/cli.py +60 -0
- nilalytics/config.py +160 -0
- nilalytics/emitter.py +233 -0
- nilalytics/gateway.py +167 -0
- nilalytics/maintenance.py +60 -0
- nilalytics/query.py +271 -0
- nilalytics/server.py +151 -0
- nilalytics/storage.py +117 -0
- nilalytics-0.1.0.dist-info/METADATA +99 -0
- nilalytics-0.1.0.dist-info/RECORD +15 -0
- nilalytics-0.1.0.dist-info/WHEEL +4 -0
- nilalytics-0.1.0.dist-info/entry_points.txt +2 -0
- nilalytics-0.1.0.dist-info/licenses/LICENSE +201 -0
nilalytics/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""nilalytics: serverless realtime analytics on DuckLake + Quack + object storage."""
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Automatic error submission for Python apps/services.
|
|
2
|
+
|
|
3
|
+
Call ``install()`` once at startup. After that, uncaught exceptions (main
|
|
4
|
+
thread and worker threads) and ``logger.exception()`` / ``logging.error(...,
|
|
5
|
+
exc_info=True)`` calls are sent to nilalytics as OTLP error logs
|
|
6
|
+
(severity ERROR + ``exception.*`` attributes) — the same shape Grafana Faro
|
|
7
|
+
and Sentry use. No per-error code needed.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
from nilalytics import autocapture
|
|
11
|
+
autocapture.install("my-service")
|
|
12
|
+
... # any uncaught error now self-reports
|
|
13
|
+
|
|
14
|
+
You can also report a handled error explicitly:
|
|
15
|
+
try:
|
|
16
|
+
risky()
|
|
17
|
+
except Exception as exc:
|
|
18
|
+
autocapture.report(exc, order_id=123)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
import sys
|
|
25
|
+
import threading
|
|
26
|
+
import time
|
|
27
|
+
import traceback
|
|
28
|
+
|
|
29
|
+
import requests
|
|
30
|
+
|
|
31
|
+
from . import config
|
|
32
|
+
|
|
33
|
+
_service = "nilalytics-app"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _attr(key: str, value: str) -> dict:
    """Build one OTLP/JSON attribute entry.

    *value* is passed through ``str()``, so non-string values are accepted
    and stored as their string form.
    """
    string_value = {"stringValue": str(value)}
    return {"key": key, "value": string_value}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _error_record(exc_type, exc_value, exc_tb, extra: dict | None = None) -> dict:
    """Turn an exception triple into an OTLP/JSON error log record.

    The record has severity ERROR (17), the body ``"exception"`` and the
    ``exception.type`` / ``exception.message`` / ``exception.stacktrace``
    attributes. Any *extra* key/value pairs are appended as further string
    attributes.
    """
    formatted = traceback.format_exception(exc_type, exc_value, exc_tb)
    type_name = getattr(exc_type, "__name__", str(exc_type))
    attributes = [
        _attr("event.name", "exception"),
        _attr("exception.type", type_name),
        _attr("exception.message", str(exc_value)),
        _attr("exception.stacktrace", "".join(formatted)),
    ]
    if extra:
        for name, val in extra.items():
            attributes.append(_attr(name, val))
    return {
        "timeUnixNano": str(time.time_ns()),
        "severityNumber": 17,  # ERROR
        "severityText": "ERROR",
        "body": {"stringValue": "exception"},
        "attributes": attributes,
    }
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _post(record: dict) -> None:
    """POST a single log record to ``{config.OTLP_HTTP}/v1/logs``.

    The record is wrapped in the OTLP ``resourceLogs`` envelope, tagged with
    the current ``_service`` name. Delivery is best-effort: any exception
    raised while sending is swallowed.
    """
    scope_logs = {
        "scope": {"name": "nilalytics.autocapture"},
        "logRecords": [record],
    }
    resource_logs = {
        "resource": {"attributes": [_attr("service.name", _service)]},
        "scopeLogs": [scope_logs],
    }
    body = {"resourceLogs": [resource_logs]}
    try:
        requests.post(
            f"{config.OTLP_HTTP}/v1/logs",
            json=body,
            headers={"Authorization": f"Bearer {config.OTLP_TOKEN}"},
            timeout=5,
        )
    except Exception:
        # Telemetry must never crash the app it is observing.
        pass
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def report(exc: BaseException, **extra) -> None:
    """Send an already-handled exception to nilalytics.

    Keyword arguments are attached to the record as extra attributes.
    """
    record = _error_record(type(exc), exc, exc.__traceback__, extra)
    _post(record)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class _LoggingHandler(logging.Handler):
    """Logging handler that forwards records carrying exception info."""

    def emit(self, record: logging.LogRecord) -> None:
        """Report *record* if it has ``exc_info``; ignore it otherwise."""
        info = record.exc_info
        if not info:
            return
        exc_type, exc_value, exc_tb = info
        context = {"logger": record.name, "log.message": record.getMessage()}
        _post(_error_record(exc_type, exc_value, exc_tb, context))
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def install(service_name: str = "nilalytics-app") -> None:
    """Install global handlers so all uncaught errors self-report.

    Three hooks are installed:

    1. ``sys.excepthook`` for uncaught exceptions on the main thread,
    2. ``threading.excepthook`` for uncaught exceptions on worker threads,
    3. a root-logger handler for ``logger.exception()`` /
       ``logging.error(..., exc_info=True)``.

    Both exception hooks report the error and then delegate to the hook that
    was installed before them, so the traceback is still handled as before.
    Calling ``install()`` again only updates the service name; hooks and the
    logging handler are not stacked a second time.

    Args:
        service_name: Value reported as the ``service.name`` resource attribute.
    """
    global _service
    _service = service_name

    marker = "_nilalytics_autocapture"

    # 1) uncaught exceptions on the main thread
    prev_hook = sys.excepthook
    if not getattr(prev_hook, marker, False):

        def _excepthook(exc_type, exc_value, exc_tb):
            _post(_error_record(exc_type, exc_value, exc_tb))
            prev_hook(exc_type, exc_value, exc_tb)

        setattr(_excepthook, marker, True)
        sys.excepthook = _excepthook

    # 2) uncaught exceptions on worker threads
    prev_thread_hook = threading.excepthook
    if not getattr(prev_thread_hook, marker, False):

        def _threadhook(args):
            _post(_error_record(args.exc_type, args.exc_value, args.exc_traceback))
            # Chain to the previous hook; without this, worker-thread
            # tracebacks would no longer be printed once we are installed.
            prev_thread_hook(args)

        setattr(_threadhook, marker, True)
        threading.excepthook = _threadhook

    # 3) logger.exception() / logging.error(..., exc_info=True)
    root = logging.getLogger()
    if not any(isinstance(handler, _LoggingHandler) for handler in root.handlers):
        root.addHandler(_LoggingHandler())
|
nilalytics/cli.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Unified nilalytics command-line interface.
|
|
2
|
+
|
|
3
|
+
After installing the package (``pip install nilalytics`` / ``uv sync``), the
|
|
4
|
+
``nilalytics`` command is available anywhere:
|
|
5
|
+
|
|
6
|
+
nilalytics server # ingest + Quack catalog server
|
|
7
|
+
nilalytics gateway # public ingest gateway (CORS, tokens, TLS)
|
|
8
|
+
nilalytics emit --persons 5 # send sample events
|
|
9
|
+
nilalytics query report # analytics report over Quack
|
|
10
|
+
nilalytics maintenance --expire # flush inlined data + compact
|
|
11
|
+
|
|
12
|
+
Each subcommand delegates to its module, which is also runnable directly via
|
|
13
|
+
``python -m nilalytics.<module>``.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import sys
|
|
19
|
+
|
|
20
|
+
_USAGE = """usage: nilalytics <command> [args]
|
|
21
|
+
|
|
22
|
+
commands:
|
|
23
|
+
server run the ingest + Quack catalog server
|
|
24
|
+
gateway run the public ingest gateway (CORS, short-lived tokens, TLS)
|
|
25
|
+
emit [options] send sample logs/traces/metrics (--persons for cross-device)
|
|
26
|
+
query [subcommand] [args] report | traces | metrics | stitch | asof | changes | snapshots | errors
|
|
27
|
+
maintenance [--expire] flush inlined data to Parquet + compact
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def main(argv: list[str] | None = None) -> int:
|
|
32
|
+
argv = list(sys.argv[1:] if argv is None else argv)
|
|
33
|
+
if not argv or argv[0] in ("-h", "--help", "help"):
|
|
34
|
+
print(_USAGE)
|
|
35
|
+
return 0
|
|
36
|
+
|
|
37
|
+
command, rest = argv[0], argv[1:]
|
|
38
|
+
if command == "server":
|
|
39
|
+
from . import server
|
|
40
|
+
server.main(rest)
|
|
41
|
+
elif command == "gateway":
|
|
42
|
+
from . import gateway
|
|
43
|
+
gateway.main(rest)
|
|
44
|
+
elif command == "emit":
|
|
45
|
+
from . import emitter
|
|
46
|
+
emitter.main(rest)
|
|
47
|
+
elif command == "query":
|
|
48
|
+
from . import query
|
|
49
|
+
query.main(rest)
|
|
50
|
+
elif command == "maintenance":
|
|
51
|
+
from . import maintenance
|
|
52
|
+
maintenance.main(rest)
|
|
53
|
+
else:
|
|
54
|
+
print(f"unknown command: {command}\n\n{_USAGE}")
|
|
55
|
+
return 1
|
|
56
|
+
return 0
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
if __name__ == "__main__":
|
|
60
|
+
raise SystemExit(main())
|
nilalytics/config.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Central configuration for nilalytics.
|
|
2
|
+
|
|
3
|
+
Every value can be overridden with an environment variable so the same code runs
|
|
4
|
+
against local MinIO now and Cloudflare R2 later (R2 is S3-compatible).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import secrets
|
|
12
|
+
import stat
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
# Writable data directory (catalog file + secrets). Deliberately NOT relative to
|
|
16
|
+
# the package install location, so nilalytics works from any working directory
|
|
17
|
+
# and when pip-installed. Override with NILA_DATA_DIR.
|
|
18
|
+
DATA_DIR = Path(os.getenv("NILA_DATA_DIR", Path.home() / ".nilalytics")).expanduser()
|
|
19
|
+
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
|
20
|
+
|
|
21
|
+
# --- Secrets: never hardcode. Load from env, else generate once and persist
|
|
22
|
+
# to a 0600 file under DATA_DIR (~/.nilalytics by default). token_urlsafe() is safe in
|
|
23
|
+
# both HTTP headers and single-quoted SQL strings. ---
|
|
24
|
+
_SECRETS_FILE = DATA_DIR / ".nila_secrets.json"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _load_or_create_secrets() -> dict:
    """Load the persisted secrets, generating any that are missing.

    Reads ``_SECRETS_FILE`` if it exists. An unreadable file, invalid JSON,
    or JSON that is not an object is treated as "no secrets yet". Every
    expected key without a truthy value gets a fresh
    ``secrets.token_urlsafe(24)`` value, and the file is rewritten when
    anything was generated.

    Returns:
        Dict containing at least the keys ``otlp_token``, ``quack_token``,
        ``id_salt``, ``gateway_secret`` and ``ingest_key``.
    """
    data = {}
    if _SECRETS_FILE.exists():
        try:
            loaded = json.loads(_SECRETS_FILE.read_text())
        except (ValueError, OSError):
            loaded = None
        # Valid JSON that is not an object (e.g. a list) has no .get();
        # treat it like a corrupt file instead of crashing at import time.
        if isinstance(loaded, dict):
            data = loaded

    changed = False
    for key in ("otlp_token", "quack_token", "id_salt", "gateway_secret", "ingest_key"):
        if not data.get(key):
            data[key] = secrets.token_urlsafe(24)
            changed = True

    if changed:
        owner_only = stat.S_IRUSR | stat.S_IWUSR  # 0600, owner-only
        # Create the file with 0600 from the start, so the secrets are never
        # on disk with default (possibly group/world-readable) permissions.
        fd = os.open(_SECRETS_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, owner_only)
        with os.fdopen(fd, "w") as handle:
            try:
                # The mode passed to os.open() only applies to new files;
                # tighten a pre-existing file before writing into it.
                _SECRETS_FILE.chmod(owner_only)
            except OSError:
                pass
            handle.write(json.dumps(data))
    return data
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
_SECRETS = _load_or_create_secrets()
|
|
49
|
+
|
|
50
|
+
# --- Object storage backend (works on all major clouds) ---
|
|
51
|
+
# NILA_STORAGE selects the cloud: s3 | gcs | r2 | azure. See storage.py.
|
|
52
|
+
STORAGE = os.getenv("NILA_STORAGE", "s3").lower()
|
|
53
|
+
# Bucket (or Azure *container*) + key prefix, shared across backends.
|
|
54
|
+
BUCKET = os.getenv("NILA_BUCKET", os.getenv("NILA_S3_BUCKET", "nilalytics"))
|
|
55
|
+
PREFIX = os.getenv("NILA_PREFIX", "lake")
|
|
56
|
+
|
|
57
|
+
# S3 / MinIO / any S3-compatible store. Leave endpoint empty for real AWS S3.
|
|
58
|
+
S3_ENDPOINT = os.getenv("NILA_S3_ENDPOINT", "127.0.0.1:9100")
|
|
59
|
+
S3_ACCESS_KEY = os.getenv("NILA_S3_KEY", "minioadmin")
|
|
60
|
+
S3_SECRET_KEY = os.getenv("NILA_S3_SECRET", "minioadmin")
|
|
61
|
+
S3_SESSION_TOKEN = os.getenv("NILA_S3_SESSION_TOKEN", "") # optional STS token
|
|
62
|
+
S3_USE_SSL = os.getenv("NILA_S3_USE_SSL", "false").lower() == "true"
|
|
63
|
+
S3_URL_STYLE = os.getenv("NILA_S3_URL_STYLE", "path") # MinIO/R2 want path style
|
|
64
|
+
S3_REGION = os.getenv("NILA_S3_REGION", "us-east-1")
|
|
65
|
+
|
|
66
|
+
# Google Cloud Storage (HMAC interoperability keys).
|
|
67
|
+
GCS_KEY = os.getenv("NILA_GCS_KEY", "")
|
|
68
|
+
GCS_SECRET = os.getenv("NILA_GCS_SECRET", "")
|
|
69
|
+
|
|
70
|
+
# Cloudflare R2.
|
|
71
|
+
R2_ACCOUNT_ID = os.getenv("NILA_R2_ACCOUNT_ID", "")
|
|
72
|
+
R2_KEY = os.getenv("NILA_R2_KEY", "")
|
|
73
|
+
R2_SECRET = os.getenv("NILA_R2_SECRET", "")
|
|
74
|
+
|
|
75
|
+
# Azure Blob / ADLS Gen2 (BUCKET is the container).
|
|
76
|
+
AZURE_ACCOUNT = os.getenv("NILA_AZURE_ACCOUNT", "")
|
|
77
|
+
AZURE_AUTH = os.getenv("NILA_AZURE_AUTH", "credential_chain") # credential_chain|connection_string|service_principal
|
|
78
|
+
AZURE_CONNECTION_STRING = os.getenv("NILA_AZURE_CONNECTION_STRING", "")
|
|
79
|
+
AZURE_TENANT_ID = os.getenv("NILA_AZURE_TENANT_ID", "")
|
|
80
|
+
AZURE_CLIENT_ID = os.getenv("NILA_AZURE_CLIENT_ID", "")
|
|
81
|
+
AZURE_CLIENT_SECRET = os.getenv("NILA_AZURE_CLIENT_SECRET", "")
|
|
82
|
+
|
|
83
|
+
# --- DuckLake catalog (a DuckDB file, served to clients over Quack) ---
|
|
84
|
+
CATALOG_PATH = os.getenv("NILA_CATALOG", str(DATA_DIR / "catalog.ducklake"))
|
|
85
|
+
# Inserts smaller than this land as rows inside the catalog (data inlining),
|
|
86
|
+
# so streaming events never create tiny Parquet files.
|
|
87
|
+
DATA_INLINING_ROW_LIMIT = int(os.getenv("NILA_INLINE_LIMIT", "1000"))
|
|
88
|
+
|
|
89
|
+
# --- OTLP ingest server (duckdb-otlp) ---
|
|
90
|
+
OTLP_URI = os.getenv("NILA_OTLP_URI", "otlp:127.0.0.1:4318")
|
|
91
|
+
OTLP_HTTP = os.getenv("NILA_OTLP_HTTP", "http://127.0.0.1:4318")
|
|
92
|
+
# Bearer token that ingest clients (emitter, autocapture, browser) must present.
|
|
93
|
+
# This is the INTERNAL token; it never leaves the host once the gateway is used.
|
|
94
|
+
OTLP_TOKEN = os.getenv("NILA_OTLP_TOKEN", _SECRETS["otlp_token"])
|
|
95
|
+
|
|
96
|
+
# --- Public ingest gateway (front door for browsers + mobile) ---
|
|
97
|
+
# Set host to 0.0.0.0 to accept real devices/browsers. Optional TLS via cert/key.
|
|
98
|
+
GATEWAY_HOST = os.getenv("NILA_GATEWAY_HOST", "127.0.0.1")
|
|
99
|
+
GATEWAY_PORT = int(os.getenv("NILA_GATEWAY_PORT", "4319"))
|
|
100
|
+
GATEWAY_CORS_ORIGINS = os.getenv("NILA_GATEWAY_CORS", "*")
|
|
101
|
+
GATEWAY_CERT = os.getenv("NILA_GATEWAY_CERT", "")
|
|
102
|
+
GATEWAY_KEY = os.getenv("NILA_GATEWAY_KEY", "")
|
|
103
|
+
# Short-lived client tokens are HMAC-signed with this secret and expire after TTL.
|
|
104
|
+
GATEWAY_SECRET = os.getenv("NILA_GATEWAY_SECRET", _SECRETS["gateway_secret"])
|
|
105
|
+
GATEWAY_TOKEN_TTL = int(os.getenv("NILA_GATEWAY_TOKEN_TTL", "900")) # 15 min
|
|
106
|
+
# The (rotatable) key an app presents to MINT a short-lived token. This is the
|
|
107
|
+
# only value that ships in a client; it grants minting, not ingest or reads.
|
|
108
|
+
GATEWAY_INGEST_KEY = os.getenv("NILA_INGEST_KEY", _SECRETS["ingest_key"])
|
|
109
|
+
|
|
110
|
+
# --- Quack catalog server (read path for clients / DuckDB-WASM) ---
|
|
111
|
+
QUACK_URI = os.getenv("NILA_QUACK_URI", "quack:localhost")
|
|
112
|
+
QUACK_TOKEN = os.getenv("NILA_QUACK_TOKEN", _SECRETS["quack_token"])
|
|
113
|
+
|
|
114
|
+
# Name the DuckLake is attached as, inside the server process.
|
|
115
|
+
LAKE = "lake"
|
|
116
|
+
|
|
117
|
+
# --- Ingest tuning (package defaults; override via env) ---
|
|
118
|
+
# Freshness: force a commit when the oldest buffered row hits this age.
|
|
119
|
+
SEAL_MAX_AGE_MS = int(os.getenv("NILA_SEAL_MAX_AGE_MS", "1000"))
|
|
120
|
+
# Retention: how long snapshots/old files are kept before maintenance reclaims them.
|
|
121
|
+
MAINTENANCE_RETENTION_MS = int(os.getenv("NILA_RETENTION_MS", str(7 * 24 * 3600 * 1000)))
|
|
122
|
+
# Resource attributes lifted into first-class columns at ingest for pruning
|
|
123
|
+
# (comma-separated; empty disables). Event-level fields stay in log_attributes.
|
|
124
|
+
PROMOTE_RESOURCE_ATTRS = os.getenv("NILA_PROMOTE_RESOURCE_ATTRS", "deployment.environment")
|
|
125
|
+
|
|
126
|
+
# Salt for pseudonymous identity hashing. User keys (email/account id) are hashed
|
|
127
|
+
# with this salt client-side, so the lake stores a stable person-key it cannot
|
|
128
|
+
# reverse into a real identity.
|
|
129
|
+
ID_SALT = os.getenv("NILA_ID_SALT", _SECRETS["id_salt"])
|
|
130
|
+
|
|
131
|
+
# --- Physical layout applied to the events table (query performance) ---
|
|
132
|
+
# Partition transforms: identity | bucket(N,col) | year|month|day|hour(ts).
|
|
133
|
+
PARTITION_BY = os.getenv("NILA_PARTITION_BY", "day(time_unix_nano)")
|
|
134
|
+
SORTED_BY = os.getenv("NILA_SORTED_BY", "body, time_unix_nano")
|
|
135
|
+
|
|
136
|
+
# --- Safety guardrails ---
|
|
137
|
+
_LOCAL_HOSTS = {"127.0.0.1", "localhost", "::1", "[::1]"}
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def endpoint_host() -> str:
    """Return the host part of ``S3_ENDPOINT`` with any port removed.

    Handles ``host``, ``host:port``, bracketed IPv6 (``[::1]:9100`` gives
    ``[::1]``) and bare IPv6 literals (``::1`` is returned unchanged). A
    plain split on ``":"`` would mangle the IPv6 forms, so the IPv6 entries
    in ``_LOCAL_HOSTS`` could never match.
    """
    endpoint = S3_ENDPOINT.strip()
    if endpoint.startswith("["):
        # Bracketed IPv6 literal, optionally followed by ":port".
        return endpoint.split("]", 1)[0] + "]"
    if endpoint.count(":") > 1:
        # Bare IPv6 literal: its colons are part of the address, not a port.
        return endpoint
    return endpoint.split(":", 1)[0]
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def is_local_endpoint() -> bool:
    """Report whether the configured endpoint host is one of ``_LOCAL_HOSTS``."""
    host = endpoint_host()
    return host in _LOCAL_HOSTS
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def assert_safe() -> None:
    """Abort startup when the S3 configuration is obviously insecure.

    Only applies when ``STORAGE`` is ``"s3"`` and the endpoint is not local.
    In that case TLS must be enabled and the default ``minioadmin``
    credentials must not be in use.

    Raises:
        SystemExit: listing every problem found.
    """
    # Only the S3 backend has the local-MinIO / default-cred footgun. GCS, R2 and
    # Azure are inherently remote + TLS and are validated in storage.py.
    if STORAGE != "s3":
        return
    if is_local_endpoint():
        return

    checks = (
        (not S3_USE_SSL,
         "remote S3 requires TLS (set NILA_S3_USE_SSL=true)"),
        ("minioadmin" in (S3_ACCESS_KEY, S3_SECRET_KEY),
         "default 'minioadmin' credentials must not be used with a remote endpoint"),
    )
    problems = [message for failed, message in checks if failed]
    if problems:
        raise SystemExit("[nilalytics] refusing to start:\n - " + "\n - ".join(problems))
|
nilalytics/emitter.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""nilalytics emitter: sends OTLP/HTTP log records to the ingest endpoint.
|
|
2
|
+
|
|
3
|
+
This mimics what a Grafana Faro / OpenTelemetry browser SDK emits: product
|
|
4
|
+
events and errors are both OTLP *log records*. Product events carry an
|
|
5
|
+
``event.name`` attribute; errors use severity ``ERROR`` plus ``exception.*``
|
|
6
|
+
attributes. The server (duckdb-otlp) lands them in the DuckLake ``otlp_logs``
|
|
7
|
+
table.
|
|
8
|
+
|
|
9
|
+
Run it: uv run python -m nilalytics.emitter --count 200
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import hashlib
|
|
16
|
+
import os
|
|
17
|
+
import random
|
|
18
|
+
import time
|
|
19
|
+
import uuid
|
|
20
|
+
|
|
21
|
+
import requests
|
|
22
|
+
|
|
23
|
+
from . import config
|
|
24
|
+
|
|
25
|
+
PRODUCT_EVENTS = ["page_view", "signup_start", "signup_complete", "add_to_cart", "purchase"]
|
|
26
|
+
PAGES = ["/", "/pricing", "/product", "/checkout", "/docs"]
|
|
27
|
+
ENV = os.getenv("NILA_ENV", "production")
|
|
28
|
+
HEADERS = {"Authorization": f"Bearer {config.OTLP_TOKEN}"}
|
|
29
|
+
|
|
30
|
+
# A pool of "devices" (each an anonymous.id) that normal traffic comes from.
|
|
31
|
+
DEVICE_POOL = [uuid.uuid4().hex for _ in range(30)]
|
|
32
|
+
_SESSIONS: dict[str, str] = {}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def new_anonymous_id() -> str:
    """Return a fresh random device id: a UUID4 as 32 hex characters."""
    fresh = uuid.uuid4()
    return fresh.hex
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def session_for(device: str) -> str:
    """Return the session id for *device*, creating or rotating it as needed.

    A device seen for the first time gets a new 16-hex-character id. A known
    device keeps its id, except that each call rolls a new one with
    probability 0.1.
    """
    known = device in _SESSIONS
    # The random draw only happens for known devices, as in a short-circuit `or`.
    if known and random.random() >= 0.1:
        return _SESSIONS[device]
    _SESSIONS[device] = uuid.uuid4().hex[:16]
    return _SESSIONS[device]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def hash_key(raw_key: str) -> str:
    """Return the pseudonymous person-key for *raw_key*.

    The key is the first 32 hex characters of SHA-256 over
    ``config.ID_SALT + raw_key``. The raw key itself should never be sent.
    """
    salted = config.ID_SALT + raw_key
    digest = hashlib.sha256(salted.encode())
    return digest.hexdigest()[:32]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _resource() -> dict:
    """Return the OTLP resource shared by all emitted signals."""
    # deployment.environment is promoted to a first-class column at ingest.
    attributes = [
        _attr("service.name", "nilalytics-web"),
        _attr("deployment.environment", ENV),
    ]
    return {"attributes": attributes}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _attr(key: str, value: str) -> dict:
    """Return an OTLP/JSON key-value pair with *value* rendered via ``str()``."""
    wrapped = {"stringValue": str(value)}
    return {"key": key, "value": wrapped}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _record(event_name: str, anon: str, session: str, severity_text: str,
            severity_number: int, user: str | None = None, extra: dict | None = None) -> dict:
    """Build one OTLP/JSON log record for a product event or an error.

    The record always carries ``event.name``, ``anonymous.id`` and
    ``session.id``. ``user.id`` is added only when *user* is truthy, and
    *extra* items follow as additional string attributes. The body is the
    event name.
    """
    # anonymous.id identifies a device; user.id (hashed) is set only once known.
    attributes = [
        _attr("event.name", event_name),
        _attr("anonymous.id", anon),
        _attr("session.id", session),
    ]
    if user:
        attributes.append(_attr("user.id", user))
    attributes.extend(_attr(name, val) for name, val in (extra or {}).items())
    return {
        "timeUnixNano": str(time.time_ns()),
        "severityNumber": severity_number,
        "severityText": severity_text,
        "body": {"stringValue": event_name},
        "attributes": attributes,
    }
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _payload(records: list[dict]) -> dict:
    """Wrap log *records* in the OTLP ``resourceLogs`` envelope."""
    scope_logs = {
        "scope": {"name": "nilalytics.browser"},
        "logRecords": records,
    }
    resource_logs = {
        "resource": _resource(),
        "scopeLogs": [scope_logs],
    }
    return {"resourceLogs": [resource_logs]}
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def make_events(n: int, error_rate: float) -> list[dict]:
    """Generate *n* random log records from the shared device pool.

    Each record is an ``exception`` error with probability *error_rate*,
    otherwise a random product event. Every record carries a random page.
    """
    events: list[dict] = []
    for _ in range(n):
        device = random.choice(DEVICE_POOL)
        session = session_for(device)
        is_error = random.random() < error_rate
        if not is_error:
            event_name = random.choice(PRODUCT_EVENTS)
            details = {"page": random.choice(PAGES)}
            events.append(_record(event_name, device, session, "INFO", 9, extra=details))
            continue
        details = {
            "exception.type": random.choice(["TypeError", "NetworkError", "RangeError"]),
            "exception.message": "Cannot read properties of undefined",
            "page": random.choice(PAGES),
        }
        events.append(_record("exception", device, session, "ERROR", 17, extra=details))
    return events
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def emit(count: int, batch_size: int, error_rate: float) -> int:
    """Send *count* generated log events to the ingest endpoint in batches.

    Args:
        count: Total number of log events to send; nothing is sent if <= 0.
        batch_size: Maximum events per POST. Must be >= 1 when *count* > 0.
        error_rate: Probability that a generated event is an error.

    Returns:
        The number of events sent.

    Raises:
        ValueError: if *count* > 0 and *batch_size* < 1. Such a batch size
            can never make progress and would otherwise loop forever.
        requests.HTTPError: if the server answers with an error status.
    """
    if count > 0 and batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    url = f"{config.OTLP_HTTP}/v1/logs"
    sent = 0
    while sent < count:
        this = min(batch_size, count - sent)
        resp = requests.post(url, json=_payload(make_events(this, error_rate)),
                             headers=HEADERS, timeout=10)
        resp.raise_for_status()
        sent += this
        try:
            detail = resp.json()
        except ValueError:
            # A successful response with an empty or non-JSON body should not
            # abort the run just because of the progress print.
            detail = resp.text
        print(f"POST {this:>4} logs -> {resp.status_code} {detail}", flush=True)
    print(f"[emitter] sent {sent} log events to {url}", flush=True)
    return sent
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _post_logs(records: list[dict]) -> None:
    """POST *records* to ``/v1/logs`` and raise on an HTTP error status."""
    url = f"{config.OTLP_HTTP}/v1/logs"
    body = _payload(records)
    response = requests.post(url, json=body, headers=HEADERS, timeout=10)
    response.raise_for_status()
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def identify(device: str, user_key: str) -> str:
    """Send an ``identify`` event linking *device* to a hashed person-key.

    Returns the hashed key that was sent as ``user.id``.
    """
    uid = hash_key(user_key)
    login_event = _record(
        "identify", device, session_for(device), "INFO", 9,
        user=uid, extra={"method": "login"},
    )
    _post_logs([login_event])
    return uid
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def emit_cross_device(persons: int, events_per_device: int = 8) -> None:
    """Simulate *persons* people who each use two devices and then log in.

    Each device first sends *events_per_device* anonymous product events
    under a fresh anonymous id and session, followed by an ``identify``
    event. Both devices of a person resolve to the same hashed person-key,
    which is what enables cross-device stitching.
    """
    for person_index in range(persons):
        user_key = f"person{person_index}@example.com"
        uid = hash_key(user_key)
        for _device_slot in range(2):  # phone + laptop
            device = new_anonymous_id()
            session = uuid.uuid4().hex[:16]
            # anonymous activity first
            batch = []
            for _ in range(events_per_device):
                batch.append(
                    _record(random.choice(PRODUCT_EVENTS), device, session, "INFO", 9,
                            extra={"page": random.choice(PAGES)})
                )
            _post_logs(batch)
            # then the user logs in on this device
            identify(device, user_key)
        print(f"[emitter] person{person_index}: 2 devices -> {uid[:12]}...", flush=True)
    print(f"[emitter] cross-device sim: {persons} persons x 2 devices", flush=True)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# --- Traces (performance spans, e.g. page loads / API calls) ---
|
|
167
|
+
def make_spans(n: int) -> list[dict]:
    """Generate *n* random OTLP/JSON spans lasting between 20 and 800 ms."""

    def _one_span() -> dict:
        started = time.time_ns()
        duration_ns = random.randint(20, 800) * 1_000_000  # 20-800 ms
        return {
            "traceId": os.urandom(16).hex(),
            "spanId": os.urandom(8).hex(),
            "name": random.choice(["page_load", "api_call", "route_change"]),
            "kind": 1,
            "startTimeUnixNano": str(started),
            "endTimeUnixNano": str(started + duration_ns),
            "attributes": [_attr("page", random.choice(PAGES))],
        }

    return [_one_span() for _ in range(n)]
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def emit_traces(n: int) -> None:
    """POST *n* generated spans to ``/v1/traces`` and print the status code."""
    resource = _resource()
    scope_spans = {"scope": {"name": "nilalytics.browser"}, "spans": make_spans(n)}
    payload = {"resourceSpans": [{"resource": resource, "scopeSpans": [scope_spans]}]}
    url = f"{config.OTLP_HTTP}/v1/traces"
    resp = requests.post(url, json=payload, headers=HEADERS, timeout=10)
    resp.raise_for_status()
    print(f"[emitter] sent {n} spans -> {resp.status_code}", flush=True)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# --- Metrics (e.g. web-vitals gauges) ---
|
|
194
|
+
def make_points(n: int) -> list[dict]:
    """Generate *n* gauge data points with random values in [200, 4000]."""
    points = []
    for _ in range(n):
        point = {
            "timeUnixNano": str(time.time_ns()),
            "asDouble": round(random.uniform(200, 4000), 1),
            "attributes": [_attr("page", random.choice(PAGES))],
        }
        points.append(point)
    return points
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def emit_metrics(n: int) -> None:
    """POST *n* ``web_vitals_lcp_ms`` gauge points to ``/v1/metrics``."""
    resource = _resource()
    metric = {
        "name": "web_vitals_lcp_ms",
        "unit": "ms",
        "gauge": {"dataPoints": make_points(n)},
    }
    scope_metrics = {"scope": {"name": "nilalytics.browser"}, "metrics": [metric]}
    payload = {"resourceMetrics": [{"resource": resource, "scopeMetrics": [scope_metrics]}]}
    url = f"{config.OTLP_HTTP}/v1/metrics"
    resp = requests.post(url, json=payload, headers=HEADERS, timeout=10)
    resp.raise_for_status()
    print(f"[emitter] sent {n} metric points -> {resp.status_code}", flush=True)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def main(argv=None) -> None:
    """Parse the ``nilalytics emit`` options and send the requested signals.

    Logs, traces, metrics and the cross-device simulation run in that order.
    Each one is skipped when its count is 0.
    """
    parser = argparse.ArgumentParser(
        prog="nilalytics emit",
        description="Emit OTLP logs, traces and metrics to nilalytics.",
    )
    parser.add_argument("-n", "--count", type=int, default=200, help="log events")
    parser.add_argument("-b", "--batch-size", type=int, default=50)
    parser.add_argument("-e", "--error-rate", type=float, default=0.1)
    parser.add_argument("--traces", type=int, default=10, help="trace spans")
    parser.add_argument("--metrics", type=int, default=10, help="metric points")
    parser.add_argument("--persons", type=int, default=0,
                        help="simulate N cross-device persons (2 devices each, identified)")
    opts = parser.parse_args(argv)

    steps = (
        (opts.count, lambda: emit(opts.count, opts.batch_size, opts.error_rate)),
        (opts.traces, lambda: emit_traces(opts.traces)),
        (opts.metrics, lambda: emit_metrics(opts.metrics)),
        (opts.persons, lambda: emit_cross_device(opts.persons)),
    )
    for amount, run in steps:
        if amount:
            run()
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
if __name__ == "__main__":
|
|
233
|
+
main()
|