apphud-mcp 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +65 -61
- package/dist/src/cli.js +12 -6
- package/dist/src/config/env.js +20 -20
- package/dist/src/services/analyticsService.js +81 -0
- package/dist/src/services/apphudClient.js +145 -0
- package/dist/src/services/etlService.js +351 -436
- package/package.json +1 -1
|
@@ -1,22 +1,13 @@
|
|
|
1
|
+
import { execFile } from "node:child_process";
|
|
1
2
|
import { createHash } from "node:crypto";
|
|
2
3
|
import { mkdirSync } from "node:fs";
|
|
3
|
-
import { mkdir, readdir, readFile,
|
|
4
|
+
import { mkdir, readdir, readFile, writeFile } from "node:fs/promises";
|
|
4
5
|
import path from "node:path";
|
|
6
|
+
import { promisify } from "node:util";
|
|
5
7
|
import { gunzipSync } from "node:zlib";
|
|
6
|
-
import Database from "better-sqlite3";
|
|
7
8
|
import { ApphudMcpError } from "../errors/toolError.js";
|
|
8
9
|
const LOCAL_ROWS_GUARD_LIMIT = 1_000_000;
|
|
9
|
-
|
|
10
|
-
return template.replace(/\{([a-zA-Z0-9_]+)\}/g, (_, key) => vars[key] ?? "");
|
|
11
|
-
}
|
|
12
|
-
function normalizeToPath(baseUrl, rawPath) {
|
|
13
|
-
if (rawPath.startsWith("http://") || rawPath.startsWith("https://")) {
|
|
14
|
-
return rawPath;
|
|
15
|
-
}
|
|
16
|
-
const normalizedBase = baseUrl.replace(/\/$/, "");
|
|
17
|
-
const normalizedPath = rawPath.startsWith("/") ? rawPath : `/${rawPath}`;
|
|
18
|
-
return `${normalizedBase}${normalizedPath}`;
|
|
19
|
-
}
|
|
10
|
+
const execFileAsync = promisify(execFile);
|
|
20
11
|
function parseCsvLine(line) {
|
|
21
12
|
const result = [];
|
|
22
13
|
let current = "";
|
|
@@ -50,16 +41,11 @@ function parseCsv(content) {
|
|
|
50
41
|
.map((line) => line.trimEnd())
|
|
51
42
|
.filter((line) => line.length > 0);
|
|
52
43
|
if (lines.length === 0) {
|
|
53
|
-
return {
|
|
54
|
-
headers: [],
|
|
55
|
-
rows: [],
|
|
56
|
-
};
|
|
44
|
+
return { headers: [], rows: [] };
|
|
57
45
|
}
|
|
58
|
-
const headers = parseCsvLine(lines[0] ?? "");
|
|
59
|
-
const rows = lines.slice(1).map((line) => parseCsvLine(line));
|
|
60
46
|
return {
|
|
61
|
-
headers,
|
|
62
|
-
rows,
|
|
47
|
+
headers: parseCsvLine(lines[0] ?? ""),
|
|
48
|
+
rows: lines.slice(1).map((line) => parseCsvLine(line)),
|
|
63
49
|
};
|
|
64
50
|
}
|
|
65
51
|
function rowToRecord(headers, values) {
|
|
@@ -71,45 +57,6 @@ function rowToRecord(headers, values) {
|
|
|
71
57
|
}
|
|
72
58
|
return result;
|
|
73
59
|
}
|
|
74
|
-
function extractExportCandidates(payload, appId) {
|
|
75
|
-
const candidates = [];
|
|
76
|
-
const visited = new WeakSet();
|
|
77
|
-
const walk = (node) => {
|
|
78
|
-
if (Array.isArray(node)) {
|
|
79
|
-
for (const item of node) {
|
|
80
|
-
walk(item);
|
|
81
|
-
}
|
|
82
|
-
return;
|
|
83
|
-
}
|
|
84
|
-
if (!node || typeof node !== "object") {
|
|
85
|
-
return;
|
|
86
|
-
}
|
|
87
|
-
const record = node;
|
|
88
|
-
if (visited.has(record)) {
|
|
89
|
-
return;
|
|
90
|
-
}
|
|
91
|
-
visited.add(record);
|
|
92
|
-
const downloadUrl = (typeof record.download_url === "string" && record.download_url) ||
|
|
93
|
-
(typeof record.downloadUrl === "string" && record.downloadUrl) ||
|
|
94
|
-
(typeof record.url === "string" && record.url) ||
|
|
95
|
-
(typeof record.file_url === "string" && record.file_url);
|
|
96
|
-
const fileName = (typeof record.file_name === "string" && record.file_name) ||
|
|
97
|
-
(typeof record.filename === "string" && record.filename) ||
|
|
98
|
-
(typeof record.name === "string" && record.name);
|
|
99
|
-
if (downloadUrl && fileName) {
|
|
100
|
-
candidates.push({
|
|
101
|
-
appId,
|
|
102
|
-
fileName,
|
|
103
|
-
downloadUrl,
|
|
104
|
-
});
|
|
105
|
-
}
|
|
106
|
-
for (const value of Object.values(record)) {
|
|
107
|
-
walk(value);
|
|
108
|
-
}
|
|
109
|
-
};
|
|
110
|
-
walk(payload);
|
|
111
|
-
return candidates;
|
|
112
|
-
}
|
|
113
60
|
async function listCsvGzFiles(root) {
|
|
114
61
|
const entries = await readdir(root, { withFileTypes: true });
|
|
115
62
|
const files = [];
|
|
@@ -128,25 +75,24 @@ async function listCsvGzFiles(root) {
|
|
|
128
75
|
export class EtlService {
|
|
129
76
|
config;
|
|
130
77
|
apphudClient;
|
|
131
|
-
db;
|
|
132
78
|
timer = null;
|
|
133
79
|
running = false;
|
|
80
|
+
stateLoaded = false;
|
|
81
|
+
appKeys = [];
|
|
82
|
+
manifest = new Map();
|
|
83
|
+
alerts = [];
|
|
134
84
|
constructor(config, apphudClient) {
|
|
135
85
|
this.config = config;
|
|
136
86
|
this.apphudClient = apphudClient;
|
|
137
|
-
|
|
138
|
-
mkdirSync(path.
|
|
139
|
-
this.
|
|
140
|
-
this.db.pragma("journal_mode = WAL");
|
|
141
|
-
this.db.pragma("busy_timeout = 5000");
|
|
142
|
-
this.initializeSchema();
|
|
87
|
+
mkdirSync(path.resolve(this.config.etlStorageDir), { recursive: true });
|
|
88
|
+
mkdirSync(path.resolve(this.config.etlIncomingDir), { recursive: true });
|
|
89
|
+
mkdirSync(path.resolve(this.stateDir()), { recursive: true });
|
|
143
90
|
}
|
|
144
91
|
async start() {
|
|
145
92
|
if (!this.config.etlEnabled) {
|
|
146
93
|
return;
|
|
147
94
|
}
|
|
148
|
-
await
|
|
149
|
-
await mkdir(path.resolve(this.config.etlIncomingDir), { recursive: true });
|
|
95
|
+
await this.ensureStateLoaded();
|
|
150
96
|
await this.runOnce();
|
|
151
97
|
const intervalMs = Math.max(this.config.etlPollIntervalMinutes, 1) * 60_000;
|
|
152
98
|
this.timer = setInterval(() => {
|
|
@@ -158,63 +104,75 @@ export class EtlService {
|
|
|
158
104
|
clearInterval(this.timer);
|
|
159
105
|
this.timer = null;
|
|
160
106
|
}
|
|
161
|
-
this.
|
|
107
|
+
await this.persistState();
|
|
162
108
|
}
|
|
163
109
|
async runOnce() {
|
|
164
110
|
if (!this.config.etlEnabled || this.running) {
|
|
165
111
|
return;
|
|
166
112
|
}
|
|
113
|
+
await this.ensureStateLoaded();
|
|
167
114
|
this.running = true;
|
|
168
115
|
try {
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
}
|
|
173
|
-
await this.ingestIncomingFiles();
|
|
116
|
+
await this.syncAppApiKeys();
|
|
117
|
+
await this.syncFromStorage();
|
|
118
|
+
await this.refreshManifestFromIncoming();
|
|
174
119
|
this.applyAlertTodos();
|
|
120
|
+
await this.persistState();
|
|
175
121
|
}
|
|
176
122
|
catch (error) {
|
|
177
123
|
this.insertAlertTodo("ETL_RUN_FAILED", error instanceof Error ? error.message : String(error));
|
|
124
|
+
await this.persistState();
|
|
178
125
|
}
|
|
179
126
|
finally {
|
|
180
127
|
this.running = false;
|
|
181
128
|
}
|
|
182
129
|
}
|
|
183
130
|
async localStatus() {
|
|
131
|
+
await this.ensureStateLoaded();
|
|
184
132
|
const now = new Date().toISOString();
|
|
185
|
-
const
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
.
|
|
189
|
-
|
|
190
|
-
.
|
|
191
|
-
.
|
|
192
|
-
const
|
|
193
|
-
.prepare(`SELECT code, message, created_at FROM etl_alerts_todo ORDER BY id DESC LIMIT 20`)
|
|
194
|
-
.all();
|
|
133
|
+
const manifestEntries = Array.from(this.manifest.values()).filter((entry) => entry.tenant_id === this.config.etlTenantId);
|
|
134
|
+
const doneEntries = manifestEntries.filter((entry) => entry.status === "done");
|
|
135
|
+
const lastProcessed = doneEntries
|
|
136
|
+
.map((entry) => entry.processed_at)
|
|
137
|
+
.filter((value) => Boolean(value))
|
|
138
|
+
.sort()
|
|
139
|
+
.at(-1) ?? null;
|
|
140
|
+
const rawRowsTotal = doneEntries.reduce((sum, entry) => sum + entry.row_count, 0);
|
|
195
141
|
return {
|
|
196
|
-
source: "
|
|
142
|
+
source: "apphud_storage_etl",
|
|
197
143
|
tenant_id: this.config.etlTenantId,
|
|
198
144
|
etl_enabled: this.config.etlEnabled,
|
|
199
|
-
|
|
145
|
+
etl_source: this.config.etlSource,
|
|
146
|
+
source_config: {
|
|
147
|
+
gcs_bucket: this.config.etlGcsBucket,
|
|
148
|
+
gcs_prefix: this.config.etlGcsPrefix,
|
|
149
|
+
s3_bucket: this.config.etlS3Bucket,
|
|
150
|
+
s3_prefix: this.config.etlS3Prefix,
|
|
151
|
+
},
|
|
200
152
|
poll_interval_minutes: this.config.etlPollIntervalMinutes,
|
|
201
|
-
sqlite_path: path.resolve(this.config.sqlitePath ?? ".apphud-mcp/apphud.db"),
|
|
202
153
|
incoming_dir: path.resolve(this.config.etlIncomingDir),
|
|
203
|
-
manifest_total:
|
|
204
|
-
manifest_done:
|
|
205
|
-
raw_rows_total:
|
|
206
|
-
last_processed_at:
|
|
207
|
-
alerts_todo: alerts,
|
|
154
|
+
manifest_total: manifestEntries.length,
|
|
155
|
+
manifest_done: doneEntries.length,
|
|
156
|
+
raw_rows_total: rawRowsTotal,
|
|
157
|
+
last_processed_at: lastProcessed,
|
|
158
|
+
alerts_todo: this.alerts.slice(-20).reverse(),
|
|
208
159
|
retrieved_at: now,
|
|
209
160
|
};
|
|
210
161
|
}
|
|
211
162
|
async localAppsList(input = {}) {
|
|
212
163
|
await this.ensureLocalBootstrap(false);
|
|
213
|
-
const apps = this.
|
|
214
|
-
.
|
|
215
|
-
.
|
|
164
|
+
const apps = this.appKeys
|
|
165
|
+
.filter((app) => app.tenant_id === this.config.etlTenantId)
|
|
166
|
+
.map((app) => ({
|
|
167
|
+
app_id: app.app_id,
|
|
168
|
+
app_name: app.app_name,
|
|
169
|
+
platform: app.platform ?? null,
|
|
170
|
+
source: app.key_source,
|
|
171
|
+
updated_at: app.updated_at,
|
|
172
|
+
}))
|
|
173
|
+
.sort((a, b) => a.app_name.localeCompare(b.app_name));
|
|
216
174
|
return {
|
|
217
|
-
source: "
|
|
175
|
+
source: "apphud_storage_etl",
|
|
218
176
|
count: apps.length,
|
|
219
177
|
apps,
|
|
220
178
|
retrieved_at: new Date().toISOString(),
|
|
@@ -227,7 +185,7 @@ export class EtlService {
|
|
|
227
185
|
const from = input.from ?? new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
|
|
228
186
|
const to = input.to ?? new Date().toISOString();
|
|
229
187
|
const limit = Math.min(Math.max(input.limit ?? 100, 1), 500);
|
|
230
|
-
const rows = this.loadRows(appId, from, to, input.filters);
|
|
188
|
+
const rows = await this.loadRows(appId, from, to, input.filters);
|
|
231
189
|
const events = rows.slice(0, limit).map((row) => ({
|
|
232
190
|
event_id: this.readString(row.record, ["event_id", "id", "uuid"]),
|
|
233
191
|
event_type: this.readString(row.record, ["event_type", "event", "type", "name"]) ?? "unknown",
|
|
@@ -253,8 +211,8 @@ export class EtlService {
|
|
|
253
211
|
events_count: events.length,
|
|
254
212
|
next_cursor: undefined,
|
|
255
213
|
has_more: rows.length > limit,
|
|
256
|
-
source: "
|
|
257
|
-
source_used: "
|
|
214
|
+
source: "apphud_storage_etl",
|
|
215
|
+
source_used: "incoming_csv_gz",
|
|
258
216
|
retrieved_at: new Date().toISOString(),
|
|
259
217
|
raw_payload: input.include_raw ? rows.map((row) => row.record) : undefined,
|
|
260
218
|
};
|
|
@@ -267,7 +225,7 @@ export class EtlService {
|
|
|
267
225
|
app_id: appId,
|
|
268
226
|
apphud_app_id: appId,
|
|
269
227
|
analytics_available: true,
|
|
270
|
-
source: "
|
|
228
|
+
source: "apphud_storage_etl",
|
|
271
229
|
supported_tools: [
|
|
272
230
|
"apphud_analytics_events_list_local",
|
|
273
231
|
"apphud_analytics_metrics_list_local",
|
|
@@ -284,7 +242,7 @@ export class EtlService {
|
|
|
284
242
|
supported_shapes: ["value", "timeseries", "breakdown", "raw"],
|
|
285
243
|
metrics_count: metrics.length,
|
|
286
244
|
metrics_sample: metrics.slice(0, 20),
|
|
287
|
-
source_used: "
|
|
245
|
+
source_used: "incoming_csv_gz",
|
|
288
246
|
retrieved_at: new Date().toISOString(),
|
|
289
247
|
raw_payload: input.include_raw ? metrics : undefined,
|
|
290
248
|
};
|
|
@@ -298,7 +256,7 @@ export class EtlService {
|
|
|
298
256
|
apphud_app_id: appId,
|
|
299
257
|
metrics,
|
|
300
258
|
count: metrics.length,
|
|
301
|
-
source_used: "
|
|
259
|
+
source_used: "incoming_csv_gz",
|
|
302
260
|
retrieved_at: new Date().toISOString(),
|
|
303
261
|
raw_payload: input.include_raw ? metrics : undefined,
|
|
304
262
|
};
|
|
@@ -307,19 +265,17 @@ export class EtlService {
|
|
|
307
265
|
await this.ensureLocalBootstrap(true);
|
|
308
266
|
const appId = this.resolveLocalAppId(input.app_id, input.apphud_app_id);
|
|
309
267
|
const metricKey = input.metric_key ?? "events_count";
|
|
310
|
-
const
|
|
311
|
-
const to = input.to;
|
|
312
|
-
const rows = this.loadRows(appId, from, to, input.filters);
|
|
268
|
+
const rows = await this.loadRows(appId, input.from, input.to, input.filters);
|
|
313
269
|
const value = this.computeMetric(metricKey, rows);
|
|
314
270
|
return {
|
|
315
271
|
app_id: appId,
|
|
316
272
|
apphud_app_id: appId,
|
|
317
273
|
metric_key: metricKey,
|
|
318
|
-
from,
|
|
319
|
-
to,
|
|
274
|
+
from: input.from,
|
|
275
|
+
to: input.to,
|
|
320
276
|
value,
|
|
321
|
-
source: "
|
|
322
|
-
source_used: "
|
|
277
|
+
source: "apphud_storage_etl",
|
|
278
|
+
source_used: "incoming_csv_gz",
|
|
323
279
|
warnings: [],
|
|
324
280
|
retrieved_at: new Date().toISOString(),
|
|
325
281
|
raw_payload: input.include_raw ? rows.map((row) => row.record) : undefined,
|
|
@@ -330,7 +286,7 @@ export class EtlService {
|
|
|
330
286
|
const appId = this.resolveLocalAppId(input.app_id, input.apphud_app_id);
|
|
331
287
|
const metricKey = input.metric_key ?? "events_count";
|
|
332
288
|
const granularity = input.granularity ?? "day";
|
|
333
|
-
const rows = this.loadRows(appId, input.from, input.to, input.filters);
|
|
289
|
+
const rows = await this.loadRows(appId, input.from, input.to, input.filters);
|
|
334
290
|
const buckets = new Map();
|
|
335
291
|
for (const row of rows) {
|
|
336
292
|
const bucket = this.timeBucket(row.occurredAt, granularity);
|
|
@@ -356,8 +312,8 @@ export class EtlService {
|
|
|
356
312
|
points_count: points.length,
|
|
357
313
|
total: Number(total.toFixed(4)),
|
|
358
314
|
average: points.length > 0 ? Number((total / points.length).toFixed(4)) : null,
|
|
359
|
-
source: "
|
|
360
|
-
source_used: "
|
|
315
|
+
source: "apphud_storage_etl",
|
|
316
|
+
source_used: "incoming_csv_gz",
|
|
361
317
|
warnings: [],
|
|
362
318
|
retrieved_at: new Date().toISOString(),
|
|
363
319
|
raw_payload: input.include_raw ? rows.map((row) => row.record) : undefined,
|
|
@@ -369,7 +325,7 @@ export class EtlService {
|
|
|
369
325
|
const metricKey = input.metric_key ?? "events_count";
|
|
370
326
|
const dimension = input.dimension ?? "country";
|
|
371
327
|
const limit = Math.min(Math.max(input.limit ?? 20, 1), 200);
|
|
372
|
-
const rows = this.loadRows(appId, input.from, input.to, input.filters);
|
|
328
|
+
const rows = await this.loadRows(appId, input.from, input.to, input.filters);
|
|
373
329
|
const groups = new Map();
|
|
374
330
|
for (const row of rows) {
|
|
375
331
|
const key = this.dimensionValue(row.record, dimension);
|
|
@@ -399,8 +355,8 @@ export class EtlService {
|
|
|
399
355
|
granularity: input.granularity ?? "day",
|
|
400
356
|
rows: rowsWithShare,
|
|
401
357
|
rows_count: rowsWithShare.length,
|
|
402
|
-
source: "
|
|
403
|
-
source_used: "
|
|
358
|
+
source: "apphud_storage_etl",
|
|
359
|
+
source_used: "incoming_csv_gz",
|
|
404
360
|
warnings: [],
|
|
405
361
|
retrieved_at: new Date().toISOString(),
|
|
406
362
|
raw_payload: input.include_raw ? rows.map((row) => row.record) : undefined,
|
|
@@ -408,9 +364,8 @@ export class EtlService {
|
|
|
408
364
|
}
|
|
409
365
|
async localRevenueSummary(input) {
|
|
410
366
|
await this.ensureLocalBootstrap(true);
|
|
411
|
-
const
|
|
412
|
-
const
|
|
413
|
-
const refunds = await this.localMetricValue({ ...base, metric_key: "refunds", include_raw: false });
|
|
367
|
+
const gross = await this.localMetricValue({ ...input, metric_key: "revenue_gross", include_raw: false });
|
|
368
|
+
const refunds = await this.localMetricValue({ ...input, metric_key: "refunds", include_raw: false });
|
|
414
369
|
const grossValue = typeof gross.value === "number" ? gross.value : 0;
|
|
415
370
|
const refundValue = Math.abs(typeof refunds.value === "number" ? refunds.value : 0);
|
|
416
371
|
return {
|
|
@@ -421,8 +376,8 @@ export class EtlService {
|
|
|
421
376
|
revenue_gross: gross.value,
|
|
422
377
|
refunds: refunds.value,
|
|
423
378
|
net_revenue_estimate: Number((grossValue - refundValue).toFixed(4)),
|
|
424
|
-
source: "
|
|
425
|
-
source_used: ["
|
|
379
|
+
source: "apphud_storage_etl",
|
|
380
|
+
source_used: ["incoming_csv_gz"],
|
|
426
381
|
warnings: [],
|
|
427
382
|
compare_prev_period: input.compare_prev_period ? { note: "TODO local compare_prev_period" } : undefined,
|
|
428
383
|
retrieved_at: new Date().toISOString(),
|
|
@@ -449,8 +404,8 @@ export class EtlService {
|
|
|
449
404
|
renewals: renewals.value,
|
|
450
405
|
cancellations: cancellations.value,
|
|
451
406
|
churn_rate_estimate: activePaidValue > 0 ? Number((cancellationsValue / activePaidValue).toFixed(6)) : null,
|
|
452
|
-
source: "
|
|
453
|
-
source_used: ["
|
|
407
|
+
source: "apphud_storage_etl",
|
|
408
|
+
source_used: ["incoming_csv_gz"],
|
|
454
409
|
warnings: [],
|
|
455
410
|
retrieved_at: new Date().toISOString(),
|
|
456
411
|
raw_payload: input.include_raw ? { activePaid, activeTrials, newSubs, renewals, cancellations } : undefined,
|
|
@@ -471,8 +426,8 @@ export class EtlService {
|
|
|
471
426
|
trials_converted: converted.value,
|
|
472
427
|
conversion_rate: startedValue > 0 ? Number((convertedValue / startedValue).toFixed(6)) : null,
|
|
473
428
|
median_time_to_convert: null,
|
|
474
|
-
source: "
|
|
475
|
-
source_used: ["
|
|
429
|
+
source: "apphud_storage_etl",
|
|
430
|
+
source_used: ["incoming_csv_gz"],
|
|
476
431
|
warnings: [],
|
|
477
432
|
retrieved_at: new Date().toISOString(),
|
|
478
433
|
raw_payload: input.include_raw ? { started, converted } : undefined,
|
|
@@ -483,11 +438,6 @@ export class EtlService {
|
|
|
483
438
|
const timeseries = await this.localMetricTimeseries({ ...input, metric_key: "active_subs", granularity: input.granularity ?? "week" });
|
|
484
439
|
const points = timeseries.points ?? [];
|
|
485
440
|
const base = points[0]?.value ?? 0;
|
|
486
|
-
const periods = points.map((point, index) => ({
|
|
487
|
-
period_index: index,
|
|
488
|
-
retention_rate: base > 0 ? Number((point.value / base).toFixed(6)) : 0,
|
|
489
|
-
users_count: Math.round(point.value),
|
|
490
|
-
}));
|
|
491
441
|
return {
|
|
492
442
|
app_id: timeseries.app_id,
|
|
493
443
|
apphud_app_id: timeseries.apphud_app_id,
|
|
@@ -495,10 +445,20 @@ export class EtlService {
|
|
|
495
445
|
to: input.to,
|
|
496
446
|
granularity: input.granularity ?? "week",
|
|
497
447
|
cohort_by: "subscription_started",
|
|
498
|
-
rows: [
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
448
|
+
rows: [
|
|
449
|
+
{
|
|
450
|
+
cohort_start_date: (input.from ?? new Date().toISOString()).slice(0, 10),
|
|
451
|
+
users_count: base,
|
|
452
|
+
periods: points.map((point, index) => ({
|
|
453
|
+
period_index: index,
|
|
454
|
+
retention_rate: base > 0 ? Number((point.value / base).toFixed(6)) : 0,
|
|
455
|
+
users_count: Math.round(point.value),
|
|
456
|
+
})),
|
|
457
|
+
},
|
|
458
|
+
],
|
|
459
|
+
source: "apphud_storage_etl",
|
|
460
|
+
source_used: "incoming_csv_gz",
|
|
461
|
+
warnings: ["Retention is approximated from storage event stream."],
|
|
502
462
|
retrieved_at: new Date().toISOString(),
|
|
503
463
|
raw_payload: input.include_raw ? points : undefined,
|
|
504
464
|
};
|
|
@@ -507,21 +467,25 @@ export class EtlService {
|
|
|
507
467
|
await this.ensureLocalBootstrap(true);
|
|
508
468
|
const timeseries = await this.localMetricTimeseries({ ...input, metric_key: "cumulative_ltv", granularity: input.granularity ?? "week" });
|
|
509
469
|
const points = timeseries.points ?? [];
|
|
510
|
-
const periods = points.map((point, index) => ({
|
|
511
|
-
period_index: index,
|
|
512
|
-
ltv_value: Number(point.value.toFixed(6)),
|
|
513
|
-
date: point.date,
|
|
514
|
-
}));
|
|
515
470
|
return {
|
|
516
471
|
app_id: timeseries.app_id,
|
|
517
472
|
apphud_app_id: timeseries.apphud_app_id,
|
|
518
473
|
from: input.from,
|
|
519
474
|
to: input.to,
|
|
520
475
|
granularity: input.granularity ?? "week",
|
|
521
|
-
rows: [
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
476
|
+
rows: [
|
|
477
|
+
{
|
|
478
|
+
cohort_start_date: (input.from ?? new Date().toISOString()).slice(0, 10),
|
|
479
|
+
periods: points.map((point, index) => ({
|
|
480
|
+
period_index: index,
|
|
481
|
+
ltv_value: Number(point.value.toFixed(6)),
|
|
482
|
+
date: point.date,
|
|
483
|
+
})),
|
|
484
|
+
},
|
|
485
|
+
],
|
|
486
|
+
source: "apphud_storage_etl",
|
|
487
|
+
source_used: "incoming_csv_gz",
|
|
488
|
+
warnings: ["LTV is approximated from storage event stream."],
|
|
525
489
|
retrieved_at: new Date().toISOString(),
|
|
526
490
|
raw_payload: input.include_raw ? points : undefined,
|
|
527
491
|
};
|
|
@@ -531,7 +495,13 @@ export class EtlService {
|
|
|
531
495
|
const query = (input.query ?? {});
|
|
532
496
|
const shape = String(query.shape ?? "raw");
|
|
533
497
|
if (shape === "value") {
|
|
534
|
-
return this.localMetricValue({
|
|
498
|
+
return this.localMetricValue({
|
|
499
|
+
...input,
|
|
500
|
+
metric_key: String(query.metric_key ?? ""),
|
|
501
|
+
from: typeof query.from === "string" ? query.from : input.from,
|
|
502
|
+
to: typeof query.to === "string" ? query.to : input.to,
|
|
503
|
+
filters: query.filters ?? input.filters,
|
|
504
|
+
});
|
|
535
505
|
}
|
|
536
506
|
if (shape === "timeseries") {
|
|
537
507
|
return this.localMetricTimeseries({
|
|
@@ -540,6 +510,7 @@ export class EtlService {
|
|
|
540
510
|
from: typeof query.from === "string" ? query.from : input.from,
|
|
541
511
|
to: typeof query.to === "string" ? query.to : input.to,
|
|
542
512
|
granularity: query.granularity === "week" ? "week" : "day",
|
|
513
|
+
filters: query.filters ?? input.filters,
|
|
543
514
|
});
|
|
544
515
|
}
|
|
545
516
|
if (shape === "breakdown") {
|
|
@@ -550,15 +521,16 @@ export class EtlService {
|
|
|
550
521
|
from: typeof query.from === "string" ? query.from : input.from,
|
|
551
522
|
to: typeof query.to === "string" ? query.to : input.to,
|
|
552
523
|
limit: typeof query.limit === "number" ? query.limit : input.limit,
|
|
524
|
+
filters: query.filters ?? input.filters,
|
|
553
525
|
});
|
|
554
526
|
}
|
|
555
527
|
const appId = this.resolveLocalAppId(input.app_id, input.apphud_app_id, true);
|
|
556
|
-
const rows = this.loadRows(appId, input.from, input.to, input.filters);
|
|
528
|
+
const rows = await this.loadRows(appId, input.from, input.to, input.filters);
|
|
557
529
|
return {
|
|
558
530
|
app_id: appId,
|
|
559
531
|
apphud_app_id: appId,
|
|
560
532
|
shape,
|
|
561
|
-
source: "
|
|
533
|
+
source: "apphud_storage_etl",
|
|
562
534
|
status: 200,
|
|
563
535
|
retrieved_at: new Date().toISOString(),
|
|
564
536
|
response: input.include_raw ? rows.map((row) => row.record) : { rows_count: rows.length },
|
|
@@ -581,8 +553,8 @@ export class EtlService {
|
|
|
581
553
|
this.localMetricValue({ app_id: appId, metric_key: "new_subscriptions", from: rangeFrom, to: rangeTo, include_raw: false }),
|
|
582
554
|
this.localMetricValue({ app_id: appId, metric_key: "renewals", from: rangeFrom, to: rangeTo, include_raw: false }),
|
|
583
555
|
]);
|
|
584
|
-
const selectedRows = this.loadRows(appId, rangeFrom, rangeTo, input.filters);
|
|
585
|
-
const allRows = this.loadRows(appId, undefined, undefined, input.filters);
|
|
556
|
+
const selectedRows = await this.loadRows(appId, rangeFrom, rangeTo, input.filters);
|
|
557
|
+
const allRows = await this.loadRows(appId, undefined, undefined, input.filters);
|
|
586
558
|
const selectedRevenueGross = typeof revenueGross.value === "number" ? revenueGross.value : 0;
|
|
587
559
|
const selectedRefunds = typeof refunds.value === "number" ? refunds.value : 0;
|
|
588
560
|
const selectedProceeds = Number((selectedRevenueGross - Math.abs(selectedRefunds)).toFixed(4));
|
|
@@ -591,7 +563,6 @@ export class EtlService {
|
|
|
591
563
|
.filter((row) => ["subscription_started", "renewal", "trial_converted"].includes(this.eventType(row.record)))
|
|
592
564
|
.map((row) => this.userId(row.record))
|
|
593
565
|
.filter((value) => Boolean(value))).size;
|
|
594
|
-
const refundCount = selectedRows.filter((row) => this.eventType(row.record) === "refund").length;
|
|
595
566
|
const failedCharges = selectedRows.filter((row) => {
|
|
596
567
|
const type = this.eventType(row.record);
|
|
597
568
|
return type === "billing_issue" || type === "failed_charge";
|
|
@@ -599,14 +570,14 @@ export class EtlService {
|
|
|
599
570
|
const newUsers = new Set(selectedRows
|
|
600
571
|
.map((row) => this.userId(row.record))
|
|
601
572
|
.filter((value) => Boolean(value))).size;
|
|
573
|
+
const trialCancellations = selectedRows.filter((row) => this.eventType(row.record) === "cancellation").length;
|
|
602
574
|
const mrr = Number(selectedProceeds.toFixed(2));
|
|
603
575
|
const arr = Number((mrr * 12).toFixed(2));
|
|
604
576
|
const arpu = newUsers > 0 ? Number((selectedProceeds / newUsers).toFixed(2)) : null;
|
|
605
577
|
const arppu = paidUsersInRange > 0 ? Number((selectedProceeds / paidUsersInRange).toFixed(2)) : null;
|
|
606
578
|
const refundRate = selectedSales > 0 ? Number((Math.abs(selectedRefunds) / selectedSales).toFixed(4)) : 0;
|
|
607
|
-
const trialCancellations = selectedRows.filter((row) => this.eventType(row.record) === "cancellation").length;
|
|
608
579
|
return {
|
|
609
|
-
source: "
|
|
580
|
+
source: "apphud_storage_etl",
|
|
610
581
|
app_id: appId,
|
|
611
582
|
apphud_app_id: appId,
|
|
612
583
|
where: {
|
|
@@ -637,8 +608,8 @@ export class EtlService {
|
|
|
637
608
|
refunds: Math.abs(selectedRefunds),
|
|
638
609
|
refund_rate: refundRate,
|
|
639
610
|
failed_charges: failedCharges,
|
|
640
|
-
arpu
|
|
641
|
-
arppu
|
|
611
|
+
arpu,
|
|
612
|
+
arppu,
|
|
642
613
|
prevented_refund_requests: "N/A",
|
|
643
614
|
},
|
|
644
615
|
events: {
|
|
@@ -659,55 +630,50 @@ export class EtlService {
|
|
|
659
630
|
rows_in_selected_range: selectedRows.length,
|
|
660
631
|
rows_total_local: allRows.length,
|
|
661
632
|
},
|
|
662
|
-
warnings: [
|
|
663
|
-
"Local dashboard metrics are computed from ingested ETL rows and may differ from Apphud dashboard exact formulas.",
|
|
664
|
-
],
|
|
633
|
+
warnings: ["Dashboard metrics are computed from storage exports and may differ from Apphud dashboard exact formulas."],
|
|
665
634
|
retrieved_at: nowIso,
|
|
666
635
|
raw_payload: input.include_raw ? selectedRows.slice(0, 1000).map((row) => row.record) : undefined,
|
|
667
636
|
};
|
|
668
637
|
}
|
|
669
|
-
|
|
670
|
-
this.
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
created_at TEXT NOT NULL
|
|
709
|
-
);
|
|
710
|
-
`);
|
|
638
|
+
async ensureStateLoaded() {
|
|
639
|
+
if (this.stateLoaded) {
|
|
640
|
+
return;
|
|
641
|
+
}
|
|
642
|
+
this.appKeys = await this.readJsonFile(this.appKeysPath(), []);
|
|
643
|
+
const manifestRows = await this.readJsonFile(this.manifestPath(), []);
|
|
644
|
+
this.manifest = new Map(manifestRows.map((row) => [row.source_file, row]));
|
|
645
|
+
this.alerts = await this.readJsonFile(this.alertsPath(), []);
|
|
646
|
+
this.stateLoaded = true;
|
|
647
|
+
}
|
|
648
|
+
async persistState() {
|
|
649
|
+
await this.writeJsonFile(this.appKeysPath(), this.appKeys);
|
|
650
|
+
await this.writeJsonFile(this.manifestPath(), Array.from(this.manifest.values()));
|
|
651
|
+
await this.writeJsonFile(this.alertsPath(), this.alerts.slice(-1000));
|
|
652
|
+
}
|
|
653
|
+
stateDir() {
|
|
654
|
+
return path.join(path.resolve(this.config.etlStorageDir), "state");
|
|
655
|
+
}
|
|
656
|
+
appKeysPath() {
|
|
657
|
+
return path.join(this.stateDir(), "app_api_keys.json");
|
|
658
|
+
}
|
|
659
|
+
manifestPath() {
|
|
660
|
+
return path.join(this.stateDir(), "manifest.json");
|
|
661
|
+
}
|
|
662
|
+
alertsPath() {
|
|
663
|
+
return path.join(this.stateDir(), "alerts_todo.json");
|
|
664
|
+
}
|
|
665
|
+
async readJsonFile(filePath, fallback) {
|
|
666
|
+
try {
|
|
667
|
+
const raw = await readFile(filePath, "utf8");
|
|
668
|
+
return JSON.parse(raw);
|
|
669
|
+
}
|
|
670
|
+
catch {
|
|
671
|
+
return fallback;
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
async writeJsonFile(filePath, payload) {
|
|
675
|
+
await mkdir(path.dirname(filePath), { recursive: true });
|
|
676
|
+
await writeFile(filePath, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
|
|
711
677
|
}
|
|
712
678
|
metricCatalog() {
|
|
713
679
|
return [
|
|
@@ -731,68 +697,57 @@ export class EtlService {
|
|
|
731
697
|
if (!this.config.etlEnabled) {
|
|
732
698
|
return;
|
|
733
699
|
}
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
.
|
|
739
|
-
.get(this.config.etlTenantId);
|
|
740
|
-
const needsBootstrap = keysCount.total === 0 || (requireRows && doneCount.total === 0);
|
|
741
|
-
if (!needsBootstrap) {
|
|
742
|
-
return;
|
|
700
|
+
await this.ensureStateLoaded();
|
|
701
|
+
const keysCount = this.appKeys.filter((key) => key.tenant_id === this.config.etlTenantId).length;
|
|
702
|
+
const doneCount = Array.from(this.manifest.values()).filter((entry) => entry.tenant_id === this.config.etlTenantId && entry.status === "done").length;
|
|
703
|
+
if (keysCount === 0 || (requireRows && doneCount === 0)) {
|
|
704
|
+
await this.runOnce();
|
|
743
705
|
}
|
|
744
|
-
await this.runOnce();
|
|
745
706
|
}
|
|
746
707
|
resolveLocalAppId(appId, apphudAppId, allowEmpty = false) {
|
|
747
708
|
const explicit = appId?.trim() || apphudAppId?.trim();
|
|
748
709
|
if (explicit) {
|
|
749
710
|
return explicit;
|
|
750
711
|
}
|
|
751
|
-
const
|
|
752
|
-
.
|
|
753
|
-
.
|
|
754
|
-
if (
|
|
755
|
-
return
|
|
712
|
+
const latestKey = this.appKeys
|
|
713
|
+
.filter((key) => key.tenant_id === this.config.etlTenantId)
|
|
714
|
+
.sort((a, b) => b.updated_at.localeCompare(a.updated_at))[0];
|
|
715
|
+
if (latestKey?.app_id) {
|
|
716
|
+
return latestKey.app_id;
|
|
756
717
|
}
|
|
757
|
-
const
|
|
758
|
-
.
|
|
759
|
-
.
|
|
760
|
-
if (
|
|
761
|
-
return
|
|
718
|
+
const latestManifest = Array.from(this.manifest.values())
|
|
719
|
+
.filter((entry) => entry.tenant_id === this.config.etlTenantId && entry.app_id)
|
|
720
|
+
.sort((a, b) => (b.processed_at ?? "").localeCompare(a.processed_at ?? ""))[0];
|
|
721
|
+
if (latestManifest?.app_id) {
|
|
722
|
+
return latestManifest.app_id;
|
|
762
723
|
}
|
|
763
724
|
if (allowEmpty) {
|
|
764
725
|
return "unknown_app";
|
|
765
726
|
}
|
|
766
|
-
throw new ApphudMcpError("APP_NOT_FOUND", "No
|
|
727
|
+
throw new ApphudMcpError("APP_NOT_FOUND", "No storage data found", {
|
|
767
728
|
statusCode: 404,
|
|
768
|
-
actionHint: "
|
|
729
|
+
actionHint: "Set etl.enabled=true and etl.source=gcs|s3 with bucket settings, or place .csv.gz files into etl.incoming_dir.",
|
|
769
730
|
});
|
|
770
731
|
}
|
|
771
|
-
loadRows(appId, from, to, filters) {
|
|
772
|
-
const
|
|
773
|
-
const
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
const whereSql = where.join(" AND ");
|
|
787
|
-
const count = this.db
|
|
788
|
-
.prepare(`SELECT COUNT(*) AS total FROM etl_raw_rows WHERE ${whereSql}`)
|
|
789
|
-
.get(...params);
|
|
790
|
-
if (count.total > LOCAL_ROWS_GUARD_LIMIT) {
|
|
791
|
-
throw new ApphudMcpError("INVALID_PAYLOAD", "Local dataset for selected range is too large", {
|
|
732
|
+
async loadRows(appId, from, to, filters) {
|
|
733
|
+
const fromMs = from ? new Date(from).getTime() : Number.NEGATIVE_INFINITY;
|
|
734
|
+
const toMs = to ? new Date(to).getTime() : Number.POSITIVE_INFINITY;
|
|
735
|
+
const candidates = Array.from(this.manifest.values()).filter((entry) => {
|
|
736
|
+
if (entry.tenant_id !== this.config.etlTenantId || entry.status !== "done") {
|
|
737
|
+
return false;
|
|
738
|
+
}
|
|
739
|
+
if (appId && entry.app_id !== appId) {
|
|
740
|
+
return false;
|
|
741
|
+
}
|
|
742
|
+
return true;
|
|
743
|
+
});
|
|
744
|
+
const totalRowsHint = candidates.reduce((sum, entry) => sum + entry.row_count, 0);
|
|
745
|
+
if (totalRowsHint > LOCAL_ROWS_GUARD_LIMIT) {
|
|
746
|
+
throw new ApphudMcpError("INVALID_PAYLOAD", "Storage dataset for selected range is too large", {
|
|
792
747
|
statusCode: 400,
|
|
793
|
-
actionHint: "Narrow date range (from/to) or add stricter filters.
|
|
748
|
+
actionHint: "Narrow date range (from/to) or add stricter filters.",
|
|
794
749
|
details: {
|
|
795
|
-
|
|
750
|
+
rows_hint: totalRowsHint,
|
|
796
751
|
limit: LOCAL_ROWS_GUARD_LIMIT,
|
|
797
752
|
from,
|
|
798
753
|
to,
|
|
@@ -800,31 +755,31 @@ export class EtlService {
|
|
|
800
755
|
},
|
|
801
756
|
});
|
|
802
757
|
}
|
|
803
|
-
const
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
758
|
+
const rows = [];
|
|
759
|
+
for (const entry of candidates) {
|
|
760
|
+
try {
|
|
761
|
+
const content = await readFile(entry.source_file);
|
|
762
|
+
const unzipped = gunzipSync(content).toString("utf8");
|
|
763
|
+
const parsed = parseCsv(unzipped);
|
|
764
|
+
for (const row of parsed.rows) {
|
|
765
|
+
const record = rowToRecord(parsed.headers, row);
|
|
766
|
+
const normalized = record;
|
|
767
|
+
const occurredAt = this.detectOccurredAt(normalized) ?? entry.processed_at ?? new Date().toISOString();
|
|
768
|
+
const ts = new Date(occurredAt).getTime();
|
|
769
|
+
if (Number.isFinite(ts) && (ts < fromMs || ts > toMs)) {
|
|
770
|
+
continue;
|
|
771
|
+
}
|
|
772
|
+
if (!this.passesFilters(normalized, filters)) {
|
|
773
|
+
continue;
|
|
774
|
+
}
|
|
775
|
+
rows.push({ occurredAt, record: normalized });
|
|
776
|
+
}
|
|
777
|
+
}
|
|
778
|
+
catch {
|
|
779
|
+
continue;
|
|
819
780
|
}
|
|
820
|
-
return {};
|
|
821
|
-
}
|
|
822
|
-
catch {
|
|
823
|
-
return {};
|
|
824
781
|
}
|
|
825
|
-
|
|
826
|
-
detectOccurredAt(record) {
|
|
827
|
-
return this.readString(record, ["occurred_at", "timestamp", "time", "date", "event_date", "created_at", "purchased_at"]);
|
|
782
|
+
return rows.sort((a, b) => b.occurredAt.localeCompare(a.occurredAt));
|
|
828
783
|
}
|
|
829
784
|
readString(record, keys) {
|
|
830
785
|
for (const key of keys) {
|
|
@@ -853,6 +808,9 @@ export class EtlService {
|
|
|
853
808
|
}
|
|
854
809
|
return null;
|
|
855
810
|
}
|
|
811
|
+
detectOccurredAt(record) {
|
|
812
|
+
return this.readString(record, ["occurred_at", "timestamp", "time", "date", "event_date", "created_at", "purchased_at"]);
|
|
813
|
+
}
|
|
856
814
|
eventType(record) {
|
|
857
815
|
return (this.readString(record, ["event_type", "event", "type", "name"]) ?? "unknown").toLowerCase();
|
|
858
816
|
}
|
|
@@ -870,8 +828,7 @@ export class EtlService {
|
|
|
870
828
|
const distinctUsers = (types) => {
|
|
871
829
|
const set = new Set();
|
|
872
830
|
for (const row of rows) {
|
|
873
|
-
|
|
874
|
-
if (!types.includes(type)) {
|
|
831
|
+
if (!types.includes(this.eventType(row.record))) {
|
|
875
832
|
continue;
|
|
876
833
|
}
|
|
877
834
|
const uid = this.userId(row.record);
|
|
@@ -881,27 +838,20 @@ export class EtlService {
|
|
|
881
838
|
}
|
|
882
839
|
return set.size;
|
|
883
840
|
};
|
|
884
|
-
if (lower === "events_count")
|
|
841
|
+
if (lower === "events_count")
|
|
885
842
|
return rows.length;
|
|
886
|
-
|
|
887
|
-
if (lower === "trials_started") {
|
|
843
|
+
if (lower === "trials_started")
|
|
888
844
|
return countByType(["trial_started"]);
|
|
889
|
-
|
|
890
|
-
if (lower === "trials_converted") {
|
|
845
|
+
if (lower === "trials_converted")
|
|
891
846
|
return countByType(["trial_converted"]);
|
|
892
|
-
|
|
893
|
-
if (lower === "new_subscriptions") {
|
|
847
|
+
if (lower === "new_subscriptions")
|
|
894
848
|
return countByType(["subscription_started"]);
|
|
895
|
-
|
|
896
|
-
if (lower === "renewals") {
|
|
849
|
+
if (lower === "renewals")
|
|
897
850
|
return countByType(["renewal"]);
|
|
898
|
-
|
|
899
|
-
if (lower === "cancellations") {
|
|
851
|
+
if (lower === "cancellations")
|
|
900
852
|
return countByType(["cancellation", "expiration"]);
|
|
901
|
-
|
|
902
|
-
if (lower === "active_trials") {
|
|
853
|
+
if (lower === "active_trials")
|
|
903
854
|
return distinctUsers(["trial_started"]);
|
|
904
|
-
}
|
|
905
855
|
if (lower === "active_subs" || lower === "subscribers_retention") {
|
|
906
856
|
return distinctUsers(["subscription_started", "renewal", "trial_converted"]);
|
|
907
857
|
}
|
|
@@ -924,12 +874,10 @@ export class EtlService {
|
|
|
924
874
|
}
|
|
925
875
|
dimensionValue(record, dimension) {
|
|
926
876
|
const normalized = dimension.toLowerCase();
|
|
927
|
-
if (normalized === "country")
|
|
877
|
+
if (normalized === "country")
|
|
928
878
|
return this.readString(record, ["country", "country_code"]) ?? "unknown";
|
|
929
|
-
|
|
930
|
-
if (normalized === "platform") {
|
|
879
|
+
if (normalized === "platform")
|
|
931
880
|
return this.readString(record, ["platform", "store", "os"]) ?? "unknown";
|
|
932
|
-
}
|
|
933
881
|
if (normalized === "product" || normalized === "product_id") {
|
|
934
882
|
return this.readString(record, ["product_id", "sku", "subscription_product_id"]) ?? "unknown";
|
|
935
883
|
}
|
|
@@ -948,9 +896,8 @@ export class EtlService {
|
|
|
948
896
|
if (String(actual ?? "").toLowerCase() !== rawValue.toLowerCase()) {
|
|
949
897
|
return false;
|
|
950
898
|
}
|
|
951
|
-
continue;
|
|
952
899
|
}
|
|
953
|
-
if (typeof rawValue === "number" || typeof rawValue === "boolean") {
|
|
900
|
+
else if (typeof rawValue === "number" || typeof rawValue === "boolean") {
|
|
954
901
|
if (actual !== rawValue) {
|
|
955
902
|
return false;
|
|
956
903
|
}
|
|
@@ -973,162 +920,124 @@ export class EtlService {
|
|
|
973
920
|
async syncAppApiKeys() {
|
|
974
921
|
const listed = await this.apphudClient.listDashboardAppApiKeys();
|
|
975
922
|
const now = new Date().toISOString();
|
|
976
|
-
const
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
app_name = excluded.app_name,
|
|
981
|
-
platform = excluded.platform,
|
|
982
|
-
api_key = excluded.api_key,
|
|
983
|
-
key_source = excluded.key_source,
|
|
984
|
-
updated_at = excluded.updated_at
|
|
985
|
-
`);
|
|
986
|
-
const tx = this.db.transaction((apps) => {
|
|
987
|
-
for (const app of apps) {
|
|
988
|
-
upsert.run(this.config.etlTenantId, app.app_id, app.app_name, app.platform ?? null, app.api_key, app.source, now);
|
|
989
|
-
}
|
|
990
|
-
});
|
|
991
|
-
tx(listed.apps);
|
|
992
|
-
return listed.apps;
|
|
993
|
-
}
|
|
994
|
-
async fetchRemoteExports(apiKeys) {
|
|
995
|
-
for (const app of apiKeys) {
|
|
996
|
-
const listPath = interpolatePath(this.config.apphudEtlExportsListPath, {
|
|
923
|
+
const map = new Map(this.appKeys.map((item) => [`${item.tenant_id}:${item.app_id}`, item]));
|
|
924
|
+
for (const app of listed.apps) {
|
|
925
|
+
map.set(`${this.config.etlTenantId}:${app.app_id}`, {
|
|
926
|
+
tenant_id: this.config.etlTenantId,
|
|
997
927
|
app_id: app.app_id,
|
|
928
|
+
app_name: app.app_name,
|
|
929
|
+
platform: app.platform,
|
|
930
|
+
api_key: app.api_key,
|
|
931
|
+
key_source: app.source,
|
|
932
|
+
updated_at: now,
|
|
998
933
|
});
|
|
999
|
-
const listUrl = normalizeToPath(this.config.apphudEtlExportsApiBaseUrl, listPath);
|
|
1000
|
-
try {
|
|
1001
|
-
const response = await fetch(listUrl, {
|
|
1002
|
-
method: "GET",
|
|
1003
|
-
headers: {
|
|
1004
|
-
Authorization: `Bearer ${app.api_key}`,
|
|
1005
|
-
Accept: "application/json",
|
|
1006
|
-
},
|
|
1007
|
-
signal: AbortSignal.timeout(20_000),
|
|
1008
|
-
});
|
|
1009
|
-
if (!response.ok) {
|
|
1010
|
-
this.insertAlertTodo("ETL_EXPORT_LIST_FAILED", `app=${app.app_id} status=${response.status}`);
|
|
1011
|
-
continue;
|
|
1012
|
-
}
|
|
1013
|
-
const payload = (await response.json());
|
|
1014
|
-
const exportsList = extractExportCandidates(payload, app.app_id);
|
|
1015
|
-
for (const item of exportsList) {
|
|
1016
|
-
await this.downloadExport(item, app.api_key);
|
|
1017
|
-
}
|
|
1018
|
-
}
|
|
1019
|
-
catch (error) {
|
|
1020
|
-
this.insertAlertTodo("ETL_EXPORT_LIST_FAILED", `app=${app.app_id} ${error instanceof Error ? error.message : String(error)}`);
|
|
1021
|
-
}
|
|
1022
934
|
}
|
|
935
|
+
this.appKeys = Array.from(map.values());
|
|
1023
936
|
}
|
|
1024
|
-
async
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
await mkdir(destinationDir, { recursive: true });
|
|
1028
|
-
const targetPath = path.join(destinationDir, item.fileName);
|
|
1029
|
-
try {
|
|
1030
|
-
const existing = await stat(targetPath).catch(() => null);
|
|
1031
|
-
if (existing && existing.isFile()) {
|
|
1032
|
-
return;
|
|
1033
|
-
}
|
|
1034
|
-
const response = await fetch(normalizeToPath(this.config.apphudEtlExportsApiBaseUrl, item.downloadUrl), {
|
|
1035
|
-
method: "GET",
|
|
1036
|
-
headers: {
|
|
1037
|
-
Authorization: `Bearer ${apiKey}`,
|
|
1038
|
-
Accept: "application/octet-stream,application/gzip,application/json",
|
|
1039
|
-
},
|
|
1040
|
-
signal: AbortSignal.timeout(60_000),
|
|
1041
|
-
});
|
|
1042
|
-
if (!response.ok) {
|
|
1043
|
-
this.insertAlertTodo("ETL_EXPORT_DOWNLOAD_FAILED", `file=${item.fileName} status=${response.status}`);
|
|
1044
|
-
return;
|
|
1045
|
-
}
|
|
1046
|
-
const contentType = response.headers.get("content-type") ?? "";
|
|
1047
|
-
if (contentType.includes("application/json")) {
|
|
1048
|
-
const payload = (await response.json());
|
|
1049
|
-
const nestedUrl = (typeof payload.download_url === "string" && payload.download_url) ||
|
|
1050
|
-
(typeof payload.url === "string" && payload.url);
|
|
1051
|
-
if (!nestedUrl) {
|
|
1052
|
-
this.insertAlertTodo("ETL_EXPORT_DOWNLOAD_FAILED", `file=${item.fileName} missing nested download url`);
|
|
1053
|
-
return;
|
|
1054
|
-
}
|
|
1055
|
-
await this.downloadExport({
|
|
1056
|
-
...item,
|
|
1057
|
-
downloadUrl: nestedUrl,
|
|
1058
|
-
}, apiKey);
|
|
1059
|
-
return;
|
|
1060
|
-
}
|
|
1061
|
-
const bytes = Buffer.from(await response.arrayBuffer());
|
|
1062
|
-
await writeFile(targetPath, bytes);
|
|
937
|
+
async syncFromStorage() {
|
|
938
|
+
if (this.config.etlSource === "none") {
|
|
939
|
+
return;
|
|
1063
940
|
}
|
|
1064
|
-
|
|
1065
|
-
this.
|
|
941
|
+
if (this.config.etlSource === "gcs") {
|
|
942
|
+
await this.syncFromGcs();
|
|
943
|
+
return;
|
|
944
|
+
}
|
|
945
|
+
if (this.config.etlSource === "s3") {
|
|
946
|
+
await this.syncFromS3();
|
|
947
|
+
return;
|
|
1066
948
|
}
|
|
1067
949
|
}
|
|
1068
|
-
async
|
|
1069
|
-
const
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
950
|
+
async syncFromGcs() {
|
|
951
|
+
const bucket = this.config.etlGcsBucket;
|
|
952
|
+
if (!bucket) {
|
|
953
|
+
this.insertAlertTodo("ETL_GCS_CONFIG_MISSING", "Missing ETL_GCS_BUCKET for etl.source=gcs");
|
|
954
|
+
return;
|
|
955
|
+
}
|
|
956
|
+
const sourceUri = `gs://${bucket}${this.normalizeStoragePrefix(this.config.etlGcsPrefix)}`;
|
|
957
|
+
const destination = path.resolve(this.config.etlIncomingDir);
|
|
958
|
+
try {
|
|
959
|
+
await execFileAsync("gsutil", ["-m", "rsync", "-r", sourceUri, destination], { timeout: 10 * 60 * 1000 });
|
|
960
|
+
}
|
|
961
|
+
catch (error) {
|
|
962
|
+
this.insertAlertTodo("ETL_GCS_SYNC_FAILED", this.stringifyExecError(error, "gsutil"));
|
|
1074
963
|
}
|
|
1075
964
|
}
|
|
1076
|
-
async
|
|
1077
|
-
const
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
const content = await readFile(absolutePath);
|
|
1081
|
-
const checksum = createHash("sha256").update(content).digest("hex");
|
|
1082
|
-
const now = new Date().toISOString();
|
|
1083
|
-
const existing = this.db
|
|
1084
|
-
.prepare(`SELECT checksum, status FROM etl_manifest WHERE tenant_id = ? AND source_file = ?`)
|
|
1085
|
-
.get(tenantId, absolutePath);
|
|
1086
|
-
if (existing?.checksum === checksum && existing.status === "done") {
|
|
965
|
+
async syncFromS3() {
|
|
966
|
+
const bucket = this.config.etlS3Bucket;
|
|
967
|
+
if (!bucket) {
|
|
968
|
+
this.insertAlertTodo("ETL_S3_CONFIG_MISSING", "Missing ETL_S3_BUCKET for etl.source=s3");
|
|
1087
969
|
return;
|
|
1088
970
|
}
|
|
1089
|
-
const
|
|
1090
|
-
|
|
1091
|
-
VALUES (?, ?, ?, ?, 'failed', 0, ?, ?)
|
|
1092
|
-
ON CONFLICT(tenant_id, source_file) DO UPDATE SET
|
|
1093
|
-
checksum = excluded.checksum,
|
|
1094
|
-
app_id = excluded.app_id,
|
|
1095
|
-
status = 'failed',
|
|
1096
|
-
row_count = 0,
|
|
1097
|
-
processed_at = excluded.processed_at,
|
|
1098
|
-
error_message = excluded.error_message
|
|
1099
|
-
`);
|
|
971
|
+
const sourceUri = `s3://${bucket}${this.normalizeStoragePrefix(this.config.etlS3Prefix)}`;
|
|
972
|
+
const destination = path.resolve(this.config.etlIncomingDir);
|
|
1100
973
|
try {
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
const deleteExistingRows = this.db.prepare(`DELETE FROM etl_raw_rows WHERE tenant_id = ? AND source_file = ?`);
|
|
1104
|
-
const insertRaw = this.db.prepare(`
|
|
1105
|
-
INSERT INTO etl_raw_rows (tenant_id, app_id, source_file, row_index, raw_json, ingested_at)
|
|
1106
|
-
VALUES (?, ?, ?, ?, ?, ?)
|
|
1107
|
-
`);
|
|
1108
|
-
const upsertManifest = this.db.prepare(`
|
|
1109
|
-
INSERT INTO etl_manifest (tenant_id, app_id, source_file, checksum, status, row_count, processed_at, error_message)
|
|
1110
|
-
VALUES (?, ?, ?, ?, 'done', ?, ?, NULL)
|
|
1111
|
-
ON CONFLICT(tenant_id, source_file) DO UPDATE SET
|
|
1112
|
-
checksum = excluded.checksum,
|
|
1113
|
-
app_id = excluded.app_id,
|
|
1114
|
-
status = 'done',
|
|
1115
|
-
row_count = excluded.row_count,
|
|
1116
|
-
processed_at = excluded.processed_at,
|
|
1117
|
-
error_message = NULL
|
|
1118
|
-
`);
|
|
1119
|
-
const tx = this.db.transaction(() => {
|
|
1120
|
-
deleteExistingRows.run(tenantId, absolutePath);
|
|
1121
|
-
parsed.rows.forEach((row, index) => {
|
|
1122
|
-
const mapped = rowToRecord(parsed.headers, row);
|
|
1123
|
-
insertRaw.run(tenantId, appId, absolutePath, index + 1, JSON.stringify(mapped), now);
|
|
1124
|
-
});
|
|
1125
|
-
upsertManifest.run(tenantId, appId, absolutePath, checksum, parsed.rows.length, now);
|
|
974
|
+
await execFileAsync("aws", ["s3", "sync", sourceUri, destination, "--no-progress"], {
|
|
975
|
+
timeout: 10 * 60 * 1000,
|
|
1126
976
|
});
|
|
1127
|
-
tx();
|
|
1128
977
|
}
|
|
1129
978
|
catch (error) {
|
|
1130
|
-
|
|
1131
|
-
|
|
979
|
+
this.insertAlertTodo("ETL_S3_SYNC_FAILED", this.stringifyExecError(error, "aws"));
|
|
980
|
+
}
|
|
981
|
+
}
|
|
982
|
+
normalizeStoragePrefix(prefix) {
|
|
983
|
+
if (!prefix || prefix.trim().length === 0) {
|
|
984
|
+
return "";
|
|
985
|
+
}
|
|
986
|
+
const trimmed = prefix.trim().replace(/^\/+/, "").replace(/\/+$/, "");
|
|
987
|
+
return trimmed.length > 0 ? `/${trimmed}` : "";
|
|
988
|
+
}
|
|
989
|
+
stringifyExecError(error, command) {
|
|
990
|
+
const err = error;
|
|
991
|
+
if (err?.code === "ENOENT") {
|
|
992
|
+
return `${command} command not found. Install ${command} CLI and configure credentials.`;
|
|
993
|
+
}
|
|
994
|
+
const stderr = typeof err?.stderr === "string" ? err.stderr.trim() : "";
|
|
995
|
+
const message = typeof err?.message === "string" ? err.message : "unknown error";
|
|
996
|
+
return `${command} sync failed: ${stderr || message}`.slice(0, 800);
|
|
997
|
+
}
|
|
998
|
+
async refreshManifestFromIncoming() {
|
|
999
|
+
const incomingRoot = path.resolve(this.config.etlIncomingDir);
|
|
1000
|
+
await mkdir(incomingRoot, { recursive: true });
|
|
1001
|
+
const files = await listCsvGzFiles(incomingRoot);
|
|
1002
|
+
for (const filePath of files) {
|
|
1003
|
+
const absolutePath = path.resolve(filePath);
|
|
1004
|
+
const tenantId = this.config.etlTenantId;
|
|
1005
|
+
const appId = this.extractAppIdFromPath(absolutePath);
|
|
1006
|
+
const now = new Date().toISOString();
|
|
1007
|
+
try {
|
|
1008
|
+
const content = await readFile(absolutePath);
|
|
1009
|
+
const checksum = createHash("sha256").update(content).digest("hex");
|
|
1010
|
+
const existing = this.manifest.get(absolutePath);
|
|
1011
|
+
if (existing?.checksum === checksum && existing.status === "done") {
|
|
1012
|
+
continue;
|
|
1013
|
+
}
|
|
1014
|
+
const unzipped = gunzipSync(content).toString("utf8");
|
|
1015
|
+
const parsed = parseCsv(unzipped);
|
|
1016
|
+
this.manifest.set(absolutePath, {
|
|
1017
|
+
tenant_id: tenantId,
|
|
1018
|
+
app_id: appId,
|
|
1019
|
+
source_file: absolutePath,
|
|
1020
|
+
checksum,
|
|
1021
|
+
status: "done",
|
|
1022
|
+
row_count: parsed.rows.length,
|
|
1023
|
+
processed_at: now,
|
|
1024
|
+
error_message: null,
|
|
1025
|
+
});
|
|
1026
|
+
}
|
|
1027
|
+
catch (error) {
|
|
1028
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1029
|
+
this.manifest.set(absolutePath, {
|
|
1030
|
+
tenant_id: tenantId,
|
|
1031
|
+
app_id: appId,
|
|
1032
|
+
source_file: absolutePath,
|
|
1033
|
+
checksum: "",
|
|
1034
|
+
status: "failed",
|
|
1035
|
+
row_count: 0,
|
|
1036
|
+
processed_at: now,
|
|
1037
|
+
error_message: message,
|
|
1038
|
+
});
|
|
1039
|
+
this.insertAlertTodo("ETL_INGEST_FAILED", `file=${path.basename(absolutePath)} ${message}`);
|
|
1040
|
+
}
|
|
1132
1041
|
}
|
|
1133
1042
|
}
|
|
1134
1043
|
extractAppIdFromPath(filePath) {
|
|
@@ -1137,10 +1046,10 @@ export class EtlService {
|
|
|
1137
1046
|
return parts.length > 0 ? parts[0] ?? null : null;
|
|
1138
1047
|
}
|
|
1139
1048
|
applyAlertTodos() {
|
|
1140
|
-
const latestDone = this.
|
|
1141
|
-
.
|
|
1142
|
-
.
|
|
1143
|
-
if (!latestDone
|
|
1049
|
+
const latestDone = Array.from(this.manifest.values())
|
|
1050
|
+
.filter((entry) => entry.tenant_id === this.config.etlTenantId && entry.status === "done" && entry.processed_at)
|
|
1051
|
+
.sort((a, b) => (b.processed_at ?? "").localeCompare(a.processed_at ?? ""))[0];
|
|
1052
|
+
if (!latestDone?.processed_at) {
|
|
1144
1053
|
return;
|
|
1145
1054
|
}
|
|
1146
1055
|
const ageMs = Date.now() - new Date(latestDone.processed_at).getTime();
|
|
@@ -1150,8 +1059,14 @@ export class EtlService {
|
|
|
1150
1059
|
}
|
|
1151
1060
|
}
|
|
1152
1061
|
insertAlertTodo(code, message) {
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1062
|
+
const now = new Date().toISOString();
|
|
1063
|
+
const lastSame = this.alerts[this.alerts.length - 1];
|
|
1064
|
+
if (lastSame && lastSame.code === code && lastSame.message === message) {
|
|
1065
|
+
return;
|
|
1066
|
+
}
|
|
1067
|
+
this.alerts.push({ code, message, created_at: now });
|
|
1068
|
+
if (this.alerts.length > 1000) {
|
|
1069
|
+
this.alerts = this.alerts.slice(this.alerts.length - 1000);
|
|
1070
|
+
}
|
|
1156
1071
|
}
|
|
1157
1072
|
}
|