@gscdump/engine 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
1
+ import { t as AnalysisParams } from "./analysis-types.mjs";
2
+ type WindowPreset = 'last-7d' | 'last-28d' | 'last-30d' | 'last-90d' | 'last-180d' | 'last-365d' | 'mtd' | 'ytd' | 'custom';
3
+ type ComparisonMode = 'none' | 'prev-period' | 'yoy';
4
+ interface ResolveWindowOptions {
5
+ preset: WindowPreset;
6
+ comparison?: ComparisonMode;
7
+ anchor?: string;
8
+ start?: string;
9
+ end?: string;
10
+ }
11
+ interface ResolvedWindow {
12
+ start: string;
13
+ end: string;
14
+ days: number;
15
+ comparison?: {
16
+ start: string;
17
+ end: string;
18
+ };
19
+ }
20
+ interface AnalysisPeriod {
21
+ startDate: string;
22
+ endDate: string;
23
+ }
24
+ interface ComparisonPeriod {
25
+ current: AnalysisPeriod;
26
+ previous: AnalysisPeriod;
27
+ }
28
+ declare function defaultEndDate(): string;
29
+ declare function defaultStartDate(): string;
30
+ declare function periodOf(params: AnalysisParams): AnalysisPeriod;
31
+ declare function comparisonOf(params: AnalysisParams): ComparisonPeriod;
32
+ declare function resolveWindow(opts: ResolveWindowOptions): ResolvedWindow;
33
+ /** Convert a ResolvedWindow into the AnalysisPeriod / ComparisonPeriod shape. */
34
+ declare function windowToPeriod(w: ResolvedWindow): AnalysisPeriod;
35
+ declare function windowToComparisonPeriod(w: ResolvedWindow): ComparisonPeriod | undefined;
36
+ interface PadTimeseriesOptions<T> {
37
+ /** ISO date (YYYY-MM-DD), inclusive lower bound. */
38
+ startDate: string;
39
+ /** ISO date (YYYY-MM-DD), inclusive upper bound. */
40
+ endDate: string;
41
+ /**
42
+ * Row to insert for missing dates. Defaults to `{ clicks: 0, impressions: 0, ctr: 0, position: 0 }`.
43
+ * The `date` field is set automatically.
44
+ */
45
+ fill?: Omit<T, 'date'>;
46
+ /** Row-field that carries the ISO date. Defaults to `date`. */
47
+ dateKey?: string;
48
+ }
49
+ type DateRowShape = Record<string, unknown> & {
50
+ date?: unknown;
51
+ };
52
+ /**
53
+ * Pad rows so every calendar day in `[startDate, endDate]` appears at least
54
+ * once. Existing dates keep all their rows (grouped timeseries safe).
55
+ */
56
+ declare function padTimeseries<T extends DateRowShape = DateRowShape>(rows: readonly T[], options: PadTimeseriesOptions<T>): T[];
57
+ export { ResolveWindowOptions as a, comparisonOf as c, padTimeseries as d, periodOf as f, windowToPeriod as h, PadTimeseriesOptions as i, defaultEndDate as l, windowToComparisonPeriod as m, ComparisonMode as n, ResolvedWindow as o, resolveWindow as p, ComparisonPeriod as r, WindowPreset as s, AnalysisPeriod as t, defaultStartDate as u };
@@ -317,6 +317,13 @@ interface QueryExecuteOptions {
317
317
  * `dataSource.uri` is available.
318
318
  */
319
319
  fileKeys: Record<string, string[]>;
320
+ /**
321
+ * Per-placeholder table identity. Used by the executor to emit a
322
+ * schema-correct empty fallback when a named file set is empty: an
323
+ * `extraFiles` placeholder against `page_keywords` should fall back to
324
+ * the page_keywords schema, not the analyzer's primary `table`.
325
+ */
326
+ placeholderTables?: Record<string, TableName>;
320
327
  dataSource: DataSource;
321
328
  table: TableName;
322
329
  signal?: AbortSignal;
package/dist/index.mjs CHANGED
@@ -113,18 +113,18 @@ function createDuckDBCodec(factory) {
113
113
  }
114
114
  };
115
115
  }
116
- function rewriteEmptyFileSets(sql, placeholders, table) {
117
- const emptyFallback = `(SELECT * FROM ${emptyTableSchema(table)} WHERE FALSE)`;
116
+ function rewriteEmptyFileSets(sql, placeholders, defaultTable, placeholderTables) {
118
117
  let out = sql;
119
118
  for (const [name, keys] of Object.entries(placeholders)) {
120
119
  if (keys.length > 0) continue;
120
+ const emptyFallback = `(SELECT * FROM ${emptyTableSchema(placeholderTables?.[name] ?? defaultTable)} WHERE FALSE)`;
121
121
  const pattern = new RegExp(`read_parquet\\(\\s*\\{\\{${name}\\}\\}\\s*(?:,\\s*union_by_name\\s*=\\s*true\\s*)?\\)`, "g");
122
122
  out = out.replace(pattern, emptyFallback);
123
123
  }
124
124
  return out;
125
125
  }
126
126
  function createDuckDBExecutor(factory) {
127
- return { async execute({ sql, params, fileKeys, dataSource, table, signal }) {
127
+ return { async execute({ sql, params, fileKeys, placeholderTables, dataSource, table, signal }) {
128
128
  signal?.throwIfAborted();
129
129
  const db = await factory.getDuckDB();
130
130
  const placeholders = {};
@@ -145,7 +145,7 @@ function createDuckDBExecutor(factory) {
145
145
  }
146
146
  try {
147
147
  signal?.throwIfAborted();
148
- const finalSql = substituteNamedFiles(rewriteEmptyFileSets(sql, placeholders, table), placeholders);
148
+ const finalSql = substituteNamedFiles(rewriteEmptyFileSets(sql, placeholders, table, placeholderTables), placeholders);
149
149
  return {
150
150
  rows: await db.query(finalSql, params),
151
151
  sql: finalSql
@@ -349,10 +349,13 @@ function createStorageEngine(opts) {
349
349
  table = entries[0]?.[1].table;
350
350
  }
351
351
  if (!table) throw new Error("runSQL requires at least one fileSet or an explicit table");
352
+ const placeholderTables = {};
353
+ for (const [name, ref] of entries) placeholderTables[name] = ref.table;
352
354
  const result = await executor.execute({
353
355
  sql: opts.sql,
354
356
  params: opts.params ?? [],
355
357
  fileKeys,
358
+ placeholderTables,
356
359
  dataSource,
357
360
  table,
358
361
  signal: opts.signal
@@ -1,57 +1,2 @@
1
- import { t as AnalysisParams } from "../_chunks/analysis-types.mjs";
2
- type WindowPreset = 'last-7d' | 'last-28d' | 'last-30d' | 'last-90d' | 'last-180d' | 'last-365d' | 'mtd' | 'ytd' | 'custom';
3
- type ComparisonMode = 'none' | 'prev-period' | 'yoy';
4
- interface ResolveWindowOptions {
5
- preset: WindowPreset;
6
- comparison?: ComparisonMode;
7
- anchor?: string;
8
- start?: string;
9
- end?: string;
10
- }
11
- interface ResolvedWindow {
12
- start: string;
13
- end: string;
14
- days: number;
15
- comparison?: {
16
- start: string;
17
- end: string;
18
- };
19
- }
20
- interface AnalysisPeriod {
21
- startDate: string;
22
- endDate: string;
23
- }
24
- interface ComparisonPeriod {
25
- current: AnalysisPeriod;
26
- previous: AnalysisPeriod;
27
- }
28
- declare function defaultEndDate(): string;
29
- declare function defaultStartDate(): string;
30
- declare function periodOf(params: AnalysisParams): AnalysisPeriod;
31
- declare function comparisonOf(params: AnalysisParams): ComparisonPeriod;
32
- declare function resolveWindow(opts: ResolveWindowOptions): ResolvedWindow;
33
- /** Convert a ResolvedWindow into the AnalysisPeriod / ComparisonPeriod shape. */
34
- declare function windowToPeriod(w: ResolvedWindow): AnalysisPeriod;
35
- declare function windowToComparisonPeriod(w: ResolvedWindow): ComparisonPeriod | undefined;
36
- interface PadTimeseriesOptions<T> {
37
- /** ISO date (YYYY-MM-DD), inclusive lower bound. */
38
- startDate: string;
39
- /** ISO date (YYYY-MM-DD), inclusive upper bound. */
40
- endDate: string;
41
- /**
42
- * Row to insert for missing dates. Defaults to `{ clicks: 0, impressions: 0, ctr: 0, position: 0 }`.
43
- * The `date` field is set automatically.
44
- */
45
- fill?: Omit<T, 'date'>;
46
- /** Row-field that carries the ISO date. Defaults to `date`. */
47
- dateKey?: string;
48
- }
49
- type DateRowShape = Record<string, unknown> & {
50
- date?: unknown;
51
- };
52
- /**
53
- * Pad rows so every calendar day in `[startDate, endDate]` appears at least
54
- * once. Existing dates keep all their rows (grouped timeseries safe).
55
- */
56
- declare function padTimeseries<T extends DateRowShape = DateRowShape>(rows: readonly T[], options: PadTimeseriesOptions<T>): T[];
1
+ import { a as ResolveWindowOptions, c as comparisonOf, d as padTimeseries, f as periodOf, h as windowToPeriod, i as PadTimeseriesOptions, l as defaultEndDate, m as windowToComparisonPeriod, n as ComparisonMode, o as ResolvedWindow, p as resolveWindow, r as ComparisonPeriod, s as WindowPreset, t as AnalysisPeriod, u as defaultStartDate } from "../_chunks/index.mjs";
57
2
  export { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod };
@@ -0,0 +1,171 @@
1
+ import { n as AnalysisResult, t as AnalysisParams } from "../_chunks/analysis-types.mjs";
2
+ import { n as ComparisonMode, o as ResolvedWindow, s as WindowPreset } from "../_chunks/index.mjs";
3
+ /** Status vocabulary mirrors `ActionPrioritySourceStatus`. */
4
+ type ReportStepStatus = 'pending' | 'running' | 'done' | 'skipped' | 'error';
5
+ type ReportSeverity = 'info' | 'low' | 'medium' | 'high';
6
+ type ReportEntityKind = 'page' | 'query';
7
+ type ReportActionKind = 'analyzer' | 'cli' | 'indexing' | 'fix';
8
+ type ReportCoverage = 'full' | 'partial';
9
+ /** Citty-shaped arg spec, kept structural so engine doesn't pull citty in. */
10
+ interface ReportArgDef {
11
+ type: 'string' | 'boolean' | 'number';
12
+ description?: string;
13
+ default?: string | boolean | number;
14
+ required?: boolean;
15
+ alias?: string;
16
+ }
17
+ type ReportArgsSpec = Record<string, ReportArgDef>;
18
+ interface ReportEntity {
19
+ kind: ReportEntityKind;
20
+ value: string;
21
+ }
22
+ interface ReportFindingDelta {
23
+ metric: string;
24
+ prior: number;
25
+ current: number;
26
+ pct: number;
27
+ }
28
+ interface ReportFinding {
29
+ entity: ReportEntity;
30
+ metrics: Record<string, number>;
31
+ delta?: ReportFindingDelta;
32
+ why?: string;
33
+ }
34
+ interface ReportSectionSummary {
35
+ delta?: number;
36
+ direction?: 'up' | 'down' | 'flat';
37
+ magnitudeLabel?: string;
38
+ }
39
+ interface ReportAction {
40
+ kind: ReportActionKind;
41
+ target?: ReportEntity;
42
+ params?: Record<string, unknown>;
43
+ rationale: string;
44
+ /** Human hint, generated; never authoritative. */
45
+ cliHint?: string;
46
+ }
47
+ interface ReportSectionArtifact {
48
+ analyzer: string;
49
+ params: AnalysisParams;
50
+ }
51
+ interface ReportSection {
52
+ id: string;
53
+ title: string;
54
+ severity: ReportSeverity;
55
+ summary: ReportSectionSummary;
56
+ /** Bounded; sorted by stable composite key. */
57
+ findings: ReportFinding[];
58
+ truncated?: {
59
+ kept: number;
60
+ total: number;
61
+ };
62
+ coverage: ReportCoverage;
63
+ actions: ReportAction[];
64
+ artifact?: ReportSectionArtifact;
65
+ }
66
+ interface ReportPlanStep {
67
+ /** Stable identifier within the report (e.g. `movers`, `decay-current`). */
68
+ key: string;
69
+ /** Analyzer id (or future: nested report id). Open string by design. */
70
+ type: string;
71
+ /** Analyzer params; report-runtime applies `type` from the step. */
72
+ params: Omit<AnalysisParams, 'type'>;
73
+ /** Required steps fail the report; optional steps degrade `coverage`. */
74
+ required?: boolean;
75
+ }
76
+ interface ReportStepStateMeta {
77
+ key: string;
78
+ type: string;
79
+ status: ReportStepStatus;
80
+ error?: string;
81
+ }
82
+ interface ReportResultMeta {
83
+ durationMs: number;
84
+ rowsScanned: number;
85
+ degraded: boolean;
86
+ steps: ReportStepStateMeta[];
87
+ }
88
+ interface ReportResult {
89
+ id: string;
90
+ site: string;
91
+ /** sha256(id|site|window|paramsCanonical|registryVersion). Stable. */
92
+ inputHash: string;
93
+ /** ISO 8601. NOT included in inputHash. */
94
+ generatedAt: string;
95
+ window: ResolvedWindow;
96
+ sections: ReportSection[];
97
+ meta: ReportResultMeta;
98
+ }
99
+ /**
100
+ * Loose params bag. Concrete reports refine this with their own interface.
101
+ * Constraint is `object` so report authors can use plain interfaces without
102
+ * needing an index signature.
103
+ */
104
+ type ReportParams = object;
105
+ interface ReportContext<P extends ReportParams = ReportParams> {
106
+ /** Resolved site URL (e.g. `https://example.com/`). */
107
+ site: string;
108
+ /** Already-resolved window — runtime calls `resolveWindow` once before plan(). */
109
+ window: ResolvedWindow;
110
+ params: P;
111
+ /** Hash of registry/code version. Bumped via package version. */
112
+ registryVersion: string;
113
+ }
114
+ /**
115
+ * Reduce step results → sections. Runtime injects `meta` post-reduce.
116
+ */
117
+ type ReportReducer<P extends ReportParams = ReportParams> = (results: Record<string, AnalysisResult>, ctx: ReportContext<P>) => Omit<ReportResult, 'meta' | 'inputHash' | 'generatedAt' | 'site' | 'window' | 'id'> & {
118
+ sections: ReportSection[];
119
+ };
120
+ interface DefinedReport<P extends ReportParams = ReportParams> {
121
+ id: string;
122
+ description: string;
123
+ defaultPeriod: WindowPreset;
124
+ defaultComparison: ComparisonMode;
125
+ /** Single source of truth for CLI flags + MCP input schema. */
126
+ argsSpec: ReportArgsSpec;
127
+ plan: (params: P, window: ResolvedWindow) => readonly ReportPlanStep[];
128
+ reduce: ReportReducer<P>;
129
+ }
130
+ interface DefineReportOptions<P extends ReportParams = ReportParams> {
131
+ id: string;
132
+ description: string;
133
+ defaultPeriod: WindowPreset;
134
+ defaultComparison: ComparisonMode;
135
+ argsSpec?: ReportArgsSpec;
136
+ plan: (params: P, window: ResolvedWindow) => readonly ReportPlanStep[];
137
+ reduce: ReportReducer<P>;
138
+ }
139
+ /**
140
+ * Mirror of `defineAnalyzer`. Pure factory: validates required fields,
141
+ * fills default `argsSpec`. No runtime behaviour — `runReport` consumes
142
+ * the returned object.
143
+ */
144
+ declare function defineReport<P extends ReportParams = ReportParams>(opts: DefineReportOptions<P>): DefinedReport<P>;
145
+ interface InputHashSeeds {
146
+ id: string;
147
+ site: string;
148
+ window: ResolvedWindow;
149
+ params: ReportParams;
150
+ registryVersion: string;
151
+ }
152
+ /** Stable JSON: sorts object keys at every level. Arrays preserve order. */
153
+ declare function canonicalize(value: unknown): unknown;
154
+ declare function computeInputHash(seeds: InputHashSeeds): Promise<string>;
155
+ interface ReportRegistryInit {
156
+ reports?: readonly DefinedReport<ReportParams>[];
157
+ /**
158
+ * Opaque version string. Used as the `registryVersion` input to
159
+ * `inputHash` so cached results invalidate when report code ships.
160
+ * Caller is expected to feed in their package version.
161
+ */
162
+ version?: string;
163
+ }
164
+ interface ReportRegistry {
165
+ version: string;
166
+ listReportIds: () => readonly string[];
167
+ getReport: (id: string) => DefinedReport<ReportParams> | undefined;
168
+ listReports: () => readonly DefinedReport<ReportParams>[];
169
+ }
170
+ declare function createReportRegistry(init?: ReportRegistryInit): ReportRegistry;
171
+ export { type DefineReportOptions, type DefinedReport, type InputHashSeeds, type ReportAction, type ReportActionKind, type ReportArgDef, type ReportArgsSpec, type ReportContext, type ReportCoverage, type ReportEntity, type ReportEntityKind, type ReportFinding, type ReportFindingDelta, type ReportParams, type ReportPlanStep, type ReportReducer, type ReportRegistry, type ReportRegistryInit, type ReportResult, type ReportResultMeta, type ReportSection, type ReportSectionArtifact, type ReportSectionSummary, type ReportSeverity, type ReportStepStateMeta, type ReportStepStatus, canonicalize, computeInputHash, createReportRegistry, defineReport };
@@ -0,0 +1,56 @@
1
+ function defineReport(opts) {
2
+ if (!opts.id) throw new Error("defineReport: id is required");
3
+ if (!opts.plan) throw new Error(`defineReport(${opts.id}): plan is required`);
4
+ if (!opts.reduce) throw new Error(`defineReport(${opts.id}): reduce is required`);
5
+ return {
6
+ id: opts.id,
7
+ description: opts.description,
8
+ defaultPeriod: opts.defaultPeriod,
9
+ defaultComparison: opts.defaultComparison,
10
+ argsSpec: opts.argsSpec ?? {},
11
+ plan: opts.plan,
12
+ reduce: opts.reduce
13
+ };
14
+ }
15
+ function canonicalize(value) {
16
+ if (value == null || typeof value !== "object") return value;
17
+ if (Array.isArray(value)) return value.map(canonicalize);
18
+ const out = {};
19
+ for (const k of Object.keys(value).sort()) out[k] = canonicalize(value[k]);
20
+ return out;
21
+ }
22
/**
 * Compute a hex-encoded SHA-256 digest over the canonical JSON form of the
 * report inputs: `id`, `site`, the window's `start`/`end`/`comparison`
 * (`null` when there is no comparison), `params` and `registryVersion`.
 *
 * @param seeds Object carrying `id`, `site`, `window`, `params` and
 *   `registryVersion`.
 * @returns Promise resolving to the digest as a lowercase hex string.
 */
async function computeInputHash(seeds) {
  const { start, end, comparison } = seeds.window;
  const hashedInput = {
    id: seeds.id,
    site: seeds.site,
    // Only start/end/comparison of the window contribute to the hash.
    window: { start, end, comparison: comparison ?? null },
    params: seeds.params,
    registryVersion: seeds.registryVersion
  };
  const json = JSON.stringify(canonicalize(hashedInput));
  const encoded = new TextEncoder().encode(json);
  const digest = await globalThis.crypto.subtle.digest("SHA-256", encoded);
  return bufferToHex(digest);
}
37
/**
 * Render the bytes of `buffer` as a lowercase hex string, two characters per
 * byte (zero-padded).
 *
 * @param buffer Buffer to view as a `Uint8Array`.
 * @returns Hex string; empty when the buffer has no bytes.
 */
function bufferToHex(buffer) {
  const hexPairs = Array.from(
    new Uint8Array(buffer),
    (byte) => byte.toString(16).padStart(2, "0")
  );
  return hexPairs.join("");
}
43
+ function createReportRegistry(init = {}) {
44
+ const byId = /* @__PURE__ */ new Map();
45
+ for (const r of init.reports ?? []) {
46
+ if (byId.has(r.id)) throw new Error(`createReportRegistry: duplicate report id ${r.id}`);
47
+ byId.set(r.id, r);
48
+ }
49
+ return {
50
+ version: init.version ?? "0",
51
+ listReportIds: () => [...byId.keys()].sort(),
52
+ getReport: (id) => byId.get(id),
53
+ listReports: () => [...byId.values()]
54
+ };
55
+ }
56
+ export { canonicalize, computeInputHash, createReportRegistry, defineReport };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.8.1",
4
+ "version": "0.9.0",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -101,6 +101,11 @@
101
101
  "import": "./dist/analyzer/index.mjs",
102
102
  "default": "./dist/analyzer/index.mjs"
103
103
  },
104
+ "./report": {
105
+ "types": "./dist/report/index.d.mts",
106
+ "import": "./dist/report/index.mjs",
107
+ "default": "./dist/report/index.mjs"
108
+ },
104
109
  "./analysis-types": {
105
110
  "types": "./dist/analysis-types.d.mts",
106
111
  "import": "./dist/analysis-types.mjs",
@@ -154,7 +159,7 @@
154
159
  "dependencies": {
155
160
  "drizzle-orm": "^0.45.2",
156
161
  "proper-lockfile": "^4.1.2",
157
- "gscdump": "0.8.1"
162
+ "gscdump": "0.9.0"
158
163
  },
159
164
  "devDependencies": {
160
165
  "@duckdb/duckdb-wasm": "^1.32.0",