@gscdump/analysis 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +251 -0
- package/dist/analyzer/index.d.mts +893 -0
- package/dist/analyzer/index.mjs +4944 -0
- package/dist/default-registry.d.mts +93 -0
- package/dist/default-registry.mjs +1957 -0
- package/dist/index.d.mts +620 -0
- package/dist/index.mjs +2873 -0
- package/dist/period/index.d.mts +57 -0
- package/dist/period/index.mjs +150 -0
- package/dist/query/index.d.mts +26 -0
- package/dist/query/index.mjs +340 -0
- package/dist/semantic/index.d.mts +70 -0
- package/dist/semantic/index.mjs +391 -0
- package/dist/source/index.d.mts +427 -0
- package/dist/source/index.mjs +1865 -0
- package/package.json +86 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { AnalysisParams } from "gscdump/contracts";
|
|
2
|
+
/**
 * Named date windows understood by `resolveWindow`. The `last-Nd` presets are
 * rolling windows, `mtd` / `ytd` start on the first day of the anchor's UTC
 * month / year, and `custom` takes explicit `start` and `end` dates.
 */
type WindowPreset =
  | 'last-7d'
  | 'last-28d'
  | 'last-30d'
  | 'last-90d'
  | 'last-180d'
  | 'last-365d'
  | 'mtd'
  | 'ytd'
  | 'custom';

/**
 * How the comparison window is derived: not at all, as the equally long
 * period ending the day before the window starts, or as the same window
 * shifted back 365 days.
 */
type ComparisonMode =
  | 'none'
  | 'prev-period'
  | 'yoy';

/** Input accepted by `resolveWindow`. */
interface ResolveWindowOptions {
  /** Which window to resolve. */
  preset: WindowPreset;
  /** Comparison window to attach; treated as `none` when omitted. */
  comparison?: ComparisonMode;
  /** Date (YYYY-MM-DD) the non-custom presets end on; the current time is used when omitted. */
  anchor?: string;
  /** Window start; required when `preset` is `custom`. */
  start?: string;
  /** Window end; required when `preset` is `custom`. */
  end?: string;
}

/** Concrete window produced by `resolveWindow`. */
interface ResolvedWindow {
  start: string;
  end: string;
  /** Number of days from `start` to `end`, counting both ends. */
  days: number;
  /** Present only when a comparison mode other than `none` was requested. */
  comparison?: { start: string; end: string };
}
|
|
20
|
+
/** A date range expressed with `startDate` / `endDate` field names. */
interface AnalysisPeriod {
  startDate: string;
  endDate: string;
}

/** A period paired with the earlier period it is compared against. */
interface ComparisonPeriod {
  /** The period under analysis. */
  current: AnalysisPeriod;
  /** The baseline period. */
  previous: AnalysisPeriod;
}
|
|
28
|
+
/** End date used when the analysis params carry none; the implementation returns `daysAgo(3)`. */
declare function defaultEndDate(): string;

/** Start date used when the analysis params carry none; the implementation returns `daysAgo(31)`. */
declare function defaultStartDate(): string;

/** Read the analysis period from `params`, falling back to the default start/end for missing (or empty) values. */
declare function periodOf(params: AnalysisParams): AnalysisPeriod;

/**
 * Read the current and previous periods from `params`.
 *
 * @throws Error when `prevStartDate` or `prevEndDate` is missing.
 */
declare function comparisonOf(params: AnalysisParams): ComparisonPeriod;

/**
 * Turn a window preset (plus optional anchor and comparison mode) into
 * concrete start/end dates and an inclusive day count.
 *
 * @throws Error when `preset` is `custom` and `start` or `end` is missing.
 */
declare function resolveWindow(opts: ResolveWindowOptions): ResolvedWindow;

/** Convert a ResolvedWindow into the AnalysisPeriod shape (`start`/`end` become `startDate`/`endDate`). */
declare function windowToPeriod(w: ResolvedWindow): AnalysisPeriod;

/** Convert a ResolvedWindow into the ComparisonPeriod shape; `undefined` when the window has no comparison. */
declare function windowToComparisonPeriod(w: ResolvedWindow): ComparisonPeriod | undefined;
|
|
36
|
+
/** Options accepted by `padTimeseries`. */
interface PadTimeseriesOptions<T> {
  /** ISO date (YYYY-MM-DD), inclusive lower bound. */
  startDate: string;
  /** ISO date (YYYY-MM-DD), inclusive upper bound. */
  endDate: string;
  /**
   * Row to insert for missing dates. Defaults to
   * `{ clicks: 0, impressions: 0, ctr: 0, position: 0 }`.
   * The date field is set automatically. Note the type always omits the
   * literal `date` key, even when `dateKey` names a different field.
   */
  fill?: Omit<T, 'date'>;
  /** Row-field that carries the ISO date. Defaults to `date`. */
  dateKey?: string;
}

/** Minimal row shape: an arbitrary record that may carry a `date` field. */
type DateRowShape = Record<string, unknown> & { date?: unknown };

/**
 * Pad rows so every calendar day in `[startDate, endDate]` appears at least
 * once. Existing dates keep all their rows (grouped timeseries safe).
 */
declare function padTimeseries<T extends DateRowShape = DateRowShape>(
  rows: readonly T[],
  options: PadTimeseriesOptions<T>,
): T[];
|
|
57
|
+
export { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod };
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import { MS_PER_DAY, daysAgo, toIsoDate } from "gscdump";
|
|
2
|
+
function defaultEndDate() {
|
|
3
|
+
return daysAgo(3);
|
|
4
|
+
}
|
|
5
|
+
function defaultStartDate() {
|
|
6
|
+
return daysAgo(31);
|
|
7
|
+
}
|
|
8
|
+
function periodOf(params) {
|
|
9
|
+
return {
|
|
10
|
+
startDate: params.startDate || defaultStartDate(),
|
|
11
|
+
endDate: params.endDate || defaultEndDate()
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
function comparisonOf(params) {
|
|
15
|
+
if (!params.prevStartDate || !params.prevEndDate) throw new Error(`${params.type} analysis requires prevStartDate and prevEndDate`);
|
|
16
|
+
return {
|
|
17
|
+
current: periodOf(params),
|
|
18
|
+
previous: {
|
|
19
|
+
startDate: params.prevStartDate,
|
|
20
|
+
endDate: params.prevEndDate
|
|
21
|
+
}
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
/**
 * Parse a `YYYY-MM-DD` string as midnight UTC of that day.
 *
 * @param s Date string; `T00:00:00Z` is appended before parsing.
 * @returns The parsed Date, or an Invalid Date when the combined string does not parse.
 */
function parseIso(s) {
  const utcMidnight = `${s}T00:00:00Z`;
  return /* @__PURE__ */ new Date(utcMidnight);
}
|
|
27
|
+
function addDays(d, n) {
|
|
28
|
+
return new Date(d.getTime() + n * MS_PER_DAY);
|
|
29
|
+
}
|
|
30
|
+
function daysBetween(start, end) {
|
|
31
|
+
return Math.round((parseIso(end).getTime() - parseIso(start).getTime()) / MS_PER_DAY) + 1;
|
|
32
|
+
}
|
|
33
|
+
function resolveWindow(opts) {
|
|
34
|
+
const anchor = opts.anchor ? parseIso(opts.anchor) : /* @__PURE__ */ new Date();
|
|
35
|
+
const anchorIso = toIsoDate(anchor);
|
|
36
|
+
let start;
|
|
37
|
+
let end;
|
|
38
|
+
switch (opts.preset) {
|
|
39
|
+
case "last-7d":
|
|
40
|
+
end = anchorIso;
|
|
41
|
+
start = toIsoDate(addDays(anchor, -6));
|
|
42
|
+
break;
|
|
43
|
+
case "last-28d":
|
|
44
|
+
end = anchorIso;
|
|
45
|
+
start = toIsoDate(addDays(anchor, -27));
|
|
46
|
+
break;
|
|
47
|
+
case "last-30d":
|
|
48
|
+
end = anchorIso;
|
|
49
|
+
start = toIsoDate(addDays(anchor, -29));
|
|
50
|
+
break;
|
|
51
|
+
case "last-90d":
|
|
52
|
+
end = anchorIso;
|
|
53
|
+
start = toIsoDate(addDays(anchor, -89));
|
|
54
|
+
break;
|
|
55
|
+
case "last-180d":
|
|
56
|
+
end = anchorIso;
|
|
57
|
+
start = toIsoDate(addDays(anchor, -179));
|
|
58
|
+
break;
|
|
59
|
+
case "last-365d":
|
|
60
|
+
end = anchorIso;
|
|
61
|
+
start = toIsoDate(addDays(anchor, -364));
|
|
62
|
+
break;
|
|
63
|
+
case "mtd":
|
|
64
|
+
end = anchorIso;
|
|
65
|
+
start = toIsoDate(new Date(Date.UTC(anchor.getUTCFullYear(), anchor.getUTCMonth(), 1)));
|
|
66
|
+
break;
|
|
67
|
+
case "ytd":
|
|
68
|
+
end = anchorIso;
|
|
69
|
+
start = toIsoDate(new Date(Date.UTC(anchor.getUTCFullYear(), 0, 1)));
|
|
70
|
+
break;
|
|
71
|
+
case "custom":
|
|
72
|
+
if (!opts.start || !opts.end) throw new Error("resolveWindow: preset=custom requires start and end");
|
|
73
|
+
start = opts.start;
|
|
74
|
+
end = opts.end;
|
|
75
|
+
break;
|
|
76
|
+
}
|
|
77
|
+
const days = daysBetween(start, end);
|
|
78
|
+
const result = {
|
|
79
|
+
start,
|
|
80
|
+
end,
|
|
81
|
+
days
|
|
82
|
+
};
|
|
83
|
+
const mode = opts.comparison ?? "none";
|
|
84
|
+
if (mode === "prev-period") {
|
|
85
|
+
const prevEnd = toIsoDate(addDays(parseIso(start), -1));
|
|
86
|
+
result.comparison = {
|
|
87
|
+
start: toIsoDate(addDays(parseIso(prevEnd), -(days - 1))),
|
|
88
|
+
end: prevEnd
|
|
89
|
+
};
|
|
90
|
+
} else if (mode === "yoy") {
|
|
91
|
+
const prevEnd = toIsoDate(addDays(parseIso(end), -365));
|
|
92
|
+
result.comparison = {
|
|
93
|
+
start: toIsoDate(addDays(parseIso(start), -365)),
|
|
94
|
+
end: prevEnd
|
|
95
|
+
};
|
|
96
|
+
}
|
|
97
|
+
return result;
|
|
98
|
+
}
|
|
99
|
+
/**
 * Re-key a resolved window into the `{ startDate, endDate }` period shape.
 *
 * @param w Resolved window; only `start` and `end` are read.
 * @returns A new period object.
 */
function windowToPeriod(w) {
  const { start: startDate, end: endDate } = w;
  return { startDate, endDate };
}
|
|
105
|
+
/**
 * Re-key a resolved window and its comparison into the
 * `{ current, previous }` comparison-period shape.
 *
 * @param w Resolved window.
 * @returns The comparison period, or `undefined` when `w.comparison` is absent.
 */
function windowToComparisonPeriod(w) {
  const { comparison } = w;
  if (!comparison) return void 0;
  const current = { startDate: w.start, endDate: w.end };
  const previous = { startDate: comparison.start, endDate: comparison.end };
  return { current, previous };
}
|
|
118
|
+
const DEFAULT_FILL = {
|
|
119
|
+
clicks: 0,
|
|
120
|
+
impressions: 0,
|
|
121
|
+
ctr: 0,
|
|
122
|
+
position: 0
|
|
123
|
+
};
|
|
124
|
+
function padTimeseries(rows, options) {
|
|
125
|
+
const { startDate, endDate } = options;
|
|
126
|
+
const dateKey = options.dateKey ?? "date";
|
|
127
|
+
const fill = options.fill ?? DEFAULT_FILL;
|
|
128
|
+
const byDate = /* @__PURE__ */ new Map();
|
|
129
|
+
for (const row of rows) {
|
|
130
|
+
const d = String(row[dateKey]);
|
|
131
|
+
const bucket = byDate.get(d);
|
|
132
|
+
if (bucket) bucket.push(row);
|
|
133
|
+
else byDate.set(d, [row]);
|
|
134
|
+
}
|
|
135
|
+
const result = [];
|
|
136
|
+
const start = /* @__PURE__ */ new Date(`${startDate}T00:00:00Z`);
|
|
137
|
+
const end = /* @__PURE__ */ new Date(`${endDate}T00:00:00Z`);
|
|
138
|
+
if (Number.isNaN(start.getTime()) || Number.isNaN(end.getTime())) throw new Error(`padTimeseries: invalid date range ${startDate}..${endDate}`);
|
|
139
|
+
for (let cursorMs = start.getTime(), endMs = end.getTime(); cursorMs <= endMs; cursorMs += MS_PER_DAY) {
|
|
140
|
+
const dateStr = toIsoDate(new Date(cursorMs));
|
|
141
|
+
const existing = byDate.get(dateStr);
|
|
142
|
+
if (existing) result.push(...existing);
|
|
143
|
+
else result.push({
|
|
144
|
+
...fill,
|
|
145
|
+
[dateKey]: dateStr
|
|
146
|
+
});
|
|
147
|
+
}
|
|
148
|
+
return result;
|
|
149
|
+
}
|
|
150
|
+
export { comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod };
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { ResolverOptions } from "@gscdump/engine/resolver";
|
|
2
|
+
import { Row } from "@gscdump/engine/contracts";
|
|
3
|
+
import { AnalysisParams } from "gscdump/contracts";
|
|
4
|
+
/** A secondary SQL statement that accompanies a plan's main query. */
interface QueryAnalyzerExtraQuery {
  /** Identifier of the query; the plan builders in this package use names such as `totals`, `prevTotals` and `count`. */
  name: string;
  /** SQL text. */
  sql: string;
  /** Bound parameter values for `sql`. */
  params: unknown[];
}
|
|
9
|
+
/**
 * Everything needed to run one analyzer query: the main statement, optional
 * secondary statements, and a function that turns the raw rows into the
 * analyzer's result.
 */
interface QueryAnalyzerPlan<TK extends string = string> {
  /** Table the main query targets; the builders obtain it from `options.adapter.inferTable`. */
  tableKey: TK;
  /** Main SQL text. */
  sql: string;
  /** Bound parameter values for `sql`. */
  params: unknown[];
  /** Secondary statements whose rows are passed to `shape` as `extras`. */
  extraQueries?: QueryAnalyzerExtraQuery[];
  /**
   * Post-process the query output.
   *
   * @param rows Rows returned by the main query.
   * @param params The analysis params the plan was built from.
   * @param extras Rows of the extra queries, looked up by query name in the builders' `shape` implementations.
   */
  shape: (
    rows: Row[],
    params: AnalysisParams,
    extras?: Record<string, Row[]>,
  ) => { results: Row[]; meta: Record<string, unknown> };
}
|
|
19
|
+
/**
 * Build the plan for the `data-query` tool from `params.q` (and the optional
 * comparison state `params.qc`).
 *
 * @throws Error when `params.q` is not a BuilderState, when `params.qc` is
 *   present but not a BuilderState, or when the dimensions include `date`.
 */
declare function buildDataQueryPlan<TK extends string>(
  params: AnalysisParams,
  options: ResolverOptions<TK>,
): QueryAnalyzerPlan<TK>;

/**
 * Build the plan for the `data-detail` tool from `params.q` (and the optional
 * comparison state `params.qc`).
 *
 * @throws Error when `params.q` is not a BuilderState, when `params.qc` is
 *   present but not a BuilderState, or when the dimensions lack `date`.
 */
declare function buildDataDetailPlan<TK extends string>(
  params: AnalysisParams,
  options: ResolverOptions<TK>,
): QueryAnalyzerPlan<TK>;

/**
 * Produce a canonical form of a search query for grouping near-duplicates.
 * Idempotent: `normalizeQuery(normalizeQuery(q)) === normalizeQuery(q)`.
 */
declare function normalizeQuery(query: string): string;
|
|
26
|
+
export { type QueryAnalyzerExtraQuery, type QueryAnalyzerPlan, buildDataDetailPlan, buildDataQueryPlan, normalizeQuery };
|
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
import { buildExtrasQueries, buildTotalsSql, mergeExtras, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized } from "@gscdump/engine/resolver";
|
|
2
|
+
import { extractDateRange } from "gscdump/query";
|
|
3
|
+
import { MS_PER_DAY, toIsoDate } from "gscdump";
|
|
4
|
+
const DEFAULT_FILL = {
|
|
5
|
+
clicks: 0,
|
|
6
|
+
impressions: 0,
|
|
7
|
+
ctr: 0,
|
|
8
|
+
position: 0
|
|
9
|
+
};
|
|
10
|
+
function padTimeseries(rows, options) {
|
|
11
|
+
const { startDate, endDate } = options;
|
|
12
|
+
const dateKey = options.dateKey ?? "date";
|
|
13
|
+
const fill = options.fill ?? DEFAULT_FILL;
|
|
14
|
+
const byDate = /* @__PURE__ */ new Map();
|
|
15
|
+
for (const row of rows) {
|
|
16
|
+
const d = String(row[dateKey]);
|
|
17
|
+
const bucket = byDate.get(d);
|
|
18
|
+
if (bucket) bucket.push(row);
|
|
19
|
+
else byDate.set(d, [row]);
|
|
20
|
+
}
|
|
21
|
+
const result = [];
|
|
22
|
+
const start = /* @__PURE__ */ new Date(`${startDate}T00:00:00Z`);
|
|
23
|
+
const end = /* @__PURE__ */ new Date(`${endDate}T00:00:00Z`);
|
|
24
|
+
if (Number.isNaN(start.getTime()) || Number.isNaN(end.getTime())) throw new Error(`padTimeseries: invalid date range ${startDate}..${endDate}`);
|
|
25
|
+
for (let cursorMs = start.getTime(), endMs = end.getTime(); cursorMs <= endMs; cursorMs += MS_PER_DAY) {
|
|
26
|
+
const dateStr = toIsoDate(new Date(cursorMs));
|
|
27
|
+
const existing = byDate.get(dateStr);
|
|
28
|
+
if (existing) result.push(...existing);
|
|
29
|
+
else result.push({
|
|
30
|
+
...fill,
|
|
31
|
+
[dateKey]: dateStr
|
|
32
|
+
});
|
|
33
|
+
}
|
|
34
|
+
return result;
|
|
35
|
+
}
|
|
36
|
+
/**
 * Assert that `input` looks like a BuilderState: a non-null object with an
 * array-valued `dimensions` property.
 *
 * @param input Candidate value (the caller passes `params.q`).
 * @param tool Tool name used as the error-message prefix.
 * @returns `input` unchanged.
 * @throws Error when the check fails.
 */
function requireBuilderState(input, tool) {
  const looksLikeState = Boolean(input) && typeof input === "object" && "dimensions" in input && Array.isArray(input.dimensions);
  if (!looksLikeState) throw new Error(`${tool}: params.q is required (BuilderState)`);
  return input;
}
|
|
40
|
+
/**
 * Like `requireBuilderState`, but a missing value is allowed.
 *
 * @param input Candidate value.
 * @param tool Tool name used as the error-message prefix.
 * @param key Name of the `params` field, used in the error message.
 * @returns `null` when `input` is `null` or `undefined`, otherwise `input` unchanged.
 * @throws Error when `input` is present but is not an object with an array-valued `dimensions`.
 */
function optionalBuilderState(input, tool, key) {
  if (input == null) return null;
  const looksLikeState = typeof input === "object" && "dimensions" in input && Array.isArray(input.dimensions);
  if (looksLikeState) return input;
  throw new Error(`${tool}: params.${key} must be a BuilderState`);
}
|
|
45
|
+
/** Columns that are converted with `Number()` when present and non-null. */
const NUMERIC_METRIC_COLS = [
  "clicks",
  "impressions",
  "ctr",
  "position",
  "prevClicks",
  "prevImpressions",
  "prevCtr",
  "prevPosition",
  "variantCount",
  "totalCount"
];

/**
 * Return a shallow copy of `row` whose known metric columns are numbers.
 *
 * Columns that are absent, `null` or `undefined` are left as they are; every
 * other column is copied untouched.
 *
 * @param row Source row; not mutated.
 * @returns The coerced copy.
 */
function coerceNumericCols(row) {
  const coerced = { ...row };
  for (const col of NUMERIC_METRIC_COLS) {
    if (!(col in coerced)) continue;
    const raw = coerced[col];
    if (raw != null) coerced[col] = Number(raw);
  }
  return coerced;
}
|
|
62
|
+
function shapeDataQuery(rows, extras, opts) {
|
|
63
|
+
let totalCount;
|
|
64
|
+
let cleaned;
|
|
65
|
+
if (opts.hasPrev) {
|
|
66
|
+
cleaned = rows.map(coerceNumericCols);
|
|
67
|
+
totalCount = Number((extras?.count?.[0])?.total ?? cleaned.length);
|
|
68
|
+
} else {
|
|
69
|
+
const first = rows[0];
|
|
70
|
+
totalCount = Number(first?.totalCount ?? 0);
|
|
71
|
+
cleaned = rows.map((raw) => {
|
|
72
|
+
const { totalCount: _tc, totalClicks: _tclk, totalImpressions: _timp, totalCtr: _tctr, totalPosition: _tpos, sum_position: _sp, ...rest } = raw;
|
|
73
|
+
return coerceNumericCols(rest);
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
const totalsRow = extras?.totals?.[0] ?? {};
|
|
77
|
+
const totals = {
|
|
78
|
+
clicks: Number(totalsRow.clicks ?? 0),
|
|
79
|
+
impressions: Number(totalsRow.impressions ?? 0),
|
|
80
|
+
ctr: Number(totalsRow.ctr ?? 0),
|
|
81
|
+
position: Number(totalsRow.position ?? 0)
|
|
82
|
+
};
|
|
83
|
+
const extrasResults = [];
|
|
84
|
+
if (extras?.canonicalExtras) extrasResults.push({
|
|
85
|
+
key: "canonicalExtras",
|
|
86
|
+
results: extras.canonicalExtras
|
|
87
|
+
});
|
|
88
|
+
return {
|
|
89
|
+
results: mergeExtras(cleaned, extrasResults),
|
|
90
|
+
meta: {
|
|
91
|
+
totalCount,
|
|
92
|
+
totals
|
|
93
|
+
}
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
/**
 * Build the query plan for the `data-query` tool.
 *
 * The plan always carries a `totals` extra query plus whatever
 * `buildExtrasQueries` returns. When a comparison state (`params.qc`) is
 * given, the main statement comes from `resolveComparisonSQL` and a `count`
 * extra query is appended; otherwise the main statement comes from
 * `resolveToSQLOptimized`.
 *
 * @param params Analysis params; reads `q`, `qc` and `comparisonFilter`.
 * @param options Resolver options; `options.adapter.inferTable` picks the table key.
 * @returns The plan, including a `shape` callback that delegates to `shapeDataQuery`.
 * @throws Error when `q` / `qc` are not BuilderStates or when `q.dimensions` contains `date`.
 */
function buildDataQueryPlan(params, options) {
  const tool = "data-query";
  const state = requireBuilderState(params.q, tool);
  if (state.dimensions.includes("date")) throw new Error("data-query: date dimension not supported; use data-detail");
  const prev = optionalBuilderState(params.qc, tool, "qc");

  const totals = buildTotalsSql(state, options);
  const extras = buildExtrasQueries(state, options);
  const extraQueries = [{ name: "totals", sql: totals.sql, params: totals.params }];
  extraQueries.push(...extras.map((extra) => ({ name: extra.key, sql: extra.sql, params: extra.params })));

  const tableKey = options.adapter.inferTable(state.dimensions);

  let main;
  if (prev) {
    const comparison = resolveComparisonSQL(state, prev, options, params.comparisonFilter);
    extraQueries.push({ name: "count", sql: comparison.countSql, params: comparison.countParams });
    main = comparison;
  } else {
    main = resolveToSQLOptimized(state, options);
  }

  const hasPrev = Boolean(prev);
  return {
    tableKey,
    sql: main.sql,
    params: main.params,
    extraQueries,
    shape: (rows, _params, resolvedExtras) => shapeDataQuery(rows, resolvedExtras, { hasPrev })
  };
}
|
|
136
|
+
/**
 * Build the query plan for the `data-detail` tool (a per-day timeseries).
 *
 * The main statement comes from `resolveToSQL`. A `totals` extra query is
 * always added, and a `prevTotals` one when a comparison state (`params.qc`)
 * is given. The `shape` callback coerces metric columns, pads missing days
 * with `padTimeseries` (default fill) when the filter yields both a start and
 * an end date, and reports `meta.totals` (plus `meta.previousTotals` when the
 * `prevTotals` extra is present).
 *
 * @param params Analysis params; reads `q` and `qc`.
 * @param options Resolver options; `options.adapter.inferTable` picks the table key.
 * @returns The plan.
 * @throws Error when `q` / `qc` are not BuilderStates or when `q.dimensions` lacks `date`.
 */
function buildDataDetailPlan(params, options) {
  const tool = "data-detail";
  const state = requireBuilderState(params.q, tool);
  if (!state.dimensions.includes("date")) throw new Error("data-detail: `date` dimension is required");

  const main = resolveToSQL(state, options);
  const totals = buildTotalsSql(state, options);
  const prev = optionalBuilderState(params.qc, tool, "qc");

  const extraQueries = [{ name: "totals", sql: totals.sql, params: totals.params }];
  if (prev) {
    const previousTotals = buildTotalsSql(prev, options);
    extraQueries.push({ name: "prevTotals", sql: previousTotals.sql, params: previousTotals.params });
  }

  const tableKey = options.adapter.inferTable(state.dimensions);
  const { startDate: rangeStart, endDate: rangeEnd } = extractDateRange(state.filter);

  // Numeric totals with zero defaults; tolerates a missing totals row.
  const toTotals = (row) => {
    const source = row ?? {};
    return {
      clicks: Number(source.clicks ?? 0),
      impressions: Number(source.impressions ?? 0),
      ctr: Number(source.ctr ?? 0),
      position: Number(source.position ?? 0)
    };
  };

  return {
    tableKey,
    sql: main.sql,
    params: main.params,
    extraQueries,
    shape: (rows, _params, extras) => {
      const coerced = rows.map((row) => coerceNumericCols(row));
      const canPad = rangeStart && rangeEnd;
      const results = canPad ? padTimeseries(coerced, { startDate: rangeStart, endDate: rangeEnd }) : coerced;
      const meta = { totals: toTotals(extras?.totals?.[0]) };
      if (extras?.prevTotals) meta.previousTotals = toTotals(extras.prevTotals[0]);
      return { results, meta };
    }
  };
}
|
|
191
|
+
/**
 * Token-level synonym table: each key is rewritten to its value, and an empty
 * value drops the token. Held in a Map (not an object literal) so tokens that
 * collide with `Object.prototype` members, such as `constructor`, are never
 * mistaken for entries.
 */
const SYNONYMS = /* @__PURE__ */ new Map([
  ["checker", "validator"],
  ["tester", "validator"],
  ["verifier", "validator"],
  ["verify", "validate"],
  ["check", "validate"],
  ["test", "validate"],
  ["checking", "validate"],
  ["testing", "validate"],
  ["creator", "generator"],
  ["builder", "generator"],
  ["maker", "generator"],
  ["create", "generate"],
  ["build", "generate"],
  ["make", "generate"],
  ["lookup", "search"],
  ["finder", "search"],
  ["find", "search"],
  ["online", ""],
  ["free", ""]
]);

/** Tokens ending in "s" that must never be singularised. */
const NO_STRIP_S = new Set([
  "css", "js", "ts", "os", "as", "is", "us", "has", "was", "its", "this", "yes", "no",
  "bus", "gas", "dns", "rss", "sms", "gps", "aws", "sas", "cms", "ios",
  "less", "loss", "miss", "pass", "class", "access", "process", "express", "address",
  "cross", "press", "stress", "progress", "success", "business", "wordpress",
  "status", "radius", "nexus", "focus", "bonus", "campus", "census", "corpus",
  "nucleus", "stimulus", "terminus", "versus", "virus", "surplus", "cactus",
  "analysis", "basis", "thesis", "crisis", "axis", "genesis", "synopsis", "diagnosis",
  "emphasis", "hypothesis", "synthesis", "parenthesis",
  "redis", "apis", "chaos", "demos", "logos", "photos", "videos",
  "nuxtjs", "nextjs", "nodejs", "reactjs", "vuejs", "angularjs", "expressjs", "nestjs",
  "threejs", "alpinejs", "solidjs", "sveltejs", "dejs", "bunjs", "denojs",
  "canvas", "atlas", "alias", "bias", "perhaps", "whereas", "kubernetes", "sass",
  "postgres", "always", "across",
  "previous", "various", "serious", "famous", "anonymous", "continuous", "dangerous",
  "generous", "obvious", "numerous", "curious", "nervous", "conscious"
]);

/**
 * Heuristically strip an English plural suffix from a lower-case token.
 *
 * Tokens of three characters or fewer and tokens listed in `NO_STRIP_S` are
 * returned unchanged. Applying the function twice gives the same result as
 * applying it once.
 *
 * @param token Lower-case token without whitespace.
 * @returns The singularised token.
 */
function depluralize(token) {
  if (token.length <= 3 || NO_STRIP_S.has(token)) return token;
  if (token.length > 4) {
    if (token.endsWith("ies")) return `${token.slice(0, -3)}y`;
    // "classes" -> "class": drop the whole "es" so the result matches the
    // singular form (previously only the final "s" was removed, giving "classe").
    if (token.endsWith("sses")) return token.slice(0, -2);
    if (token.endsWith("ses")) return token.slice(0, -1);
  }
  if (token.endsWith("shes") || token.endsWith("ches") || token.endsWith("xes") || token.endsWith("zes")) return token.slice(0, -2);
  if (token.endsWith("s") && !token.endsWith("ss")) return token.slice(0, -1);
  return token;
}

/**
 * Map one token to its canonical form: a direct synonym hit wins; otherwise
 * the token is singularised and the synonym table is consulted again, so that
 * plurals of synonym keys ("tests", "checkers") collapse exactly like their
 * singular forms. An empty string means "drop this token".
 */
function canonicalToken(token) {
  const direct = SYNONYMS.get(token);
  if (direct !== void 0) return direct;
  const singular = depluralize(token);
  return SYNONYMS.get(singular) ?? singular;
}

/** Characters treated as token separators in addition to whitespace. */
const SEPARATOR_RE = /[-_/.@#:+]+/g;
const WHITESPACE_RE = /\s+/g;

/**
 * Produce a canonical form of a search query for grouping near-duplicates:
 * lower-case, split on separators and whitespace, apply synonyms and plural
 * stripping per token, drop empty tokens, sort, and join with single spaces.
 *
 * Idempotent: `normalizeQuery(normalizeQuery(q)) === normalizeQuery(q)`.
 *
 * @param query Raw search query.
 * @returns The canonical form; an empty string when no token survives.
 */
function normalizeQuery(query) {
  return query
    .toLowerCase()
    .replace(SEPARATOR_RE, " ")
    .replace(WHITESPACE_RE, " ")
    .trim()
    .split(" ")
    .filter(Boolean)
    .map(canonicalToken)
    .filter(Boolean)
    .sort()
    .join(" ");
}
|
|
340
|
+
export { buildDataDetailPlan, buildDataQueryPlan, normalizeQuery };
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { AnalysisQuerySource } from "@gscdump/engine/resolver";
|
|
2
|
+
/**
 * One ranked content-gap finding: a query, its metrics, the URL it currently
 * maps to and a suggested alternative. The scoring semantics of the
 * similarity / divergence / impact numbers are defined by the implementation,
 * which is not part of this declaration file.
 */
interface ContentGapResult {
  query: string;
  impressions: number;
  clicks: number;
  avgPosition: number;
  currentUrl: string;
  currentSimilarity: number;
  suggestedUrl: string;
  suggestedSimilarity: number;
  /** Other candidate URLs with their similarity scores. */
  alternatives: { url: string; similarity: number }[];
  divergence: number;
  impact: number;
}
|
|
18
|
+
/** Progress notification passed to `ContentGapOptions.onProgress`. */
interface ContentGapProgress {
  /** Stage the analysis is currently in. */
  phase:
    | 'idle'
    | 'loading-model'
    | 'fetching-data'
    | 'embedding-queries'
    | 'embedding-urls'
    | 'computing-gaps'
    | 'done'
    | 'error';
  message: string;
  /** Presumably the number of items processed so far in the current phase — confirm against the implementation. */
  done?: number;
  /** Presumably the number of items expected in the current phase — confirm against the implementation. */
  total?: number;
  modelMs?: number;
  sqlMs?: number;
  embedMs?: number;
  computeMs?: number;
}
|
|
28
|
+
/** Optional tuning knobs for `analyzeContentGap`; defaults live in the implementation, which is not visible here. */
interface ContentGapOptions {
  maxQueries?: number;
  maxUrls?: number;
  minImpressions?: number;
  minDivergence?: number;
  /** Execution backend to use. */
  device?: 'webgpu' | 'wasm';
  /** Callback that receives progress notifications. */
  onProgress?: (progress: ContentGapProgress) => void;
}
|
|
36
|
+
/**
 * Content-gap analysis needs a source that offers a raw-SQL escape hatch.
 * Its query shape (CTEs + window functions against `main.page_keywords`)
 * cannot be expressed as a {@link BuilderState}, so the analyzer bypasses
 * `queryRows` and goes directly through `source.executeSql`. This error is
 * presumably raised for sources that lack that capability — confirm against
 * the implementation.
 */
declare class ContentGapSourceUnsupportedError extends Error {
  constructor(kind: string);
}
|
|
45
|
+
/** Value resolved by `analyzeContentGap`: the findings plus run metadata. */
interface ContentGapAnalysis {
  results: ContentGapResult[];
  /** Timings (by their names, in milliseconds), cache statistics and the backend/model that was used. */
  meta: {
    modelMs: number;
    sqlMs: number;
    embedMs: number;
    computeMs: number;
    cacheHits: number;
    totalInputs: number;
    device: 'webgpu' | 'wasm';
    modelId: string;
  };
}
|
|
58
|
+
/** A query with its metrics and current URL, as consumed by `rankContentGaps`. */
interface QueryCandidate {
  query: string;
  impressions: number;
  clicks: number;
  avgPosition: number;
  /** URL the query currently maps to. */
  currentUrl: string;
}
|
|
65
|
+
/** Normalise a URL string; the exact rules live in the implementation, which is not part of this declaration file. */
declare function normalizeUrl(u: string): string;

/** Derive a text representation from a URL — presumably the text that gets embedded; confirm against the implementation. */
declare function deriveUrlText(url: string): string;

/** Similarity of two vectors — by its name, a cosine for already-normalised inputs; confirm against the implementation. */
declare function cosineNormalized(a: Float32Array, b: Float32Array): number;

/**
 * Rank content gaps from query candidates, URLs and their embeddings.
 * NOTE(review): `queryEmbeddings` / `urlEmbeddings` presumably align
 * index-by-index with `queries` / `urls` — confirm against the implementation.
 */
declare function rankContentGaps(
  queries: QueryCandidate[],
  urls: string[],
  queryEmbeddings: Float32Array[],
  urlEmbeddings: Float32Array[],
  minDivergence: number,
): ContentGapResult[];

/** Run the content-gap analysis against `source` and resolve with the findings and run metadata. */
declare function analyzeContentGap(
  source: AnalysisQuerySource,
  opts?: ContentGapOptions,
): Promise<ContentGapAnalysis>;
|
|
70
|
+
export { type ContentGapAnalysis, type ContentGapOptions, type ContentGapProgress, type ContentGapResult, ContentGapSourceUnsupportedError, analyzeContentGap, cosineNormalized, deriveUrlText, normalizeUrl, rankContentGaps };
|