@gscdump/engine 0.30.0 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/entities.mjs +100 -1
- package/dist/_chunks/libs/icebird.d.mts +2 -2
- package/dist/_chunks/resolver.mjs +8 -6
- package/dist/entities.d.mts +55 -1
- package/dist/entities.mjs +2 -2
- package/dist/rollups.d.mts +6 -0
- package/dist/rollups.mjs +38 -17
- package/package.json +3 -3
|
@@ -16,6 +16,105 @@ async function readOptional(ds, key, signal) {
|
|
|
16
16
|
throw e;
|
|
17
17
|
});
|
|
18
18
|
}
|
|
19
|
+
/**
 * Column definitions for the query-dimension parquet file.
 *
 * Each entry is `{ name, type, nullable }`; every column is declared
 * non-nullable. The list is derived from a compact [name, type] table so the
 * shared `nullable: false` flag is stated once.
 */
const QUERY_DIM_COLUMNS = [
	["query", "VARCHAR"],
	["query_canonical", "VARCHAR"],
	["intent_code", "INTEGER"],
	["normalizer_version", "INTEGER"],
	["intent_version", "INTEGER"]
].map(([name, type]) => ({
	name,
	type,
	nullable: false
}));
|
|
46
|
+
/**
 * Storage prefix (no trailing slash) under which the query-dimension files live.
 *
 * @param {{ userId: unknown, siteId?: unknown }} ctx Tenant context.
 * @returns {string} `u_<userId>/<siteId>/entities/query_dim` when `ctx.siteId`
 *   is truthy, otherwise `u_<userId>/entities/query_dim`.
 */
function queryDimPrefix(ctx) {
	// The site segment is only included when a (truthy) siteId is present.
	const tenantRoot = ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}` : `u_${ctx.userId}`;
	return `${tenantRoot}/entities/query_dim`;
}
|
|
49
|
+
/**
 * Storage key of the query-dimension parquet file for a tenant.
 *
 * @param {object} ctx Tenant context, forwarded to `queryDimPrefix`.
 * @returns {string} `<prefix>/index.parquet`.
 */
function queryDimParquetKey(ctx) {
	const prefix = queryDimPrefix(ctx);
	return `${prefix}/index.parquet`;
}
|
|
52
|
+
/**
 * Storage key of the JSON sidecar that accompanies the query-dimension parquet.
 *
 * @param {object} ctx Tenant context, forwarded to `queryDimPrefix`.
 * @returns {string} `<prefix>/index.json`.
 */
function queryDimMetaKey(ctx) {
	const prefix = queryDimPrefix(ctx);
	return `${prefix}/index.json`;
}
|
|
55
|
+
/**
 * Turn raw query strings into query-dimension records.
 *
 * Each input is coerced with `String()`. Blank queries (empty or
 * whitespace-only) are dropped, and exact duplicates are skipped so only the
 * first occurrence (in iteration order) produces a record.
 *
 * `query_canonical` is never left empty: when `deps.normalizeQuery` returns
 * `null`/`undefined`, an empty string, or a whitespace-only string, the raw
 * query is used instead. Any other return value is stored unchanged.
 *
 * @param {Iterable<unknown>} queries Raw queries.
 * @param {object} deps Injected derivation.
 * @param {(query: string) => string} deps.normalizeQuery Produces the canonical form.
 * @param {(query: string) => number} deps.classifyIntentCode Produces the intent code
 *   from the raw (not canonical) query.
 * @param {number} deps.normalizerVersion Copied onto every record.
 * @param {number} deps.intentVersion Copied onto every record.
 * @returns {Array<{query: string, query_canonical: string, intent_code: number,
 *   normalizer_version: number, intent_version: number}>} One record per distinct
 *   non-blank query, in first-seen order.
 */
function buildQueryDimRecords(queries, deps) {
	const seen = /* @__PURE__ */ new Set();
	const out = [];
	for (const raw of queries) {
		const query = String(raw);
		if (query.trim() === "" || seen.has(query)) continue;
		seen.add(query);
		const canonical = deps.normalizeQuery(query);
		// Fold missing / blank canonicals back to the raw query so the column
		// (declared non-nullable) always holds a usable key.
		const hasCanonical = canonical != null && String(canonical).trim() !== "";
		out.push({
			query,
			query_canonical: hasCanonical ? canonical : query,
			intent_code: deps.classifyIntentCode(query),
			normalizer_version: deps.normalizerVersion,
			intent_version: deps.intentVersion
		});
	}
	return out;
}
|
|
73
|
+
/**
 * Create the store that persists the query dimension as a parquet file plus a
 * JSON sidecar, both under the tenant's `queryDimPrefix`.
 *
 * @param {object} opts
 * @param {object} opts.dataSource Storage backend; this store calls its
 *   `list(prefix)`, `read(key)` and `write(key, bytes)` methods.
 * @returns {{parquetKey: Function, write: Function, loadMeta: Function, loadRecords: Function}}
 */
function createQueryDimStore({ dataSource }) {
	// Presence check: list the prefix and look for the exact key.
	const hasKey = async (key, prefix) => {
		const listed = await dataSource.list(prefix);
		return listed.includes(key);
	};

	// Coerce a decoded parquet row back into the record shape.
	const toRecord = (row) => ({
		query: String(row.query),
		query_canonical: String(row.query_canonical),
		intent_code: Number(row.intent_code),
		normalizer_version: Number(row.normalizer_version),
		intent_version: Number(row.intent_version)
	});

	/**
	 * Encode and write the parquet (rows sorted by `query`), then write the
	 * sidecar. Sidecar versions come from the first record, or 0 when
	 * `records` is empty.
	 */
	async function write(ctx, records, builtAt) {
		const parquetKey = queryDimParquetKey(ctx);
		const encoded = encodeRowsToParquetFlex(records, {
			columns: QUERY_DIM_COLUMNS,
			sortKey: ["query"]
		});
		await dataSource.write(parquetKey, encoded);
		const sidecar = {
			version: 1,
			builtAt,
			rowCount: records.length,
			normalizerVersion: records[0]?.normalizer_version ?? 0,
			intentVersion: records[0]?.intent_version ?? 0
		};
		const sidecarBytes = new TextEncoder().encode(JSON.stringify(sidecar));
		await dataSource.write(queryDimMetaKey(ctx), sidecarBytes);
		return {
			parquetKey,
			rowCount: records.length
		};
	}

	/** Read and parse the JSON sidecar; resolves to null when it is not listed. */
	async function loadMeta(ctx) {
		const key = queryDimMetaKey(ctx);
		const present = await hasKey(key, `${queryDimPrefix(ctx)}/`);
		if (!present) return null;
		const bytes = await dataSource.read(key);
		const text = new TextDecoder().decode(bytes);
		return JSON.parse(text);
	}

	/** Decode the parquet into records; resolves to [] when it is not listed. */
	async function loadRecords(ctx) {
		const key = queryDimParquetKey(ctx);
		const present = await hasKey(key, `${queryDimPrefix(ctx)}/`);
		if (!present) return [];
		const rows = await decodeParquetToRows(await dataSource.read(key));
		return rows.map((row) => toRecord(row));
	}

	return {
		parquetKey: queryDimParquetKey,
		write,
		loadMeta,
		loadRecords
	};
}
|
|
19
118
|
const YEAR_MONTH_RE = /^(\d{4})-(\d{2})-/;
|
|
20
119
|
function inspectionIndexKey(ctx) {
|
|
21
120
|
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/index.json` : `u_${ctx.userId}/entities/inspections/index.json`;
|
|
@@ -893,4 +992,4 @@ function createEmptyTypesStore(opts) {
|
|
|
893
992
|
}
|
|
894
993
|
};
|
|
895
994
|
}
|
|
896
|
-
export { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
|
995
|
+
export { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, buildQueryDimRecords, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createQueryDimStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, queryDimMetaKey, queryDimParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
|
@@ -133,7 +133,7 @@ interface Snapshot {
|
|
|
133
133
|
'sequence-number': number;
|
|
134
134
|
'timestamp-ms': number;
|
|
135
135
|
'manifest-list': string;
|
|
136
|
-
manifests?: Manifest
|
|
136
|
+
manifests?: Manifest[];
|
|
137
137
|
summary: {
|
|
138
138
|
// spec: "value of these fields should be of string type"
|
|
139
139
|
operation: string; // 'spark.app.id'?: string
|
|
@@ -192,7 +192,7 @@ interface MetadataLog {
|
|
|
192
192
|
'timestamp-ms': number;
|
|
193
193
|
'metadata-file': string;
|
|
194
194
|
}
|
|
195
|
-
interface Manifest
|
|
195
|
+
interface Manifest {
|
|
196
196
|
manifest_path: string;
|
|
197
197
|
manifest_length: bigint;
|
|
198
198
|
partition_spec_id: number;
|
|
@@ -297,8 +297,10 @@ function createSqlFragments(config) {
|
|
|
297
297
|
if (isMetricDimension(f.dimension)) continue;
|
|
298
298
|
if (f.dimension === "date") continue;
|
|
299
299
|
if (f.operator === "topLevel") continue;
|
|
300
|
-
const
|
|
301
|
-
const
|
|
300
|
+
const dim = f.dimension;
|
|
301
|
+
const cRef = colRef(tableKey, dimColumn(dim, tableKey));
|
|
302
|
+
const matchExpr = dim === "page" || dim === "queryCanonical" ? dimExprSql(dim, tableKey) : cRef;
|
|
303
|
+
const patternExpr = dim === "queryCanonical" ? matchExpr : cRef;
|
|
302
304
|
switch (f.operator) {
|
|
303
305
|
case "equals":
|
|
304
306
|
preds.push(sql`${matchExpr} = ${f.expression}`);
|
|
@@ -307,16 +309,16 @@ function createSqlFragments(config) {
|
|
|
307
309
|
preds.push(sql`${matchExpr} != ${f.expression}`);
|
|
308
310
|
break;
|
|
309
311
|
case "contains":
|
|
310
|
-
preds.push(sql`${
|
|
312
|
+
preds.push(sql`${patternExpr} LIKE ${`%${escapeLike(f.expression)}%`} ESCAPE '\\'`);
|
|
311
313
|
break;
|
|
312
314
|
case "notContains":
|
|
313
|
-
preds.push(sql`${
|
|
315
|
+
preds.push(sql`${patternExpr} NOT LIKE ${`%${escapeLike(f.expression)}%`} ESCAPE '\\'`);
|
|
314
316
|
break;
|
|
315
317
|
case "includingRegex":
|
|
316
|
-
preds.push(regexPredicate(
|
|
318
|
+
preds.push(regexPredicate(patternExpr, f.expression, false));
|
|
317
319
|
break;
|
|
318
320
|
case "excludingRegex":
|
|
319
|
-
preds.push(regexPredicate(
|
|
321
|
+
preds.push(regexPredicate(patternExpr, f.expression, true));
|
|
320
322
|
break;
|
|
321
323
|
}
|
|
322
324
|
}
|
package/dist/entities.d.mts
CHANGED
|
@@ -1,6 +1,60 @@
|
|
|
1
1
|
import { DataSource } from "./_chunks/storage.mjs";
|
|
2
2
|
import { ScheduleState } from "./schedule.mjs";
|
|
3
3
|
import { ColumnDef, TenantCtx } from "@gscdump/contracts";
|
|
4
|
+
interface QueryDimRecord {
|
|
5
|
+
query: string;
|
|
6
|
+
/** Lexical canonical, never empty: NULL/'' folds to the raw query. */
|
|
7
|
+
query_canonical: string;
|
|
8
|
+
/** Packed search-intent code (see `@gscdump/analysis` `encodeIntent`). */
|
|
9
|
+
intent_code: number;
|
|
10
|
+
normalizer_version: number;
|
|
11
|
+
intent_version: number;
|
|
12
|
+
}
|
|
13
|
+
/** JSON sidecar: versions + freshness, readable without decoding the parquet. */
|
|
14
|
+
interface QueryDimMeta {
|
|
15
|
+
version: 1; // sidecar format version; the store's write() always emits 1
|
|
16
|
+
builtAt: number; // build timestamp passed to write(), stored unchanged (presumably epoch ms — confirm with callers)
|
|
17
|
+
rowCount: number; // number of records handed to write()
|
|
18
|
+
normalizerVersion: number; // normalizer_version of the first written record, or 0 when no records were written
|
|
19
|
+
intentVersion: number; // intent_version of the first written record, or 0 when no records were written
|
|
20
|
+
}
|
|
21
|
+
declare function queryDimParquetKey(ctx: TenantCtx): string;
|
|
22
|
+
declare function queryDimMetaKey(ctx: TenantCtx): string;
|
|
23
|
+
/**
|
|
24
|
+
* Injected derivation. `engine` never imports `@gscdump/analysis`; the host
|
|
25
|
+
* passes `normalizeQuery` / `classifyIntentCode` (e.g. `encodeIntent ∘
|
|
26
|
+
* classifyQueryIntent`) plus their version constants.
|
|
27
|
+
*/
|
|
28
|
+
interface QueryDimDeps {
|
|
29
|
+
normalizeQuery: (query: string) => string;
|
|
30
|
+
normalizerVersion: number;
|
|
31
|
+
/** Returns the packed intent code for a raw query. */
|
|
32
|
+
classifyIntentCode: (query: string) => number;
|
|
33
|
+
intentVersion: number;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Pure: distinct raw queries → dimension records. De-dupes, drops empties, and
|
|
37
|
+
* folds an empty/whitespace canonical back to the raw query so the key is
|
|
38
|
+
* total (matches the read path's `COALESCE(NULLIF(query_canonical, ''), query)`).
|
|
39
|
+
*/
|
|
40
|
+
declare function buildQueryDimRecords(queries: Iterable<string>, deps: QueryDimDeps): QueryDimRecord[];
|
|
41
|
+
interface QueryDimStore {
|
|
42
|
+
parquetKey: (ctx: TenantCtx) => string;
|
|
43
|
+
/** Write the parquet + JSON sidecar. Last-write-wins; no history. */
|
|
44
|
+
write: (ctx: TenantCtx, records: readonly QueryDimRecord[], builtAt: number) => Promise<{
|
|
45
|
+
parquetKey: string;
|
|
46
|
+
rowCount: number;
|
|
47
|
+
}>;
|
|
48
|
+
/** Read the sidecar (versions + freshness), or null on first build. */
|
|
49
|
+
loadMeta: (ctx: TenantCtx) => Promise<QueryDimMeta | null>;
|
|
50
|
+
/** Decode the dimension rows (test/inspection; reads JOIN the parquet by key). */
|
|
51
|
+
loadRecords: (ctx: TenantCtx) => Promise<QueryDimRecord[]>;
|
|
52
|
+
}
|
|
53
|
+
declare function createQueryDimStore({
|
|
54
|
+
dataSource
|
|
55
|
+
}: {
|
|
56
|
+
dataSource: DataSource;
|
|
57
|
+
}): QueryDimStore;
|
|
4
58
|
/**
|
|
5
59
|
* GSC URL inspection result fields we persist. Mirrors the
|
|
6
60
|
* `searchconsole_v1.Schema$UrlInspectionResult` shape but as plain JSON
|
|
@@ -442,4 +496,4 @@ interface CreateEmptyTypesStoreOptions {
|
|
|
442
496
|
now?: () => number;
|
|
443
497
|
}
|
|
444
498
|
declare function createEmptyTypesStore(opts: CreateEmptyTypesStoreOptions): EmptyTypesStore;
|
|
445
|
-
export { CreateEmptyTypesStoreOptions, CreateIndexingMetadataStoreOptions, CreateInspectionStoreOptions, CreateSitemapStoreOptions, DateRange, DeltaEntry, EmptyTypesDoc, EmptyTypesStore, INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, IndexingMetadataIndex, IndexingMetadataRecord, IndexingMetadataStore, InspectionEventRow, InspectionHistoryShard, InspectionIndex, InspectionParquetRow, InspectionRecord, InspectionStore, LoadUrlsOptions, ParsedUrl, ReconcileResult, SitemapHistoryDoc, SitemapIndex, SitemapRecord, SitemapStore, SitemapUrlRecord, SnapshotUrlsResult, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
|
499
|
+
export { CreateEmptyTypesStoreOptions, CreateIndexingMetadataStoreOptions, CreateInspectionStoreOptions, CreateSitemapStoreOptions, DateRange, DeltaEntry, EmptyTypesDoc, EmptyTypesStore, INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, IndexingMetadataIndex, IndexingMetadataRecord, IndexingMetadataStore, InspectionEventRow, InspectionHistoryShard, InspectionIndex, InspectionParquetRow, InspectionRecord, InspectionStore, LoadUrlsOptions, ParsedUrl, QueryDimDeps, QueryDimMeta, QueryDimRecord, QueryDimStore, ReconcileResult, SitemapHistoryDoc, SitemapIndex, SitemapRecord, SitemapStore, SitemapUrlRecord, SnapshotUrlsResult, buildQueryDimRecords, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createQueryDimStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, queryDimMetaKey, queryDimParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
package/dist/entities.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
|
|
2
|
-
export { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
|
1
|
+
import { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, buildQueryDimRecords, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createQueryDimStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, queryDimMetaKey, queryDimParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
|
|
2
|
+
export { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, buildQueryDimRecords, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createQueryDimStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, queryDimMetaKey, queryDimParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
package/dist/rollups.d.mts
CHANGED
|
@@ -244,6 +244,12 @@ declare function runWindowed(opts: {
|
|
|
244
244
|
start: string;
|
|
245
245
|
end: string;
|
|
246
246
|
}) => string;
|
|
247
|
+
/**
|
|
248
|
+
* Extra named file sets merged into every window's `runSQL` (alongside the
|
|
249
|
+
* windowed `FILES`). Use to JOIN a non-windowed sidecar (e.g. the query
|
|
250
|
+
* dimension parquet via `{ QUERY_DIM: { keys: [...] } }`) inside `sqlFor`.
|
|
251
|
+
*/
|
|
252
|
+
extraFileSets?: Record<string, FileSetRef>;
|
|
247
253
|
}): Promise<Row$1[]>;
|
|
248
254
|
/**
|
|
249
255
|
* Daily totals across the full history. One row per (date, table) with
|
package/dist/rollups.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import "./_chunks/layout.mjs";
|
|
2
2
|
import { engineErrors } from "./errors.mjs";
|
|
3
3
|
import { encodeRowsToParquetFlex } from "./adapters/hyparquet.mjs";
|
|
4
|
-
import { createIndexingMetadataStore, createSitemapStore, inspectionParquetKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
|
|
4
|
+
import { createIndexingMetadataStore, createQueryDimStore, createSitemapStore, inspectionParquetKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
|
|
5
5
|
import { MS_PER_DAY } from "gscdump";
|
|
6
6
|
function rollupPrefix(ctx, searchType) {
|
|
7
7
|
const base = ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/rollups` : `u_${ctx.userId}/rollups`;
|
|
@@ -237,10 +237,13 @@ async function runWindowed(opts) {
|
|
|
237
237
|
const result = await opts.engine.runSQL({
|
|
238
238
|
ctx: opts.ctx,
|
|
239
239
|
table: opts.table,
|
|
240
|
-
fileSets: {
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
240
|
+
fileSets: {
|
|
241
|
+
FILES: {
|
|
242
|
+
table: opts.table,
|
|
243
|
+
partitions: w.partitions
|
|
244
|
+
},
|
|
245
|
+
...opts.extraFileSets
|
|
246
|
+
},
|
|
244
247
|
sql: opts.sqlFor(w),
|
|
245
248
|
...opts.searchType !== void 0 ? { searchType: opts.searchType } : {}
|
|
246
249
|
});
|
|
@@ -638,23 +641,41 @@ const queryCanonicalDailyRollup = {
|
|
|
638
641
|
}
|
|
639
642
|
],
|
|
640
643
|
parquetSortKey: ["date", "query_canonical"],
|
|
641
|
-
async build({ engine, ctx, searchType }) {
|
|
644
|
+
async build({ engine, ctx, dataSource, searchType }) {
|
|
645
|
+
const dimStore = createQueryDimStore({ dataSource });
|
|
646
|
+
const useDim = await dimStore.loadMeta(ctx) !== null;
|
|
647
|
+
const canonExpr = useDim ? `COALESCE(qd.query_canonical, NULLIF(q.query_canonical, ''), q.query)` : `COALESCE(NULLIF(query_canonical, ''), query)`;
|
|
642
648
|
return (await runWindowed({
|
|
643
649
|
engine,
|
|
644
650
|
ctx,
|
|
645
651
|
table: "queries",
|
|
646
652
|
...searchType !== void 0 ? { searchType } : {},
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
653
|
+
...useDim ? { extraFileSets: { QUERY_DIM: {
|
|
654
|
+
table: "queries",
|
|
655
|
+
keys: [dimStore.parquetKey(ctx)]
|
|
656
|
+
} } } : {},
|
|
657
|
+
sqlFor: useDim ? (w) => `
|
|
658
|
+
SELECT
|
|
659
|
+
${canonExpr} AS query_canonical,
|
|
660
|
+
CAST(q.date AS VARCHAR) AS date,
|
|
661
|
+
SUM(q.clicks)::BIGINT AS clicks,
|
|
662
|
+
SUM(q.impressions)::BIGINT AS impressions,
|
|
663
|
+
SUM(q.sum_position)::DOUBLE AS sum_position
|
|
664
|
+
FROM read_parquet({{FILES}}, union_by_name = true) q
|
|
665
|
+
LEFT JOIN read_parquet({{QUERY_DIM}}, union_by_name = true) qd ON q.query = qd.query
|
|
666
|
+
WHERE q.date >= '${w.start}' AND q.date <= '${w.end}'
|
|
667
|
+
GROUP BY ${canonExpr}, q.date
|
|
668
|
+
` : (w) => `
|
|
669
|
+
SELECT
|
|
670
|
+
${canonExpr} AS query_canonical,
|
|
671
|
+
CAST(date AS VARCHAR) AS date,
|
|
672
|
+
SUM(clicks)::BIGINT AS clicks,
|
|
673
|
+
SUM(impressions)::BIGINT AS impressions,
|
|
674
|
+
SUM(sum_position)::DOUBLE AS sum_position
|
|
675
|
+
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
676
|
+
WHERE date >= '${w.start}' AND date <= '${w.end}'
|
|
677
|
+
GROUP BY ${canonExpr}, date
|
|
678
|
+
`
|
|
658
679
|
})).map((r) => ({
|
|
659
680
|
query_canonical: String(r.query_canonical),
|
|
660
681
|
date: String(r.date),
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.
|
|
4
|
+
"version": "0.31.0",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -191,8 +191,8 @@
|
|
|
191
191
|
"hyparquet": "^1.26.1",
|
|
192
192
|
"hyparquet-writer": "^0.16.1",
|
|
193
193
|
"proper-lockfile": "^4.1.2",
|
|
194
|
-
"@gscdump/contracts": "0.
|
|
195
|
-
"gscdump": "0.
|
|
194
|
+
"@gscdump/contracts": "0.31.0",
|
|
195
|
+
"gscdump": "0.31.0"
|
|
196
196
|
},
|
|
197
197
|
"devDependencies": {
|
|
198
198
|
"@duckdb/duckdb-wasm": "^1.32.0",
|