@gscdump/engine 0.20.3 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -264,7 +264,7 @@ async function gcOrphansImpl(deps, now, graceMs, opts = {}) {
264
264
  }
265
265
  return { deleted: retired.length + sweptOrphans + hourlyDeleted };
266
266
  }
267
- const URL_PURGE_TABLES = ["pages", "page_keywords"];
267
+ const URL_PURGE_TABLES = ["pages", "page_queries"];
268
268
  const MAX_DAY_BYTES = 100 * 1024 * 1024;
269
269
  const URL_COLUMNS = /* @__PURE__ */ new Set();
270
270
  for (const t of Object.keys(SCHEMAS)) for (const col of SCHEMAS[t].columns) if (col.name === "url") URL_COLUMNS.add(`${t}:url`);
@@ -0,0 +1,67 @@
1
+ import { t as SCHEMAS } from "./schema.mjs";
2
+ const ICEBERG_TABLES = [
3
+ "pages",
4
+ "queries",
5
+ "countries",
6
+ "page_queries",
7
+ "dates"
8
+ ];
9
+ const ICEBERG_PARTITION_COLUMNS = [{
10
+ name: "site_id",
11
+ type: "STRING",
12
+ required: true,
13
+ fieldId: 1
14
+ }, {
15
+ name: "search_type",
16
+ type: "STRING",
17
+ required: true,
18
+ fieldId: 2
19
+ }];
20
+ const ICEBERG_FIELD_ID_BASE = 3;
21
+ const ICEBERG_PARTITION_SPEC = [
22
+ {
23
+ sourceColumn: "site_id",
24
+ transform: "identity",
25
+ name: "site_id"
26
+ },
27
+ {
28
+ sourceColumn: "search_type",
29
+ transform: "identity",
30
+ name: "search_type"
31
+ },
32
+ {
33
+ sourceColumn: "date",
34
+ transform: "month",
35
+ name: "date_month"
36
+ }
37
+ ];
38
/**
 * Translate a base-schema column type name into the matching Iceberg
 * primitive type name.
 *
 * @param {string} t - Source column type: "VARCHAR", "INTEGER", "BIGINT",
 *   "DOUBLE" or "DATE".
 * @returns {string} "STRING", "INT", "LONG", "DOUBLE" or "DATE" respectively.
 * @throws {TypeError} When `t` is not one of the supported type names.
 */
function mapColumnType(t) {
  switch (t) {
    case "VARCHAR": return "STRING";
    case "INTEGER": return "INT";
    case "BIGINT": return "LONG";
    case "DOUBLE": return "DOUBLE";
    case "DATE": return "DATE";
    default:
      // Without this branch an unmapped type fell through and the function
      // returned undefined, which then surfaced as `type: undefined` in the
      // generated column spec instead of failing where the problem is.
      throw new TypeError(`Unsupported column type for Iceberg mapping: ${String(t)}`);
  }
}
47
/**
 * Build the Iceberg table specification for one table declared in SCHEMAS.
 *
 * The resulting column list is the shared partition columns followed by the
 * table's own columns, each converted with `mapColumnType`.
 *
 * @param {string} table - Key of the table in SCHEMAS.
 * @returns {{table: string, columns: object[], partitionSpec: object[], identityColumns: string[]}}
 *   `columns` holds `{ name, type, required, fieldId }` entries;
 *   `identityColumns` is "site_id", "search_type" and then the table's sort key.
 * @throws {TypeError} When `table` has no entry in SCHEMAS.
 */
function icebergTableSpec(table) {
  const base = SCHEMAS[table];
  if (base == null) {
    // Same error class the property access below would raise, but naming the
    // offending table instead of "cannot read properties of undefined".
    throw new TypeError(`Unknown table for Iceberg spec: ${String(table)}`);
  }
  const dataColumns = base.columns.map((col, i) => ({
    name: col.name,
    type: mapColumnType(col.type),
    required: !col.nullable,
    // Lower field ids are taken by ICEBERG_PARTITION_COLUMNS; data columns are
    // numbered from the shared base constant rather than a repeated literal.
    fieldId: ICEBERG_FIELD_ID_BASE + i
  }));
  return {
    table,
    columns: [...ICEBERG_PARTITION_COLUMNS, ...dataColumns],
    partitionSpec: ICEBERG_PARTITION_SPEC,
    identityColumns: [
      "site_id",
      "search_type",
      ...base.sortKey
    ]
  };
}
66
+ const ICEBERG_SCHEMAS = Object.fromEntries(ICEBERG_TABLES.map((t) => [t, icebergTableSpec(t)]));
67
+ export { ICEBERG_TABLES as a, ICEBERG_SCHEMAS as i, ICEBERG_PARTITION_COLUMNS as n, icebergTableSpec as o, ICEBERG_PARTITION_SPEC as r, ICEBERG_FIELD_ID_BASE as t };
@@ -120,7 +120,7 @@ interface ReduceContext<TRow extends Row$1 = Row$1> {
120
120
  }
121
121
  /**
122
122
  * Unified analyzer contract. `TRow` lets authors narrow from the default
123
- * `Row = Record<string, unknown>` to a typed row shape (e.g. `KeywordRow`)
123
+ * `Row = Record<string, unknown>` to a typed row shape (e.g. `QueriesRow`)
124
124
  * when their reducer assumes specific columns exist — catches drift between
125
125
  * `build` (SELECT list) and `reduce` (column access) at compile time.
126
126
  */
@@ -27,7 +27,7 @@ const LOGICAL_DATASETS = {
27
27
  surfaces: ["api", "stored"]
28
28
  }
29
29
  } },
30
- keywords: { dimensions: {
30
+ queries: { dimensions: {
31
31
  query: {
32
32
  column: "query",
33
33
  surfaces: ["api", "stored"]
@@ -41,7 +41,7 @@ const LOGICAL_DATASETS = {
41
41
  surfaces: ["api", "stored"]
42
42
  }
43
43
  } },
44
- page_keywords: { dimensions: {
44
+ page_queries: { dimensions: {
45
45
  page: {
46
46
  column: "url",
47
47
  surfaces: ["api", "stored"]
@@ -69,16 +69,10 @@ const LOGICAL_DATASETS = {
69
69
  surfaces: ["api", "stored"]
70
70
  }
71
71
  } },
72
- devices: { dimensions: {
73
- device: {
74
- column: "device",
75
- surfaces: ["api", "stored"]
76
- },
77
- date: {
78
- column: "date",
79
- surfaces: ["api", "stored"]
80
- }
81
- } },
72
+ dates: { dimensions: { date: {
73
+ column: "date",
74
+ surfaces: ["api", "stored"]
75
+ } } },
82
76
  search_appearance: { dimensions: {
83
77
  searchAppearance: {
84
78
  column: "searchAppearance",
@@ -584,13 +578,13 @@ function buildExtrasQueries(state, options) {
584
578
  const dims = plan.groupByDimensions;
585
579
  const extras = [];
586
580
  if (!dims.includes("queryCanonical")) return extras;
587
- const keywordsKey = adapter.tableKeyForDataset("keywords");
588
- const t = adapter.schema[keywordsKey];
589
- const table = adapter.tableRef(keywordsKey);
581
+ const queriesKey = adapter.tableKeyForDataset("queries");
582
+ const t = adapter.schema[queriesKey];
583
+ const table = adapter.tableRef(queriesKey);
590
584
  const whereParts = [];
591
- if (adapter.siteIdColRef && siteId != null) whereParts.push(sql`${adapter.siteIdColRef(keywordsKey)} = ${siteId}`);
592
- whereParts.push(sql`${adapter.dateColRef(keywordsKey)} >= ${plan.dateRange.startDate}`);
593
- whereParts.push(sql`${adapter.dateColRef(keywordsKey)} <= ${plan.dateRange.endDate}`);
585
+ if (adapter.siteIdColRef && siteId != null) whereParts.push(sql`${adapter.siteIdColRef(queriesKey)} = ${siteId}`);
586
+ whereParts.push(sql`${adapter.dateColRef(queriesKey)} >= ${plan.dateRange.startDate}`);
587
+ whereParts.push(sql`${adapter.dateColRef(queriesKey)} <= ${plan.dateRange.endDate}`);
594
588
  const whereExpr = whereParts.length > 0 ? sql`WHERE ${joinAnd(whereParts)}` : sql``;
595
589
  const outerQueryCol = sql.raw("query");
596
590
  const compiled = compileCollapsed(adapter, sql`WITH per_variant AS (SELECT ${t.query_canonical} as joinKey, ${t.query} as query, SUM(${t.clicks}) as clicks, SUM(${t.impressions}) as impressions, SUM(${t.sum_position}) as sum_pos, ROW_NUMBER() OVER (PARTITION BY ${t.query_canonical} ORDER BY SUM(${t.clicks}) DESC) as rn, COUNT(*) OVER (PARTITION BY ${t.query_canonical}) as variantCount FROM ${table} ${whereExpr} GROUP BY ${t.query_canonical}, ${t.query}) SELECT joinKey, MAX(variantCount) as variantCount, MAX(CASE WHEN rn = 1 THEN ${outerQueryCol} END) as canonicalName, GROUP_CONCAT(CASE WHEN rn <= 10 THEN ${outerQueryCol} || ':::' || clicks || ':::' || impressions || ':::' || CAST(ROUND(CAST(sum_pos AS REAL) / NULLIF(impressions, 0) + 1, 1) AS TEXT) END, '||') as variants FROM per_variant GROUP BY joinKey`);
@@ -735,10 +729,10 @@ const PG_BASE_CONFIG = {
735
729
  schema: drizzleSchema,
736
730
  datasetToTableKey: {
737
731
  pages: "pages",
738
- keywords: "keywords",
739
- page_keywords: "page_keywords",
732
+ queries: "queries",
733
+ page_queries: "page_queries",
740
734
  countries: "countries",
741
- devices: "devices",
735
+ dates: "dates",
742
736
  search_appearance: "search_appearance",
743
737
  hourly_pages: "hourly_pages"
744
738
  },