@gscdump/engine 0.17.5 → 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/compiler.mjs +2 -1
- package/dist/_chunks/duckdb.d.mts +1 -1
- package/dist/_chunks/engine.mjs +87 -4
- package/dist/_chunks/planner.d.mts +1 -1
- package/dist/_chunks/registry.d.mts +1 -1
- package/dist/_chunks/resolver.mjs +2 -1
- package/dist/_chunks/schema.d.mts +305 -80
- package/dist/_chunks/schema.mjs +19 -3
- package/dist/_chunks/storage.d.mts +37 -2
- package/dist/_chunks/storage.mjs +4 -1
- package/dist/adapters/filesystem.d.mts +1 -1
- package/dist/adapters/filesystem.mjs +1 -1
- package/dist/adapters/hyparquet.d.mts +1 -1
- package/dist/adapters/node.d.mts +1 -1
- package/dist/adapters/node.mjs +1 -1
- package/dist/adapters/r2-manifest.d.mts +1 -1
- package/dist/adapters/r2-manifest.mjs +1 -1
- package/dist/contracts.d.mts +2 -2
- package/dist/index.d.mts +38 -4
- package/dist/index.mjs +6 -5
- package/dist/ingest.d.mts +1 -1
- package/dist/ingest.mjs +17 -1
- package/dist/planner.d.mts +2 -2
- package/dist/resolver/index.d.mts +10 -2
- package/dist/rollups.d.mts +36 -5
- package/dist/rollups.mjs +38 -1
- package/dist/schema.d.mts +2 -2
- package/dist/schema.mjs +2 -2
- package/dist/scope.d.mts +6 -0
- package/dist/scope.mjs +5 -3
- package/dist/source/index.d.mts +1 -1
- package/package.json +3 -3
package/dist/_chunks/schema.mjs
CHANGED
|
@@ -40,13 +40,20 @@ const search_appearance = pgTable("search_appearance", {
|
|
|
40
40
|
date: dateCol(),
|
|
41
41
|
...metricCols()
|
|
42
42
|
});
|
|
43
|
+
const hourly_pages = pgTable("hourly_pages", {
|
|
44
|
+
url: varchar("url").notNull(),
|
|
45
|
+
hour: varchar("hour").notNull(),
|
|
46
|
+
date: dateCol(),
|
|
47
|
+
...metricCols()
|
|
48
|
+
});
|
|
43
49
|
const drizzleSchema = {
|
|
44
50
|
pages,
|
|
45
51
|
keywords,
|
|
46
52
|
countries,
|
|
47
53
|
devices,
|
|
48
54
|
page_keywords,
|
|
49
|
-
search_appearance
|
|
55
|
+
search_appearance,
|
|
56
|
+
hourly_pages
|
|
50
57
|
};
|
|
51
58
|
const TABLE_METADATA = {
|
|
52
59
|
pages: {
|
|
@@ -76,6 +83,14 @@ const TABLE_METADATA = {
|
|
|
76
83
|
search_appearance: {
|
|
77
84
|
sortKey: ["date", "searchAppearance"],
|
|
78
85
|
version: 1
|
|
86
|
+
},
|
|
87
|
+
hourly_pages: {
|
|
88
|
+
sortKey: [
|
|
89
|
+
"date",
|
|
90
|
+
"hour",
|
|
91
|
+
"url"
|
|
92
|
+
],
|
|
93
|
+
version: 1
|
|
79
94
|
}
|
|
80
95
|
};
|
|
81
96
|
function pgSqlTypeToColumnType(sqlType) {
|
|
@@ -107,7 +122,8 @@ const METRIC_TABLES = [
|
|
|
107
122
|
"countries",
|
|
108
123
|
"devices",
|
|
109
124
|
"page_keywords",
|
|
110
|
-
"search_appearance"
|
|
125
|
+
"search_appearance",
|
|
126
|
+
"hourly_pages"
|
|
111
127
|
];
|
|
112
128
|
const SCHEMAS = Object.fromEntries(METRIC_TABLES.map((t) => [t, tableSchemaFrom(t)]));
|
|
113
129
|
function currentSchemaVersion(table) {
|
|
@@ -136,4 +152,4 @@ function dimensionToColumn(dim, _table) {
|
|
|
136
152
|
if (dim === "queryCanonical") return "query_canonical";
|
|
137
153
|
return dim;
|
|
138
154
|
}
|
|
139
|
-
export { inferTable as a, countries as c,
|
|
155
|
+
export { inferTable as a, countries as c, hourly_pages as d, keywords as f, search_appearance as h, dimensionToColumn as i, devices as l, pages as m, allTables as n, schemaFor as o, page_keywords as p, currentSchemaVersion as r, TABLE_METADATA as s, SCHEMAS as t, drizzleSchema as u };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Row, Row as Row$1, TableName, TableName as TableName$1, TenantCtx, TenantCtx as TenantCtx$1 } from "@gscdump/contracts";
|
|
1
|
+
import { Grain, Grain as Grain$1, Row, Row as Row$1, TableName, TableName as TableName$1, TenantCtx, TenantCtx as TenantCtx$1 } from "@gscdump/contracts";
|
|
2
2
|
import { BuilderState, SearchType, SearchType as SearchType$1 } from "gscdump/query";
|
|
3
3
|
/**
|
|
4
4
|
* Per-tier age threshold in days. Default ladder collapses on these gates:
|
|
@@ -34,6 +34,12 @@ interface WriteCtx extends TenantCtx {
|
|
|
34
34
|
* for different search types coexist without colliding.
|
|
35
35
|
*/
|
|
36
36
|
searchType?: SearchType;
|
|
37
|
+
/**
|
|
38
|
+
* Temporal granularity for this write. `'day'` (default) routes to
|
|
39
|
+
* `writeDay` semantics. `'hour'` routes to `writeHour` — the host (ingest
|
|
40
|
+
* accumulator) interprets this; the engine surfaces both methods directly.
|
|
41
|
+
*/
|
|
42
|
+
grain?: Grain;
|
|
37
43
|
}
|
|
38
44
|
interface QueryCtx extends TenantCtx {
|
|
39
45
|
table?: TableName;
|
|
@@ -45,11 +51,24 @@ interface QueryCtx extends TenantCtx {
|
|
|
45
51
|
* manifest entries written for that type. Mirrors {@link WriteCtx.searchType}.
|
|
46
52
|
*/
|
|
47
53
|
searchType?: SearchType;
|
|
54
|
+
/**
|
|
55
|
+
* Temporal granularity for this query. `'day'` (default) reads daily
|
|
56
|
+
* partitions only and skips any `hourly/` partitions. `'hour'` reads only
|
|
57
|
+
* hourly partitions. The two never mix — daily-from-hourly aggregation
|
|
58
|
+
* happens through the `discover-daily-from-hourly` rollup, not at read.
|
|
59
|
+
*/
|
|
60
|
+
grain?: Grain;
|
|
48
61
|
}
|
|
49
62
|
interface GcCtx {
|
|
50
63
|
now?: () => number;
|
|
51
64
|
userId?: string;
|
|
52
65
|
siteId?: string;
|
|
66
|
+
/**
|
|
67
|
+
* Override retention for hourly partitions in milliseconds. Defaults to
|
|
68
|
+
* 90 days inside `gcOrphansImpl`. Hourly is GC-only — never compacted —
|
|
69
|
+
* so this is the only lifecycle knob for `hourly/{date}` entries.
|
|
70
|
+
*/
|
|
71
|
+
hourlyRetentionMs?: number;
|
|
53
72
|
}
|
|
54
73
|
/**
|
|
55
74
|
* Compaction tier of a manifest entry. Determines which compactor stage may
|
|
@@ -414,6 +433,15 @@ interface RunSQLOptions {
|
|
|
414
433
|
}
|
|
415
434
|
interface StorageEngine {
|
|
416
435
|
writeDay: (ctx: WriteCtx, rows: Row[]) => Promise<void>;
|
|
436
|
+
/**
|
|
437
|
+
* Read-merge-write a single-day hourly partition. Idempotent on
|
|
438
|
+
* `(url, hour)` (last-write-wins): callers can re-fire the same slice
|
|
439
|
+
* after a retry and the partition converges. `ctx.date` is the PT
|
|
440
|
+
* calendar day; rows must carry `hour` + `date` fields. Partition shape
|
|
441
|
+
* `hourly/{date}`; coexists with daily partitions in the same `table`
|
|
442
|
+
* prefix (`hourly_pages`).
|
|
443
|
+
*/
|
|
444
|
+
writeHour: (ctx: WriteCtx, rows: Row[]) => Promise<void>;
|
|
417
445
|
query: (ctx: QueryCtx, state: BuilderState) => Promise<QueryResult>;
|
|
418
446
|
/**
|
|
419
447
|
* Run arbitrary SQL resolved against named partition sets. Composes
|
|
@@ -472,5 +500,12 @@ interface EngineOptions {
|
|
|
472
500
|
now?: () => number;
|
|
473
501
|
}
|
|
474
502
|
declare function dayPartition(date: string): string;
|
|
503
|
+
/**
|
|
504
|
+
* Hourly partition keyed by the PT calendar day (`YYYY-MM-DD`). One parquet
|
|
505
|
+
* per day holds 24 hourly buckets — read-merge-write keeps `(url, hour)`
|
|
506
|
+
* idempotency across retries. Names sort lexically alongside daily ones but
|
|
507
|
+
* never collide because of the `hourly/` prefix.
|
|
508
|
+
*/
|
|
509
|
+
declare function hourPartition(date: string): string;
|
|
475
510
|
declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
|
|
476
|
-
export {
|
|
511
|
+
export { SyncStateKind as A, hourPartition as B, Row$1 as C, SyncState as D, StorageEngine as E, WatermarkFilter as F, RAW_DAILY_COMPACT_THRESHOLD as G, inferSearchType as H, WatermarkScope as I, countRawDailies as K, WriteCtx as L, TableName$1 as M, TenantCtx$1 as N, SyncStateDetail as O, Watermark as P, WriteResult as R, QueryResult as S, SearchType$1 as T, objectKey as U, inferLegacyTier as V, CompactionThresholds as W, PurgeUrlsResult as _, EngineOptions as a, QueryExecuteResult as b, Grain$1 as c, ManifestEntry as d, ManifestPurgeResult as f, PurgeResult as g, PurgeFilter as h, DataSource as i, SyncStateScope as j, SyncStateFilter as k, ListLiveFilter as l, ParquetCodec as m, CompactionTier as n, FileSetRef as o, ManifestStore as p, enumeratePartitions as q, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, LockScope as u, QueryCtx as v, RunSQLOptions as w, QueryExecutor as x, QueryExecuteOptions as y, dayPartition as z };
|
package/dist/_chunks/storage.mjs
CHANGED
|
@@ -11,6 +11,9 @@ function inferLegacyTier(entry) {
|
|
|
11
11
|
function dayPartition(date) {
|
|
12
12
|
return `daily/${date}`;
|
|
13
13
|
}
|
|
14
|
+
function hourPartition(date) {
|
|
15
|
+
return `hourly/${date}`;
|
|
16
|
+
}
|
|
14
17
|
function monthPartition(month) {
|
|
15
18
|
return `monthly/${month}`;
|
|
16
19
|
}
|
|
@@ -36,4 +39,4 @@ function objectKey(ctx, table, partition, version, searchType) {
|
|
|
36
39
|
function tenantPrefix(ctx) {
|
|
37
40
|
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/` : `u_${ctx.userId}/`;
|
|
38
41
|
}
|
|
39
|
-
export {
|
|
42
|
+
export { inferSearchType as a, objectKey as c, tenantPrefix as d, weekPartition as f, inferLegacyTier as i, quarterOfMonth as l, dayPartition as n, mondayOfWeek as o, hourPartition as r, monthPartition as s, DEFAULT_SEARCH_TYPE as t, quarterPartition as u };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { a as inferSearchType, i as inferLegacyTier } from "../_chunks/storage.mjs";
|
|
2
2
|
import { dirname, join, resolve } from "node:path";
|
|
3
3
|
import { Buffer } from "node:buffer";
|
|
4
4
|
import { randomBytes } from "node:crypto";
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { C as Row, M as TableName, i as DataSource, m as ParquetCodec, t as CodecCtx } from "../_chunks/storage.mjs";
|
|
2
2
|
import { t as ColumnDef } from "../_chunks/schema.mjs";
|
|
3
3
|
declare function encodeRowsToParquet(table: TableName, rows: readonly Row[]): Uint8Array;
|
|
4
4
|
interface EncodeFlexOptions {
|
package/dist/adapters/node.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { E as StorageEngine, i as DataSource } from "../_chunks/storage.mjs";
|
|
2
2
|
import { NodeDuckDBOptions, createNodeDuckDBHandle, resetNodeDuckDB } from "./duckdb-node.mjs";
|
|
3
3
|
import { t as SnapshotIndex } from "../_chunks/snapshot.mjs";
|
|
4
4
|
import { Row, TableName } from "@gscdump/contracts";
|
package/dist/adapters/node.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { a as
|
|
1
|
+
import { a as createDuckDBCodec, n as createStorageEngine, o as createDuckDBExecutor } from "../_chunks/engine.mjs";
|
|
2
2
|
import { createNodeDuckDBHandle, resetNodeDuckDB } from "./duckdb-node.mjs";
|
|
3
3
|
import { createFilesystemDataSource, createFilesystemManifestStore } from "./filesystem.mjs";
|
|
4
4
|
import path from "node:path";
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { a as inferSearchType, i as inferLegacyTier } from "../_chunks/storage.mjs";
|
|
2
2
|
const SHARD_RE = /^u_[^/]+\/manifest\/(?<siteId>[^/]+)\/(?<table>[^/]+)\/HEAD$/;
|
|
3
3
|
function defaultSnapshotId() {
|
|
4
4
|
return `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
|
package/dist/contracts.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { A as
|
|
2
|
-
export { CodecCtx, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, ListLiveFilter, LockScope, ManifestEntry, ManifestStore, ParquetCodec, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult };
|
|
1
|
+
import { A as SyncStateKind, C as Row, D as SyncState, E as StorageEngine, F as WatermarkFilter, I as WatermarkScope, L as WriteCtx, M as TableName, N as TenantCtx, O as SyncStateDetail, P as Watermark, R as WriteResult, S as QueryResult, T as SearchType, a as EngineOptions, b as QueryExecuteResult, d as ManifestEntry, i as DataSource, j as SyncStateScope, k as SyncStateFilter, l as ListLiveFilter, m as ParquetCodec, n as CompactionTier, o as FileSetRef, p as ManifestStore, s as GcCtx, t as CodecCtx, u as LockScope, v as QueryCtx, w as RunSQLOptions, x as QueryExecutor, y as QueryExecuteOptions } from "./_chunks/storage.mjs";
|
|
2
|
+
export type { CodecCtx, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, ListLiveFilter, LockScope, ManifestEntry, ManifestStore, ParquetCodec, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult };
|
package/dist/index.d.mts
CHANGED
|
@@ -1,21 +1,49 @@
|
|
|
1
|
-
import { A as
|
|
1
|
+
import { A as SyncStateKind, B as hourPartition, C as Row, D as SyncState, E as StorageEngine, F as WatermarkFilter, G as RAW_DAILY_COMPACT_THRESHOLD, H as inferSearchType, I as WatermarkScope, K as countRawDailies, L as WriteCtx, M as TableName, N as TenantCtx, O as SyncStateDetail, P as Watermark, R as WriteResult, S as QueryResult, T as SearchType, U as objectKey, V as inferLegacyTier, W as CompactionThresholds, _ as PurgeUrlsResult, a as EngineOptions, b as QueryExecuteResult, c as Grain, d as ManifestEntry, f as ManifestPurgeResult, g as PurgeResult, h as PurgeFilter, i as DataSource, j as SyncStateScope, k as SyncStateFilter, l as ListLiveFilter, m as ParquetCodec, n as CompactionTier, o as FileSetRef, p as ManifestStore, q as enumeratePartitions, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as LockScope, v as QueryCtx, w as RunSQLOptions, x as QueryExecutor, y as QueryExecuteOptions, z as dayPartition } from "./_chunks/storage.mjs";
|
|
2
2
|
import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
|
|
3
|
-
import { _ as
|
|
3
|
+
import { _ as page_keywords, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as keywords, h as hourly_pages, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema, v as pages } from "./_chunks/schema.mjs";
|
|
4
4
|
import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
|
|
5
5
|
import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
6
6
|
import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
|
|
7
|
+
import { rebuildDailyFromHourly } from "./rollups.mjs";
|
|
7
8
|
import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
|
|
8
|
-
import { Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
|
|
9
|
+
import { Grain as Grain$1, Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
|
|
9
10
|
declare function coerceRow(row: Row$1): Row$1;
|
|
10
11
|
declare function coerceRows(rows: readonly Row$1[]): Row$1[];
|
|
11
12
|
declare const MAX_DAY_BYTES: number;
|
|
12
13
|
declare function createStorageEngine(opts: EngineOptions): StorageEngine;
|
|
14
|
+
interface GcDeps {
|
|
15
|
+
dataSource: DataSource;
|
|
16
|
+
manifestStore: ManifestStore;
|
|
17
|
+
}
|
|
18
|
+
interface GcOptions {
|
|
19
|
+
userId?: string;
|
|
20
|
+
siteId?: string;
|
|
21
|
+
/**
|
|
22
|
+
* Retention for hourly partitions (`hourly/{date}`) in milliseconds.
|
|
23
|
+
* Defaults to 90 days; entries with `createdAt < now - hourlyRetentionMs`
|
|
24
|
+
* are retired and their bytes deleted alongside ordinary orphan sweeping.
|
|
25
|
+
*/
|
|
26
|
+
hourlyRetentionMs?: number;
|
|
27
|
+
}
|
|
28
|
+
declare function gcOrphansImpl(deps: GcDeps, now: number, graceMs: number, opts?: GcOptions): Promise<{
|
|
29
|
+
deleted: number;
|
|
30
|
+
}>;
|
|
13
31
|
interface IngestAccumulatorEngine {
|
|
14
32
|
writeDay: (scope: TenantCtx & {
|
|
15
33
|
table: TableName$1;
|
|
16
34
|
date: string;
|
|
17
35
|
searchType?: SearchType;
|
|
18
36
|
}, rows: Row$1[]) => Promise<void>;
|
|
37
|
+
/**
|
|
38
|
+
* Routed when the accumulator's `ctx.grain === 'hour'`. Same scope shape as
|
|
39
|
+
* `writeDay`; `date` is the PT calendar day, rows carry `hour` + `date`.
|
|
40
|
+
* Optional so hosts that never opt into hourly need not implement it.
|
|
41
|
+
*/
|
|
42
|
+
writeHour?: (scope: TenantCtx & {
|
|
43
|
+
table: TableName$1;
|
|
44
|
+
date: string;
|
|
45
|
+
searchType?: SearchType;
|
|
46
|
+
}, rows: Row$1[]) => Promise<void>;
|
|
19
47
|
setSyncState: (scope: TenantCtx & {
|
|
20
48
|
table: TableName$1;
|
|
21
49
|
date: string;
|
|
@@ -28,6 +56,12 @@ interface IngestAccumulatorCtx {
|
|
|
28
56
|
userId: string | number;
|
|
29
57
|
siteId: string;
|
|
30
58
|
searchType?: SearchType;
|
|
59
|
+
/**
|
|
60
|
+
* Temporal granularity for this accumulator. `'day'` (default) routes
|
|
61
|
+
* flushed buckets to `engine.writeDay`. `'hour'` routes to
|
|
62
|
+
* `engine.writeHour` and requires the engine implementation to be set.
|
|
63
|
+
*/
|
|
64
|
+
grain?: Grain$1;
|
|
31
65
|
}
|
|
32
66
|
interface IngestAccumulatorHooks {
|
|
33
67
|
/**
|
|
@@ -113,4 +147,4 @@ declare const MIN_SYNC_IMPRESSIONS = 1;
|
|
|
113
147
|
declare const MIN_COUNTRY_IMPRESSIONS = 10;
|
|
114
148
|
declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
|
|
115
149
|
declare const MAX_TRACKED_URLS_PER_SITE = 200000;
|
|
116
|
-
export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type GscApiRow, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, 
inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
|
150
|
+
export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, 
getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
package/dist/index.mjs
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import { n as coerceRows, t as coerceRow } from "./_chunks/coerce.mjs";
|
|
2
|
-
import { a as inferTable, c as countries, d as
|
|
3
|
-
import {
|
|
2
|
+
import { a as inferTable, c as countries, d as hourly_pages, f as keywords, i as dimensionToColumn, l as devices, m as pages, n as allTables, p as page_keywords, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
|
|
3
|
+
import { a as inferSearchType, c as objectKey, i as inferLegacyTier, n as dayPartition, r as hourPartition, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
|
|
4
4
|
import { a as RAW_DAILY_COMPACT_THRESHOLD, c as enumeratePartitions, i as substituteNamedFiles, r as resolveToSQL, s as countRawDailies, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
|
|
5
5
|
import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
|
|
6
|
-
import { a as
|
|
6
|
+
import { a as createDuckDBCodec, i as canonicalEmptyParquetSchema, n as createStorageEngine, o as createDuckDBExecutor, r as gcOrphansImpl, t as MAX_DAY_BYTES } from "./_chunks/engine.mjs";
|
|
7
7
|
import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
8
8
|
import "./planner.mjs";
|
|
9
|
+
import { rebuildDailyFromHourly } from "./rollups.mjs";
|
|
9
10
|
import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
|
|
10
11
|
const NOOP_RESULT = {
|
|
11
12
|
flushed: 0,
|
|
@@ -37,7 +38,7 @@ function createIngestAccumulator(opts) {
|
|
|
37
38
|
const acc = createRowAccumulator(accOpts);
|
|
38
39
|
async function writeOne(table, date, rows) {
|
|
39
40
|
const scope = scopeOf(ctx, table, date);
|
|
40
|
-
return engine.writeDay(scope, rows).then(() => engine.setSyncState(scope, "done")).then(async () => {
|
|
41
|
+
return (ctx.grain === "hour" ? engine.writeHour ?? (() => Promise.reject(/* @__PURE__ */ new Error("ingest accumulator: grain=hour requires engine.writeHour"))) : engine.writeDay)(scope, rows).then(() => engine.setSyncState(scope, "done")).then(async () => {
|
|
41
42
|
await hooks.onWritten?.({
|
|
42
43
|
table,
|
|
43
44
|
date,
|
|
@@ -214,4 +215,4 @@ const MIN_SYNC_IMPRESSIONS = 1;
|
|
|
214
215
|
const MIN_COUNTRY_IMPRESSIONS = 10;
|
|
215
216
|
const MAX_SITEMAP_URLS_PER_SITE = 5e4;
|
|
216
217
|
const MAX_TRACKED_URLS_PER_SITE = 2e5;
|
|
217
|
-
export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
|
218
|
+
export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
package/dist/ingest.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { C as Row, M as TableName } from "./_chunks/storage.mjs";
|
|
2
2
|
/**
|
|
3
3
|
* Canonical GSC API dimension order per table. Consumers hitting the raw
|
|
4
4
|
* `searchanalytics.query` endpoint must request dimensions in this order so
|
package/dist/ingest.mjs
CHANGED
|
@@ -8,7 +8,8 @@ const TABLE_DIMS = {
|
|
|
8
8
|
"query",
|
|
9
9
|
"date"
|
|
10
10
|
],
|
|
11
|
-
search_appearance: ["searchAppearance", "date"]
|
|
11
|
+
search_appearance: ["searchAppearance", "date"],
|
|
12
|
+
hourly_pages: ["hour", "page"]
|
|
12
13
|
};
|
|
13
14
|
function toPath(gscUrl) {
|
|
14
15
|
try {
|
|
@@ -80,6 +81,21 @@ function transformGscRow(table, apiRow, options = {}) {
|
|
|
80
81
|
}
|
|
81
82
|
};
|
|
82
83
|
}
|
|
84
|
+
if (table === "hourly_pages") {
|
|
85
|
+
const hour = String(keys[0] ?? "");
|
|
86
|
+
const date = hour.slice(0, 10);
|
|
87
|
+
return {
|
|
88
|
+
date,
|
|
89
|
+
row: {
|
|
90
|
+
url: toPath(String(keys[1] ?? "")),
|
|
91
|
+
hour,
|
|
92
|
+
date,
|
|
93
|
+
clicks,
|
|
94
|
+
impressions,
|
|
95
|
+
sum_position
|
|
96
|
+
}
|
|
97
|
+
};
|
|
98
|
+
}
|
|
83
99
|
if (table === "search_appearance") {
|
|
84
100
|
const date = String(keys[1] ?? "");
|
|
85
101
|
return {
|
package/dist/planner.d.mts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { q as enumeratePartitions } from "./_chunks/storage.mjs";
|
|
2
2
|
import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
|
|
3
|
-
export { FILES_PLACEHOLDER, ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
|
|
3
|
+
export { FILES_PLACEHOLDER, type ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { M as TableName$1, T as SearchType$1 } from "../_chunks/storage.mjs";
|
|
2
2
|
import { a as ResolvedSQLOptimized, i as ResolvedSQL, n as ExtraQuery, o as ResolverAdapter, r as ResolvedComparisonSQL, s as ResolverOptions, t as ComparisonFilter } from "../_chunks/types.mjs";
|
|
3
3
|
import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities } from "gscdump/query/plan";
|
|
4
4
|
import { SQL } from "drizzle-orm";
|
|
5
|
-
import { TableName } from "@gscdump/contracts";
|
|
5
|
+
import { Grain, TableName } from "@gscdump/contracts";
|
|
6
6
|
import { BuilderState, Dimension, FilterInput, InternalFilter, Metric } from "gscdump/query";
|
|
7
7
|
type DimensionSurface = 'api' | 'stored' | 'derived';
|
|
8
8
|
interface DimensionBinding {
|
|
@@ -102,6 +102,14 @@ interface RunQueryCtx {
|
|
|
102
102
|
siteId: string;
|
|
103
103
|
table: TableName$1;
|
|
104
104
|
searchType?: SearchType$1;
|
|
105
|
+
/**
|
|
106
|
+
* Temporal granularity. `'day'` (default) drives `enumeratePartitions` to
|
|
107
|
+
* emit `daily/{date}` only; hourly partitions are skipped by construction.
|
|
108
|
+
* `'hour'` is reserved for hourly read paths and must use the dedicated
|
|
109
|
+
* hourly query surface (callers pass `partitions: [hourly/{date}]`
|
|
110
|
+
* directly through `runSQL`).
|
|
111
|
+
*/
|
|
112
|
+
grain?: Grain;
|
|
105
113
|
}
|
|
106
114
|
interface RunSQLFn {
|
|
107
115
|
(opts: {
|
package/dist/rollups.d.mts
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
import { i as DataSource, o as FileSetRef } from "./_chunks/storage.mjs";
|
|
1
|
+
import { C as Row$1, i as DataSource, o as FileSetRef } from "./_chunks/storage.mjs";
|
|
2
2
|
import { t as ColumnDef } from "./_chunks/schema.mjs";
|
|
3
3
|
import { TenantCtx } from "@gscdump/contracts";
|
|
4
4
|
import { SearchType } from "gscdump/query";
|
|
5
|
-
import * as _$_gscdump_engine_contracts0 from "@gscdump/engine/contracts";
|
|
6
5
|
interface RollupCtx extends TenantCtx {
|
|
7
6
|
/** When the rollup was built. Stamped into payload + filename. */
|
|
8
7
|
builtAt: number;
|
|
@@ -15,7 +14,7 @@ interface RollupEngine {
|
|
|
15
14
|
runSQL: (opts: {
|
|
16
15
|
ctx: TenantCtx;
|
|
17
16
|
fileSets: Record<string, FileSetRef>;
|
|
18
|
-
table?:
|
|
17
|
+
table?: import('@gscdump/engine/contracts').TableName;
|
|
19
18
|
sql: string;
|
|
20
19
|
params?: unknown[];
|
|
21
20
|
/**
|
|
@@ -26,7 +25,7 @@ interface RollupEngine {
|
|
|
26
25
|
*/
|
|
27
26
|
searchType?: SearchType;
|
|
28
27
|
}) => Promise<{
|
|
29
|
-
rows:
|
|
28
|
+
rows: import('@gscdump/engine/contracts').Row[];
|
|
30
29
|
}>;
|
|
31
30
|
}
|
|
32
31
|
/**
|
|
@@ -235,5 +234,37 @@ declare const sitemapHealthRollup: RollupDef;
|
|
|
235
234
|
* exist on the site.
|
|
236
235
|
*/
|
|
237
236
|
declare const sitemapChanges28dRollup: RollupDef;
|
|
237
|
+
/**
 * Options accepted by `rebuildDailyFromHourly`.
 */
interface RebuildDailyFromHourlyOptions {
  /**
   * Rollup engine used to read the hourly partition (`runSQL`), extended with
   * a `writeDay` sink that receives the aggregated daily rows.
   */
  engine: RollupEngine & {
    writeDay: (scope: TenantCtx & {
      table: TableTypeName;
      date: string;
      searchType?: SearchType;
    }, rows: Row$1[]) => Promise<void>;
  };
  /** Tenant the rollup is performed for. */
  ctx: TenantCtx;
  /** PT calendar day to roll up. */
  date: string;
  /** Only the Discover search type is supported. */
  searchType: 'discover';
}
type TableTypeName = import('@gscdump/contracts').TableName;
/**
 * Aggregate one day's `hourly_pages` partition into the daily `pages` shape
 * and write it to the daily Discover partition. After this runs for date D,
 * the daily query path serves D from `pages/.../daily/D` and the `hourly/D`
 * partition becomes read-only / GC-only.
 *
 * `(position - 1)` weighting matches the storage convention encoded by
 * `toSumPosition`: `sum_position = SUM((position - 1) * impressions)`, so a
 * downstream `SUM(sum_position) / SUM(impressions) + 1` recovers the mean.
 *
 * searchType-scoped: only call with `searchType: 'discover'`. The hourly
 * partition lives under `hourly_pages` and the output lands under `pages` so
 * existing dashboard queries (which read `pages`) see the rolled-up day
 * transparently.
 *
 * @returns The number of daily rows handed to `engine.writeDay`.
 */
declare function rebuildDailyFromHourly(opts: RebuildDailyFromHourlyOptions): Promise<{
  rowsWritten: number;
}>;
|
|
238
269
|
declare const DEFAULT_ROLLUPS: readonly RollupDef[];
|
|
239
|
-
export { DEFAULT_ROLLUPS, ParquetRollupPointer, RebuildRollupResult, RebuildRollupsOptions, RollupBucket, RollupCtx, RollupDef, RollupEngine, RollupEnvelope, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
|
|
270
|
+
export { DEFAULT_ROLLUPS, ParquetRollupPointer, RebuildDailyFromHourlyOptions, RebuildRollupResult, RebuildRollupsOptions, RollupBucket, RollupCtx, RollupDef, RollupEngine, RollupEnvelope, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildDailyFromHourly, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
|
package/dist/rollups.mjs
CHANGED
|
@@ -577,6 +577,43 @@ const sitemapChanges28dRollup = {
|
|
|
577
577
|
};
|
|
578
578
|
}
|
|
579
579
|
};
|
|
580
|
+
async function rebuildDailyFromHourly(opts) {
|
|
581
|
+
const { engine, ctx, date, searchType } = opts;
|
|
582
|
+
const rows = (await engine.runSQL({
|
|
583
|
+
ctx,
|
|
584
|
+
table: "hourly_pages",
|
|
585
|
+
fileSets: { FILES: {
|
|
586
|
+
table: "hourly_pages",
|
|
587
|
+
partitions: [`hourly/${date}`]
|
|
588
|
+
} },
|
|
589
|
+
searchType,
|
|
590
|
+
sql: `
|
|
591
|
+
SELECT
|
|
592
|
+
url,
|
|
593
|
+
DATE '${date}' AS date,
|
|
594
|
+
SUM(clicks)::BIGINT AS clicks,
|
|
595
|
+
SUM(impressions)::BIGINT AS impressions,
|
|
596
|
+
SUM(sum_position)::DOUBLE AS sum_position
|
|
597
|
+
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
598
|
+
WHERE date = '${date}'
|
|
599
|
+
GROUP BY url
|
|
600
|
+
`
|
|
601
|
+
})).rows.map((r) => ({
|
|
602
|
+
url: r.url,
|
|
603
|
+
date,
|
|
604
|
+
clicks: Number(r.clicks),
|
|
605
|
+
impressions: Number(r.impressions),
|
|
606
|
+
sum_position: Number(r.sum_position)
|
|
607
|
+
}));
|
|
608
|
+
await engine.writeDay({
|
|
609
|
+
userId: ctx.userId,
|
|
610
|
+
siteId: ctx.siteId,
|
|
611
|
+
table: "pages",
|
|
612
|
+
date,
|
|
613
|
+
searchType
|
|
614
|
+
}, rows);
|
|
615
|
+
return { rowsWritten: rows.length };
|
|
616
|
+
}
|
|
580
617
|
const DEFAULT_ROLLUPS = [
|
|
581
618
|
dailyTotalsRollup,
|
|
582
619
|
weeklyTotalsRollup,
|
|
@@ -589,4 +626,4 @@ const DEFAULT_ROLLUPS = [
|
|
|
589
626
|
sitemapHealthRollup,
|
|
590
627
|
sitemapChanges28dRollup
|
|
591
628
|
];
|
|
592
|
-
export { DEFAULT_ROLLUPS, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
|
|
629
|
+
export { DEFAULT_ROLLUPS, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildDailyFromHourly, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
|
package/dist/schema.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { _ as
|
|
2
|
-
export { ColumnDef, ColumnType, DrizzleSchema, SCHEMAS, TABLE_METADATA, TableSchema, allTables, countries, currentSchemaVersion, devices, dimensionToColumn, drizzleSchema, inferTable, keywords, page_keywords, pages, schemaFor, search_appearance };
|
|
1
|
+
import { _ as page_keywords, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as keywords, h as hourly_pages, i as TableSchema, l as schemaFor, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema, v as pages, y as search_appearance } from "./_chunks/schema.mjs";
|
|
2
|
+
export { type ColumnDef, type ColumnType, type DrizzleSchema, SCHEMAS, TABLE_METADATA, type TableSchema, allTables, countries, currentSchemaVersion, devices, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, keywords, page_keywords, pages, schemaFor, search_appearance };
|
package/dist/schema.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { a as inferTable, c as countries, d as
|
|
2
|
-
export { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, devices, dimensionToColumn, drizzleSchema, inferTable, keywords, page_keywords, pages, schemaFor, search_appearance };
|
|
1
|
+
import { a as inferTable, c as countries, d as hourly_pages, f as keywords, h as search_appearance, i as dimensionToColumn, l as devices, m as pages, n as allTables, o as schemaFor, p as page_keywords, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
|
|
2
|
+
export { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, devices, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, keywords, page_keywords, pages, schemaFor, search_appearance };
|
package/dist/scope.d.mts
CHANGED
|
@@ -16,6 +16,12 @@ interface ScopedRunnerOptions {
|
|
|
16
16
|
startDate?: string;
|
|
17
17
|
/** Inclusive upper bound for `date`. Ignored if `window` is supplied. */
|
|
18
18
|
endDate?: string;
|
|
19
|
+
/**
|
|
20
|
+
* Temporal granularity. `'day'` (default) filters on `table.date`. `'hour'`
|
|
21
|
+
* filters on `table.hour` when the table exposes that column (e.g.
|
|
22
|
+
* `hourly_pages`); falls back to date filtering otherwise.
|
|
23
|
+
*/
|
|
24
|
+
grain?: 'day' | 'hour';
|
|
19
25
|
}
|
|
20
26
|
interface TableScope {
|
|
21
27
|
wherePredicates: SQL[];
|
package/dist/scope.mjs
CHANGED
|
@@ -2,11 +2,13 @@ import { and, eq, gte, lte } from "drizzle-orm";
|
|
|
2
2
|
function buildTableScope(table, opts) {
|
|
3
3
|
const predicates = [];
|
|
4
4
|
if (opts.siteId && "site_id" in table) predicates.push(eq(table.site_id, opts.siteId));
|
|
5
|
-
if ("date" in table) {
|
5
|
+
const useHour = (opts.grain ?? "day") === "hour" && "hour" in table;
|
|
6
|
+
const filterCol = useHour ? table.hour : table.date;
|
|
7
|
+
if ("date" in table || useHour) {
|
|
6
8
|
const start = opts.window?.start ?? opts.startDate;
|
|
7
9
|
const end = opts.window?.end ?? opts.endDate;
|
|
8
|
-
if (start) predicates.push(gte(table.date, start));
|
|
9
|
-
if (end) predicates.push(lte(table.date, end));
|
|
10
|
+
if (start) predicates.push(gte(filterCol, start));
|
|
11
|
+
if (end) predicates.push(lte(filterCol, end));
|
|
10
12
|
}
|
|
11
13
|
return {
|
|
12
14
|
wherePredicates: predicates,
|
package/dist/source/index.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { C as Row, E as StorageEngine, N as TenantCtx, T as SearchType$1 } from "../_chunks/storage.mjs";
|
|
2
2
|
import { n as AnalysisResult, t as AnalysisParams } from "../_chunks/analysis-types.mjs";
|
|
3
3
|
import { o as ResolverAdapter } from "../_chunks/types.mjs";
|
|
4
4
|
import { C as ExecuteSqlOptions, E as SourceCapabilities, S as AnalysisSourceKind, T as QueryRow, t as AnalyzerRegistry, w as FileSet, x as AnalysisQuerySource } from "../_chunks/registry.mjs";
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.17.5",
|
|
4
|
+
"version": "0.18.1",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -169,8 +169,8 @@
|
|
|
169
169
|
"dependencies": {
|
|
170
170
|
"drizzle-orm": "^0.45.2",
|
|
171
171
|
"proper-lockfile": "^4.1.2",
|
|
172
|
-
"@gscdump/contracts": "0.17.5",
|
|
173
|
-
"gscdump": "0.17.5"
|
|
172
|
+
"gscdump": "0.18.1",
|
|
173
|
+
"@gscdump/contracts": "0.18.1"
|
|
174
174
|
},
|
|
175
175
|
"devDependencies": {
|
|
176
176
|
"@duckdb/duckdb-wasm": "^1.32.0",
|