@gscdump/engine 0.17.5 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,13 +40,20 @@ const search_appearance = pgTable("search_appearance", {
40
40
  date: dateCol(),
41
41
  ...metricCols()
42
42
  });
43
+ const hourly_pages = pgTable("hourly_pages", {
44
+ url: varchar("url").notNull(),
45
+ hour: varchar("hour").notNull(),
46
+ date: dateCol(),
47
+ ...metricCols()
48
+ });
43
49
  const drizzleSchema = {
44
50
  pages,
45
51
  keywords,
46
52
  countries,
47
53
  devices,
48
54
  page_keywords,
49
- search_appearance
55
+ search_appearance,
56
+ hourly_pages
50
57
  };
51
58
  const TABLE_METADATA = {
52
59
  pages: {
@@ -76,6 +83,14 @@ const TABLE_METADATA = {
76
83
  search_appearance: {
77
84
  sortKey: ["date", "searchAppearance"],
78
85
  version: 1
86
+ },
87
+ hourly_pages: {
88
+ sortKey: [
89
+ "date",
90
+ "hour",
91
+ "url"
92
+ ],
93
+ version: 1
79
94
  }
80
95
  };
81
96
  function pgSqlTypeToColumnType(sqlType) {
@@ -107,7 +122,8 @@ const METRIC_TABLES = [
107
122
  "countries",
108
123
  "devices",
109
124
  "page_keywords",
110
- "search_appearance"
125
+ "search_appearance",
126
+ "hourly_pages"
111
127
  ];
112
128
  const SCHEMAS = Object.fromEntries(METRIC_TABLES.map((t) => [t, tableSchemaFrom(t)]));
113
129
  function currentSchemaVersion(table) {
@@ -136,4 +152,4 @@ function dimensionToColumn(dim, _table) {
136
152
  if (dim === "queryCanonical") return "query_canonical";
137
153
  return dim;
138
154
  }
139
- export { inferTable as a, countries as c, keywords as d, page_keywords as f, dimensionToColumn as i, devices as l, search_appearance as m, allTables as n, schemaFor as o, pages as p, currentSchemaVersion as r, TABLE_METADATA as s, SCHEMAS as t, drizzleSchema as u };
155
+ export { inferTable as a, countries as c, hourly_pages as d, keywords as f, search_appearance as h, dimensionToColumn as i, devices as l, pages as m, allTables as n, schemaFor as o, page_keywords as p, currentSchemaVersion as r, TABLE_METADATA as s, SCHEMAS as t, drizzleSchema as u };
@@ -1,4 +1,4 @@
1
- import { Row, Row as Row$1, TableName, TableName as TableName$1, TenantCtx, TenantCtx as TenantCtx$1 } from "@gscdump/contracts";
1
+ import { Grain, Grain as Grain$1, Row, Row as Row$1, TableName, TableName as TableName$1, TenantCtx, TenantCtx as TenantCtx$1 } from "@gscdump/contracts";
2
2
  import { BuilderState, SearchType, SearchType as SearchType$1 } from "gscdump/query";
3
3
  /**
4
4
  * Per-tier age threshold in days. Default ladder collapses on these gates:
@@ -34,6 +34,12 @@ interface WriteCtx extends TenantCtx {
34
34
  * for different search types coexist without colliding.
35
35
  */
36
36
  searchType?: SearchType;
37
+ /**
38
+ * Temporal granularity for this write. `'day'` (default) routes to
39
+ * `writeDay` semantics. `'hour'` routes to `writeHour` — the host (ingest
40
+ * accumulator) interprets this; the engine surfaces both methods directly.
41
+ */
42
+ grain?: Grain;
37
43
  }
38
44
  interface QueryCtx extends TenantCtx {
39
45
  table?: TableName;
@@ -45,11 +51,24 @@ interface QueryCtx extends TenantCtx {
45
51
  * manifest entries written for that type. Mirrors {@link WriteCtx.searchType}.
46
52
  */
47
53
  searchType?: SearchType;
54
+ /**
55
+ * Temporal granularity for this query. `'day'` (default) reads daily
56
+ * partitions only and skips any `hourly/` partitions. `'hour'` reads only
57
+ * hourly partitions. The two never mix — daily-from-hourly aggregation
58
+ * happens through the `discover-daily-from-hourly` rollup, not at read.
59
+ */
60
+ grain?: Grain;
48
61
  }
49
62
  interface GcCtx {
50
63
  now?: () => number;
51
64
  userId?: string;
52
65
  siteId?: string;
66
+ /**
67
+ * Override retention for hourly partitions in milliseconds. Defaults to
68
+ * 90 days inside `gcOrphansImpl`. Hourly is GC-only — never compacted —
69
+ * so this is the only lifecycle knob for `hourly/{date}` entries.
70
+ */
71
+ hourlyRetentionMs?: number;
53
72
  }
54
73
  /**
55
74
  * Compaction tier of a manifest entry. Determines which compactor stage may
@@ -414,6 +433,15 @@ interface RunSQLOptions {
414
433
  }
415
434
  interface StorageEngine {
416
435
  writeDay: (ctx: WriteCtx, rows: Row[]) => Promise<void>;
436
+ /**
437
+ * Read-merge-write a single-day hourly partition. Idempotent on
438
+ * `(url, hour)` (last-write-wins): callers can re-fire the same slice
439
+ * after a retry and the partition converges. `ctx.date` is the PT
440
+ * calendar day; rows must carry `hour` + `date` fields. Partition shape
441
+ * `hourly/{date}`; coexists with daily partitions in the same `table`
442
+ * prefix (`hourly_pages`).
443
+ */
444
+ writeHour: (ctx: WriteCtx, rows: Row[]) => Promise<void>;
417
445
  query: (ctx: QueryCtx, state: BuilderState) => Promise<QueryResult>;
418
446
  /**
419
447
  * Run arbitrary SQL resolved against named partition sets. Composes
@@ -472,5 +500,12 @@ interface EngineOptions {
472
500
  now?: () => number;
473
501
  }
474
502
  declare function dayPartition(date: string): string;
503
+ /**
504
+ * Hourly partition keyed by the PT calendar day (`YYYY-MM-DD`). One parquet
505
+ * per day holds 24 hourly buckets — read-merge-write keeps `(url, hour)`
506
+ * idempotency across retries. Names sort lexically alongside daily ones but
507
+ * never collide because of the `hourly/` prefix.
508
+ */
509
+ declare function hourPartition(date: string): string;
475
510
  declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
476
- export { SyncStateScope as A, inferSearchType as B, RunSQLOptions as C, SyncStateDetail as D, SyncState as E, WatermarkScope as F, enumeratePartitions as G, CompactionThresholds as H, WriteCtx as I, WriteResult as L, TenantCtx$1 as M, Watermark as N, SyncStateFilter as O, WatermarkFilter as P, dayPartition as R, Row$1 as S, StorageEngine as T, RAW_DAILY_COMPACT_THRESHOLD as U, objectKey as V, countRawDailies as W, QueryCtx as _, EngineOptions as a, QueryExecutor as b, ListLiveFilter as c, ManifestPurgeResult as d, ManifestStore as f, PurgeUrlsResult as g, PurgeResult as h, DataSource as i, TableName$1 as j, SyncStateKind as k, LockScope as l, PurgeFilter as m, CompactionTier as n, FileSetRef as o, ParquetCodec as p, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, ManifestEntry as u, QueryExecuteOptions as v, SearchType$1 as w, QueryResult as x, QueryExecuteResult as y, inferLegacyTier as z };
511
+ export { SyncStateKind as A, hourPartition as B, Row$1 as C, SyncState as D, StorageEngine as E, WatermarkFilter as F, RAW_DAILY_COMPACT_THRESHOLD as G, inferSearchType as H, WatermarkScope as I, countRawDailies as K, WriteCtx as L, TableName$1 as M, TenantCtx$1 as N, SyncStateDetail as O, Watermark as P, WriteResult as R, QueryResult as S, SearchType$1 as T, objectKey as U, inferLegacyTier as V, CompactionThresholds as W, PurgeUrlsResult as _, EngineOptions as a, QueryExecuteResult as b, Grain$1 as c, ManifestEntry as d, ManifestPurgeResult as f, PurgeResult as g, PurgeFilter as h, DataSource as i, SyncStateScope as j, SyncStateFilter as k, ListLiveFilter as l, ParquetCodec as m, CompactionTier as n, FileSetRef as o, ManifestStore as p, enumeratePartitions as q, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, LockScope as u, QueryCtx as v, RunSQLOptions as w, QueryExecutor as x, QueryExecuteOptions as y, dayPartition as z };
@@ -11,6 +11,9 @@ function inferLegacyTier(entry) {
11
11
  function dayPartition(date) {
12
12
  return `daily/${date}`;
13
13
  }
14
+ function hourPartition(date) {
15
+ return `hourly/${date}`;
16
+ }
14
17
  function monthPartition(month) {
15
18
  return `monthly/${month}`;
16
19
  }
@@ -36,4 +39,4 @@ function objectKey(ctx, table, partition, version, searchType) {
36
39
  function tenantPrefix(ctx) {
37
40
  return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/` : `u_${ctx.userId}/`;
38
41
  }
39
- export { mondayOfWeek as a, quarterOfMonth as c, weekPartition as d, inferSearchType as i, quarterPartition as l, dayPartition as n, monthPartition as o, inferLegacyTier as r, objectKey as s, DEFAULT_SEARCH_TYPE as t, tenantPrefix as u };
42
+ export { inferSearchType as a, objectKey as c, tenantPrefix as d, weekPartition as f, inferLegacyTier as i, quarterOfMonth as l, dayPartition as n, mondayOfWeek as o, hourPartition as r, monthPartition as s, DEFAULT_SEARCH_TYPE as t, quarterPartition as u };
@@ -1,4 +1,4 @@
1
- import { f as ManifestStore, i as DataSource } from "../_chunks/storage.mjs";
1
+ import { i as DataSource, p as ManifestStore } from "../_chunks/storage.mjs";
2
2
  interface FilesystemDataSourceOptions {
3
3
  rootDir: string;
4
4
  }
@@ -1,4 +1,4 @@
1
- import { i as inferSearchType, r as inferLegacyTier } from "../_chunks/storage.mjs";
1
+ import { a as inferSearchType, i as inferLegacyTier } from "../_chunks/storage.mjs";
2
2
  import { dirname, join, resolve } from "node:path";
3
3
  import { Buffer } from "node:buffer";
4
4
  import { randomBytes } from "node:crypto";
@@ -1,4 +1,4 @@
1
- import { S as Row, i as DataSource, j as TableName, p as ParquetCodec, t as CodecCtx } from "../_chunks/storage.mjs";
1
+ import { C as Row, M as TableName, i as DataSource, m as ParquetCodec, t as CodecCtx } from "../_chunks/storage.mjs";
2
2
  import { t as ColumnDef } from "../_chunks/schema.mjs";
3
3
  declare function encodeRowsToParquet(table: TableName, rows: readonly Row[]): Uint8Array;
4
4
  interface EncodeFlexOptions {
@@ -1,4 +1,4 @@
1
- import { T as StorageEngine, i as DataSource } from "../_chunks/storage.mjs";
1
+ import { E as StorageEngine, i as DataSource } from "../_chunks/storage.mjs";
2
2
  import { NodeDuckDBOptions, createNodeDuckDBHandle, resetNodeDuckDB } from "./duckdb-node.mjs";
3
3
  import { t as SnapshotIndex } from "../_chunks/snapshot.mjs";
4
4
  import { Row, TableName } from "@gscdump/contracts";
@@ -1,4 +1,4 @@
1
- import { a as createDuckDBExecutor, i as createDuckDBCodec, n as createStorageEngine } from "../_chunks/engine.mjs";
1
+ import { a as createDuckDBCodec, n as createStorageEngine, o as createDuckDBExecutor } from "../_chunks/engine.mjs";
2
2
  import { createNodeDuckDBHandle, resetNodeDuckDB } from "./duckdb-node.mjs";
3
3
  import { createFilesystemDataSource, createFilesystemManifestStore } from "./filesystem.mjs";
4
4
  import path from "node:path";
@@ -1,4 +1,4 @@
1
- import { f as ManifestStore, j as TableName } from "../_chunks/storage.mjs";
1
+ import { M as TableName, p as ManifestStore } from "../_chunks/storage.mjs";
2
2
  interface R2ObjectMetadata {
3
3
  etag: string;
4
4
  }
@@ -1,4 +1,4 @@
1
- import { i as inferSearchType, r as inferLegacyTier } from "../_chunks/storage.mjs";
1
+ import { a as inferSearchType, i as inferLegacyTier } from "../_chunks/storage.mjs";
2
2
  const SHARD_RE = /^u_[^/]+\/manifest\/(?<siteId>[^/]+)\/(?<table>[^/]+)\/HEAD$/;
3
3
  function defaultSnapshotId() {
4
4
  return `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
@@ -1,2 +1,2 @@
1
- import { A as SyncStateScope, C as RunSQLOptions, D as SyncStateDetail, E as SyncState, F as WatermarkScope, I as WriteCtx, L as WriteResult, M as TenantCtx, N as Watermark, O as SyncStateFilter, P as WatermarkFilter, S as Row, T as StorageEngine, _ as QueryCtx, a as EngineOptions, b as QueryExecutor, c as ListLiveFilter, f as ManifestStore, i as DataSource, j as TableName, k as SyncStateKind, l as LockScope, n as CompactionTier, o as FileSetRef, p as ParquetCodec, s as GcCtx, t as CodecCtx, u as ManifestEntry, v as QueryExecuteOptions, w as SearchType, x as QueryResult, y as QueryExecuteResult } from "./_chunks/storage.mjs";
2
- export { CodecCtx, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, ListLiveFilter, LockScope, ManifestEntry, ManifestStore, ParquetCodec, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult };
1
+ import { A as SyncStateKind, C as Row, D as SyncState, E as StorageEngine, F as WatermarkFilter, I as WatermarkScope, L as WriteCtx, M as TableName, N as TenantCtx, O as SyncStateDetail, P as Watermark, R as WriteResult, S as QueryResult, T as SearchType, a as EngineOptions, b as QueryExecuteResult, d as ManifestEntry, i as DataSource, j as SyncStateScope, k as SyncStateFilter, l as ListLiveFilter, m as ParquetCodec, n as CompactionTier, o as FileSetRef, p as ManifestStore, s as GcCtx, t as CodecCtx, u as LockScope, v as QueryCtx, w as RunSQLOptions, x as QueryExecutor, y as QueryExecuteOptions } from "./_chunks/storage.mjs";
2
+ export type { CodecCtx, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, ListLiveFilter, LockScope, ManifestEntry, ManifestStore, ParquetCodec, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult };
package/dist/index.d.mts CHANGED
@@ -1,21 +1,49 @@
1
- import { A as SyncStateScope, B as inferSearchType, C as RunSQLOptions, D as SyncStateDetail, E as SyncState, F as WatermarkScope, G as enumeratePartitions, H as CompactionThresholds, I as WriteCtx, L as WriteResult, M as TenantCtx, N as Watermark, O as SyncStateFilter, P as WatermarkFilter, R as dayPartition, S as Row, T as StorageEngine, U as RAW_DAILY_COMPACT_THRESHOLD, V as objectKey, W as countRawDailies, _ as QueryCtx, a as EngineOptions, b as QueryExecutor, c as ListLiveFilter, d as ManifestPurgeResult, f as ManifestStore, g as PurgeUrlsResult, h as PurgeResult, i as DataSource, j as TableName, k as SyncStateKind, l as LockScope, m as PurgeFilter, n as CompactionTier, o as FileSetRef, p as ParquetCodec, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as ManifestEntry, v as QueryExecuteOptions, w as SearchType, x as QueryResult, y as QueryExecuteResult, z as inferLegacyTier } from "./_chunks/storage.mjs";
1
+ import { A as SyncStateKind, B as hourPartition, C as Row, D as SyncState, E as StorageEngine, F as WatermarkFilter, G as RAW_DAILY_COMPACT_THRESHOLD, H as inferSearchType, I as WatermarkScope, K as countRawDailies, L as WriteCtx, M as TableName, N as TenantCtx, O as SyncStateDetail, P as Watermark, R as WriteResult, S as QueryResult, T as SearchType, U as objectKey, V as inferLegacyTier, W as CompactionThresholds, _ as PurgeUrlsResult, a as EngineOptions, b as QueryExecuteResult, c as Grain, d as ManifestEntry, f as ManifestPurgeResult, g as PurgeResult, h as PurgeFilter, i as DataSource, j as SyncStateScope, k as SyncStateFilter, l as ListLiveFilter, m as ParquetCodec, n as CompactionTier, o as FileSetRef, p as ManifestStore, q as enumeratePartitions, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as LockScope, v as QueryCtx, w as RunSQLOptions, x as QueryExecutor, y as QueryExecuteOptions, z as dayPartition } from "./_chunks/storage.mjs";
2
2
  import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
3
- import { _ as pages, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as page_keywords, h as keywords, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema } from "./_chunks/schema.mjs";
3
+ import { _ as page_keywords, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as keywords, h as hourly_pages, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema, v as pages } from "./_chunks/schema.mjs";
4
4
  import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
5
5
  import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
6
6
  import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
7
+ import { rebuildDailyFromHourly } from "./rollups.mjs";
7
8
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
8
- import { Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
9
+ import { Grain as Grain$1, Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
9
10
  declare function coerceRow(row: Row$1): Row$1;
10
11
  declare function coerceRows(rows: readonly Row$1[]): Row$1[];
11
12
  declare const MAX_DAY_BYTES: number;
12
13
  declare function createStorageEngine(opts: EngineOptions): StorageEngine;
14
+ interface GcDeps {
15
+ dataSource: DataSource;
16
+ manifestStore: ManifestStore;
17
+ }
18
+ interface GcOptions {
19
+ userId?: string;
20
+ siteId?: string;
21
+ /**
22
+ * Retention for hourly partitions (`hourly/{date}`) in milliseconds.
23
+ * Defaults to 90 days; entries with `createdAt < now - hourlyRetentionMs`
24
+ * are retired and their bytes deleted alongside ordinary orphan sweeping.
25
+ */
26
+ hourlyRetentionMs?: number;
27
+ }
28
+ declare function gcOrphansImpl(deps: GcDeps, now: number, graceMs: number, opts?: GcOptions): Promise<{
29
+ deleted: number;
30
+ }>;
13
31
  interface IngestAccumulatorEngine {
14
32
  writeDay: (scope: TenantCtx & {
15
33
  table: TableName$1;
16
34
  date: string;
17
35
  searchType?: SearchType;
18
36
  }, rows: Row$1[]) => Promise<void>;
37
+ /**
38
+ * Routed when the accumulator's `ctx.grain === 'hour'`. Same scope shape as
39
+ * `writeDay`; `date` is the PT calendar day, rows carry `hour` + `date`.
40
+ * Optional so hosts that never opt into hourly need not implement it.
41
+ */
42
+ writeHour?: (scope: TenantCtx & {
43
+ table: TableName$1;
44
+ date: string;
45
+ searchType?: SearchType;
46
+ }, rows: Row$1[]) => Promise<void>;
19
47
  setSyncState: (scope: TenantCtx & {
20
48
  table: TableName$1;
21
49
  date: string;
@@ -28,6 +56,12 @@ interface IngestAccumulatorCtx {
28
56
  userId: string | number;
29
57
  siteId: string;
30
58
  searchType?: SearchType;
59
+ /**
60
+ * Temporal granularity for this accumulator. `'day'` (default) routes
61
+ * flushed buckets to `engine.writeDay`. `'hour'` routes to
62
+ * `engine.writeHour` and requires the engine implementation to be set.
63
+ */
64
+ grain?: Grain$1;
31
65
  }
32
66
  interface IngestAccumulatorHooks {
33
67
  /**
@@ -113,4 +147,4 @@ declare const MIN_SYNC_IMPRESSIONS = 1;
113
147
  declare const MIN_COUNTRY_IMPRESSIONS = 10;
114
148
  declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
115
149
  declare const MAX_TRACKED_URLS_PER_SITE = 200000;
116
- export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type GscApiRow, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, 
inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
150
+ export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, 
getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
package/dist/index.mjs CHANGED
@@ -1,11 +1,12 @@
1
1
  import { n as coerceRows, t as coerceRow } from "./_chunks/coerce.mjs";
2
- import { a as inferTable, c as countries, d as keywords, f as page_keywords, i as dimensionToColumn, l as devices, n as allTables, p as pages, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
3
- import { i as inferSearchType, n as dayPartition, r as inferLegacyTier, s as objectKey, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
2
+ import { a as inferTable, c as countries, d as hourly_pages, f as keywords, i as dimensionToColumn, l as devices, m as pages, n as allTables, p as page_keywords, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
3
+ import { a as inferSearchType, c as objectKey, i as inferLegacyTier, n as dayPartition, r as hourPartition, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
4
4
  import { a as RAW_DAILY_COMPACT_THRESHOLD, c as enumeratePartitions, i as substituteNamedFiles, r as resolveToSQL, s as countRawDailies, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
5
5
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
6
- import { a as createDuckDBExecutor, i as createDuckDBCodec, n as createStorageEngine, r as canonicalEmptyParquetSchema, t as MAX_DAY_BYTES } from "./_chunks/engine.mjs";
6
+ import { a as createDuckDBCodec, i as canonicalEmptyParquetSchema, n as createStorageEngine, o as createDuckDBExecutor, r as gcOrphansImpl, t as MAX_DAY_BYTES } from "./_chunks/engine.mjs";
7
7
  import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
8
8
  import "./planner.mjs";
9
+ import { rebuildDailyFromHourly } from "./rollups.mjs";
9
10
  import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
10
11
  const NOOP_RESULT = {
11
12
  flushed: 0,
@@ -37,7 +38,7 @@ function createIngestAccumulator(opts) {
37
38
  const acc = createRowAccumulator(accOpts);
38
39
  async function writeOne(table, date, rows) {
39
40
  const scope = scopeOf(ctx, table, date);
40
- return engine.writeDay(scope, rows).then(() => engine.setSyncState(scope, "done")).then(async () => {
41
+ return (ctx.grain === "hour" ? engine.writeHour ?? (() => Promise.reject(/* @__PURE__ */ new Error("ingest accumulator: grain=hour requires engine.writeHour"))) : engine.writeDay)(scope, rows).then(() => engine.setSyncState(scope, "done")).then(async () => {
41
42
  await hooks.onWritten?.({
42
43
  table,
43
44
  date,
@@ -214,4 +215,4 @@ const MIN_SYNC_IMPRESSIONS = 1;
214
215
  const MIN_COUNTRY_IMPRESSIONS = 10;
215
216
  const MAX_SITEMAP_URLS_PER_SITE = 5e4;
216
217
  const MAX_TRACKED_URLS_PER_SITE = 2e5;
217
- export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
218
+ export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
package/dist/ingest.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { S as Row, j as TableName } from "./_chunks/storage.mjs";
1
+ import { C as Row, M as TableName } from "./_chunks/storage.mjs";
2
2
  /**
3
3
  * Canonical GSC API dimension order per table. Consumers hitting the raw
4
4
  * `searchanalytics.query` endpoint must request dimensions in this order so
package/dist/ingest.mjs CHANGED
@@ -8,7 +8,8 @@ const TABLE_DIMS = {
8
8
  "query",
9
9
  "date"
10
10
  ],
11
- search_appearance: ["searchAppearance", "date"]
11
+ search_appearance: ["searchAppearance", "date"],
12
+ hourly_pages: ["hour", "page"]
12
13
  };
13
14
  function toPath(gscUrl) {
14
15
  try {
@@ -80,6 +81,21 @@ function transformGscRow(table, apiRow, options = {}) {
80
81
  }
81
82
  };
82
83
  }
84
+ if (table === "hourly_pages") {
85
+ const hour = String(keys[0] ?? "");
86
+ const date = hour.slice(0, 10);
87
+ return {
88
+ date,
89
+ row: {
90
+ url: toPath(String(keys[1] ?? "")),
91
+ hour,
92
+ date,
93
+ clicks,
94
+ impressions,
95
+ sum_position
96
+ }
97
+ };
98
+ }
83
99
  if (table === "search_appearance") {
84
100
  const date = String(keys[1] ?? "");
85
101
  return {
@@ -1,3 +1,3 @@
1
- import { G as enumeratePartitions } from "./_chunks/storage.mjs";
1
+ import { q as enumeratePartitions } from "./_chunks/storage.mjs";
2
2
  import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
3
- export { FILES_PLACEHOLDER, ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
3
+ export { FILES_PLACEHOLDER, type ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
@@ -1,8 +1,8 @@
1
- import { j as TableName$1, w as SearchType$1 } from "../_chunks/storage.mjs";
1
+ import { M as TableName$1, T as SearchType$1 } from "../_chunks/storage.mjs";
2
2
  import { a as ResolvedSQLOptimized, i as ResolvedSQL, n as ExtraQuery, o as ResolverAdapter, r as ResolvedComparisonSQL, s as ResolverOptions, t as ComparisonFilter } from "../_chunks/types.mjs";
3
3
  import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities } from "gscdump/query/plan";
4
4
  import { SQL } from "drizzle-orm";
5
- import { TableName } from "@gscdump/contracts";
5
+ import { Grain, TableName } from "@gscdump/contracts";
6
6
  import { BuilderState, Dimension, FilterInput, InternalFilter, Metric } from "gscdump/query";
7
7
  type DimensionSurface = 'api' | 'stored' | 'derived';
8
8
  interface DimensionBinding {
@@ -102,6 +102,14 @@ interface RunQueryCtx {
102
102
  siteId: string;
103
103
  table: TableName$1;
104
104
  searchType?: SearchType$1;
105
+ /**
106
+ * Temporal granularity. `'day'` (default) drives `enumeratePartitions` to
107
+ * emit `daily/{date}` only; hourly partitions are skipped by construction.
108
+ * `'hour'` is reserved for hourly read paths and must use the dedicated
109
+ * hourly query surface (callers pass `partitions: [hourly/{date}]`
110
+ * directly through `runSQL`).
111
+ */
112
+ grain?: Grain;
105
113
  }
106
114
  interface RunSQLFn {
107
115
  (opts: {
@@ -1,8 +1,7 @@
1
- import { i as DataSource, o as FileSetRef } from "./_chunks/storage.mjs";
1
+ import { C as Row$1, i as DataSource, o as FileSetRef } from "./_chunks/storage.mjs";
2
2
  import { t as ColumnDef } from "./_chunks/schema.mjs";
3
3
  import { TenantCtx } from "@gscdump/contracts";
4
4
  import { SearchType } from "gscdump/query";
5
- import * as _$_gscdump_engine_contracts0 from "@gscdump/engine/contracts";
6
5
  interface RollupCtx extends TenantCtx {
7
6
  /** When the rollup was built. Stamped into payload + filename. */
8
7
  builtAt: number;
@@ -15,7 +14,7 @@ interface RollupEngine {
15
14
  runSQL: (opts: {
16
15
  ctx: TenantCtx;
17
16
  fileSets: Record<string, FileSetRef>;
18
- table?: _$_gscdump_engine_contracts0.TableName;
17
+ table?: import('@gscdump/engine/contracts').TableName;
19
18
  sql: string;
20
19
  params?: unknown[];
21
20
  /**
@@ -26,7 +25,7 @@ interface RollupEngine {
26
25
  */
27
26
  searchType?: SearchType;
28
27
  }) => Promise<{
29
- rows: _$_gscdump_engine_contracts0.Row[];
28
+ rows: import('@gscdump/engine/contracts').Row[];
30
29
  }>;
31
30
  }
32
31
  /**
@@ -235,5 +234,37 @@ declare const sitemapHealthRollup: RollupDef;
235
234
  * exist on the site.
236
235
  */
237
236
  declare const sitemapChanges28dRollup: RollupDef;
237
/**
 * Inputs for {@link rebuildDailyFromHourly}.
 */
interface RebuildDailyFromHourlyOptions {
  /**
   * Rollup engine used to read the hourly partition, extended with the
   * `writeDay` sink that receives the aggregated daily rows.
   */
  engine: RollupEngine & {
    writeDay: (scope: TenantCtx & {
      table: TableTypeName;
      date: string;
      searchType?: SearchType;
    }, rows: Row$1[]) => Promise<void>;
  };
  /** Tenant whose hourly partition is read and whose daily partition is written. */
  ctx: TenantCtx;
  /** PT calendar day to roll up. */
  date: string;
  /** Only Discover data is rolled up this way, so this is the sole accepted value. */
  searchType: 'discover';
}
type TableTypeName = import('@gscdump/contracts').TableName;
/**
 * Aggregate one day's `hourly_pages` partition into the daily `pages` shape
 * and write it to the daily Discover partition. After this runs for date D,
 * the daily query path serves D from `pages/.../daily/D` and the `hourly/D`
 * partition becomes read-only / GC-only.
 *
 * `(position - 1)` weighting matches the storage convention encoded by
 * `toSumPosition`: `sum_position = SUM((position - 1) * impressions)`, so a
 * downstream `SUM(sum_position) / SUM(impressions) + 1` recovers the mean.
 *
 * searchType-scoped: only call with `searchType: 'discover'`. The hourly
 * partition lives under `hourly_pages` and the output lands under `pages` so
 * existing dashboard queries (which read `pages`) see the rolled-up day
 * transparently.
 *
 * @param opts - Engine, tenant, day and search type for the rollup.
 * @returns The number of aggregated rows handed to `engine.writeDay`.
 */
declare function rebuildDailyFromHourly(opts: RebuildDailyFromHourlyOptions): Promise<{
  rowsWritten: number;
}>;
238
269
  declare const DEFAULT_ROLLUPS: readonly RollupDef[];
239
- export { DEFAULT_ROLLUPS, ParquetRollupPointer, RebuildRollupResult, RebuildRollupsOptions, RollupBucket, RollupCtx, RollupDef, RollupEngine, RollupEnvelope, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
270
+ export { DEFAULT_ROLLUPS, ParquetRollupPointer, RebuildDailyFromHourlyOptions, RebuildRollupResult, RebuildRollupsOptions, RollupBucket, RollupCtx, RollupDef, RollupEngine, RollupEnvelope, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildDailyFromHourly, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
package/dist/rollups.mjs CHANGED
@@ -577,6 +577,43 @@ const sitemapChanges28dRollup = {
577
577
  };
578
578
  }
579
579
  };
580
+ async function rebuildDailyFromHourly(opts) {
581
+ const { engine, ctx, date, searchType } = opts;
582
+ const rows = (await engine.runSQL({
583
+ ctx,
584
+ table: "hourly_pages",
585
+ fileSets: { FILES: {
586
+ table: "hourly_pages",
587
+ partitions: [`hourly/${date}`]
588
+ } },
589
+ searchType,
590
+ sql: `
591
+ SELECT
592
+ url,
593
+ DATE '${date}' AS date,
594
+ SUM(clicks)::BIGINT AS clicks,
595
+ SUM(impressions)::BIGINT AS impressions,
596
+ SUM(sum_position)::DOUBLE AS sum_position
597
+ FROM read_parquet({{FILES}}, union_by_name = true)
598
+ WHERE date = '${date}'
599
+ GROUP BY url
600
+ `
601
+ })).rows.map((r) => ({
602
+ url: r.url,
603
+ date,
604
+ clicks: Number(r.clicks),
605
+ impressions: Number(r.impressions),
606
+ sum_position: Number(r.sum_position)
607
+ }));
608
+ await engine.writeDay({
609
+ userId: ctx.userId,
610
+ siteId: ctx.siteId,
611
+ table: "pages",
612
+ date,
613
+ searchType
614
+ }, rows);
615
+ return { rowsWritten: rows.length };
616
+ }
580
617
  const DEFAULT_ROLLUPS = [
581
618
  dailyTotalsRollup,
582
619
  weeklyTotalsRollup,
@@ -589,4 +626,4 @@ const DEFAULT_ROLLUPS = [
589
626
  sitemapHealthRollup,
590
627
  sitemapChanges28dRollup
591
628
  ];
592
- export { DEFAULT_ROLLUPS, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
629
+ export { DEFAULT_ROLLUPS, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildDailyFromHourly, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
package/dist/schema.d.mts CHANGED
@@ -1,2 +1,2 @@
1
- import { _ as pages, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as page_keywords, h as keywords, i as TableSchema, l as schemaFor, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema, v as search_appearance } from "./_chunks/schema.mjs";
2
- export { ColumnDef, ColumnType, DrizzleSchema, SCHEMAS, TABLE_METADATA, TableSchema, allTables, countries, currentSchemaVersion, devices, dimensionToColumn, drizzleSchema, inferTable, keywords, page_keywords, pages, schemaFor, search_appearance };
1
+ import { _ as page_keywords, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as keywords, h as hourly_pages, i as TableSchema, l as schemaFor, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema, v as pages, y as search_appearance } from "./_chunks/schema.mjs";
2
+ export { type ColumnDef, type ColumnType, type DrizzleSchema, SCHEMAS, TABLE_METADATA, type TableSchema, allTables, countries, currentSchemaVersion, devices, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, keywords, page_keywords, pages, schemaFor, search_appearance };
package/dist/schema.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import { a as inferTable, c as countries, d as keywords, f as page_keywords, i as dimensionToColumn, l as devices, m as search_appearance, n as allTables, o as schemaFor, p as pages, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
2
- export { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, devices, dimensionToColumn, drizzleSchema, inferTable, keywords, page_keywords, pages, schemaFor, search_appearance };
1
+ import { a as inferTable, c as countries, d as hourly_pages, f as keywords, h as search_appearance, i as dimensionToColumn, l as devices, m as pages, n as allTables, o as schemaFor, p as page_keywords, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
2
+ export { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, devices, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, keywords, page_keywords, pages, schemaFor, search_appearance };
package/dist/scope.d.mts CHANGED
@@ -16,6 +16,12 @@ interface ScopedRunnerOptions {
16
16
  startDate?: string;
17
17
  /** Inclusive upper bound for `date`. Ignored if `window` is supplied. */
18
18
  endDate?: string;
19
+ /**
20
+ * Temporal granularity. `'day'` (default) filters on `table.date`. `'hour'`
21
+ * filters on `table.hour` when the table exposes that column (e.g.
22
+ * `hourly_pages`); falls back to date filtering otherwise.
23
+ */
24
+ grain?: 'day' | 'hour';
19
25
  }
20
26
  interface TableScope {
21
27
  wherePredicates: SQL[];
package/dist/scope.mjs CHANGED
@@ -2,11 +2,13 @@ import { and, eq, gte, lte } from "drizzle-orm";
2
2
  function buildTableScope(table, opts) {
3
3
  const predicates = [];
4
4
  if (opts.siteId && "site_id" in table) predicates.push(eq(table.site_id, opts.siteId));
5
- if ("date" in table) {
5
+ const useHour = (opts.grain ?? "day") === "hour" && "hour" in table;
6
+ const filterCol = useHour ? table.hour : table.date;
7
+ if ("date" in table || useHour) {
6
8
  const start = opts.window?.start ?? opts.startDate;
7
9
  const end = opts.window?.end ?? opts.endDate;
8
- if (start) predicates.push(gte(table.date, start));
9
- if (end) predicates.push(lte(table.date, end));
10
+ if (start) predicates.push(gte(filterCol, start));
11
+ if (end) predicates.push(lte(filterCol, end));
10
12
  }
11
13
  return {
12
14
  wherePredicates: predicates,
@@ -1,4 +1,4 @@
1
- import { M as TenantCtx, S as Row, T as StorageEngine, w as SearchType$1 } from "../_chunks/storage.mjs";
1
+ import { C as Row, E as StorageEngine, N as TenantCtx, T as SearchType$1 } from "../_chunks/storage.mjs";
2
2
  import { n as AnalysisResult, t as AnalysisParams } from "../_chunks/analysis-types.mjs";
3
3
  import { o as ResolverAdapter } from "../_chunks/types.mjs";
4
4
  import { C as ExecuteSqlOptions, E as SourceCapabilities, S as AnalysisSourceKind, T as QueryRow, t as AnalyzerRegistry, w as FileSet, x as AnalysisQuerySource } from "../_chunks/registry.mjs";
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.17.5",
4
+ "version": "0.18.1",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -169,8 +169,8 @@
169
169
  "dependencies": {
170
170
  "drizzle-orm": "^0.45.2",
171
171
  "proper-lockfile": "^4.1.2",
172
- "@gscdump/contracts": "0.17.5",
173
- "gscdump": "0.17.5"
172
+ "gscdump": "0.18.1",
173
+ "@gscdump/contracts": "0.18.1"
174
174
  },
175
175
  "devDependencies": {
176
176
  "@duckdb/duckdb-wasm": "^1.32.0",