@gscdump/engine 0.21.3 → 0.22.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/analysis-types.d.mts +1 -1
- package/dist/_chunks/coerce.mjs +1 -1
- package/dist/_chunks/dispatch.mjs +1 -1
- package/dist/_chunks/duckdb.d.mts +2 -2
- package/dist/_chunks/engine.mjs +4 -4
- package/dist/_chunks/iceberg-schema.mjs +7 -3
- package/dist/_chunks/index.d.mts +2 -2
- package/dist/_chunks/parquet-plan.mjs +3 -3
- package/dist/_chunks/planner.d.mts +2 -2
- package/dist/_chunks/registry.d.mts +4 -4
- package/dist/_chunks/resolver.mjs +60 -3
- package/dist/_chunks/schema.d.mts +1067 -275
- package/dist/_chunks/schema.mjs +70 -2
- package/dist/_chunks/sink.d.mts +49 -10
- package/dist/_chunks/snapshot.d.mts +1 -1
- package/dist/_chunks/storage.d.mts +1 -1
- package/dist/_chunks/storage.mjs +1 -1
- package/dist/_chunks/types.d.mts +1 -1
- package/dist/adapters/duckdb-node.d.mts +1 -1
- package/dist/adapters/filesystem.d.mts +1 -1
- package/dist/adapters/filesystem.mjs +1 -1
- package/dist/adapters/hyparquet.d.mts +2 -2
- package/dist/adapters/hyparquet.mjs +1 -1
- package/dist/adapters/node.d.mts +2 -2
- package/dist/adapters/node.mjs +1 -1
- package/dist/adapters/r2-manifest.d.mts +1 -1
- package/dist/adapters/r2-manifest.mjs +1 -1
- package/dist/adapters/r2.d.mts +1 -1
- package/dist/analysis-types.d.mts +1 -1
- package/dist/analyzer/index.d.mts +2 -2
- package/dist/analyzer/index.mjs +1 -1
- package/dist/contracts.d.mts +1 -1
- package/dist/entities.d.mts +1 -1
- package/dist/index.d.mts +6 -6
- package/dist/index.mjs +64 -8
- package/dist/ingest.d.mts +5 -1
- package/dist/ingest.mjs +57 -1
- package/dist/period/index.d.mts +1 -1
- package/dist/planner.d.mts +2 -2
- package/dist/planner.mjs +1 -1
- package/dist/report/index.d.mts +2 -2
- package/dist/resolver/index.d.mts +2 -2
- package/dist/resolver/index.mjs +1 -1
- package/dist/rollups.d.mts +2 -2
- package/dist/schema.d.mts +2 -2
- package/dist/schema.mjs +2 -2
- package/dist/sink-node.d.mts +1 -1
- package/dist/sink-node.mjs +1 -1
- package/dist/snapshot.d.mts +1 -1
- package/dist/source/index.d.mts +4 -4
- package/dist/source/index.mjs +3 -3
- package/package.json +8 -8
package/dist/_chunks/schema.mjs
CHANGED
|
@@ -51,6 +51,27 @@ const search_appearance = pgTable("search_appearance", {
|
|
|
51
51
|
date: dateCol(),
|
|
52
52
|
...metricCols()
|
|
53
53
|
});
|
|
54
|
+
// Table "search_appearance_pages": non-null `searchAppearance` and `url` varchar
// columns, a `date` column built by dateCol(), plus whatever columns metricCols()
// spreads in. Registered in `drizzleSchema` and listed in `METRIC_TABLES` below.
const search_appearance_pages = pgTable("search_appearance_pages", {
|
|
55
|
+
searchAppearance: varchar("searchAppearance").notNull(),
|
|
56
|
+
url: varchar("url").notNull(),
|
|
57
|
+
date: dateCol(),
|
|
58
|
+
...metricCols()
|
|
59
|
+
});
|
|
60
|
+
// Table "search_appearance_queries": non-null `searchAppearance` and `query`
// varchar columns, a nullable `query_canonical` varchar (no .notNull()), a `date`
// column built by dateCol(), plus whatever columns metricCols() spreads in.
const search_appearance_queries = pgTable("search_appearance_queries", {
|
|
61
|
+
searchAppearance: varchar("searchAppearance").notNull(),
|
|
62
|
+
query: varchar("query").notNull(),
|
|
63
|
+
query_canonical: varchar("query_canonical"),
|
|
64
|
+
date: dateCol(),
|
|
65
|
+
...metricCols()
|
|
66
|
+
});
|
|
67
|
+
// Table "search_appearance_page_queries": non-null `searchAppearance`, `url` and
// `query` varchar columns, a nullable `query_canonical` varchar (no .notNull()),
// a `date` column built by dateCol(), plus whatever columns metricCols() spreads in.
const search_appearance_page_queries = pgTable("search_appearance_page_queries", {
|
|
68
|
+
searchAppearance: varchar("searchAppearance").notNull(),
|
|
69
|
+
url: varchar("url").notNull(),
|
|
70
|
+
query: varchar("query").notNull(),
|
|
71
|
+
query_canonical: varchar("query_canonical"),
|
|
72
|
+
date: dateCol(),
|
|
73
|
+
...metricCols()
|
|
74
|
+
});
|
|
54
75
|
const hourly_pages = pgTable("hourly_pages", {
|
|
55
76
|
url: varchar("url").notNull(),
|
|
56
77
|
hour: varchar("hour").notNull(),
|
|
@@ -64,6 +85,9 @@ const drizzleSchema = {
|
|
|
64
85
|
page_queries,
|
|
65
86
|
dates,
|
|
66
87
|
search_appearance,
|
|
88
|
+
search_appearance_pages,
|
|
89
|
+
search_appearance_queries,
|
|
90
|
+
search_appearance_page_queries,
|
|
67
91
|
hourly_pages
|
|
68
92
|
};
|
|
69
93
|
const TABLE_METADATA = {
|
|
@@ -105,6 +129,47 @@ const TABLE_METADATA = {
|
|
|
105
129
|
clusterKey: ["searchAppearance", "date"],
|
|
106
130
|
version: 1
|
|
107
131
|
},
|
|
132
|
+
search_appearance_pages: {
|
|
133
|
+
sortKey: [
|
|
134
|
+
"date",
|
|
135
|
+
"searchAppearance",
|
|
136
|
+
"url"
|
|
137
|
+
],
|
|
138
|
+
clusterKey: [
|
|
139
|
+
"searchAppearance",
|
|
140
|
+
"url",
|
|
141
|
+
"date"
|
|
142
|
+
],
|
|
143
|
+
version: 1
|
|
144
|
+
},
|
|
145
|
+
search_appearance_queries: {
|
|
146
|
+
sortKey: [
|
|
147
|
+
"date",
|
|
148
|
+
"searchAppearance",
|
|
149
|
+
"query"
|
|
150
|
+
],
|
|
151
|
+
clusterKey: [
|
|
152
|
+
"searchAppearance",
|
|
153
|
+
"query",
|
|
154
|
+
"date"
|
|
155
|
+
],
|
|
156
|
+
version: 1
|
|
157
|
+
},
|
|
158
|
+
search_appearance_page_queries: {
|
|
159
|
+
sortKey: [
|
|
160
|
+
"date",
|
|
161
|
+
"searchAppearance",
|
|
162
|
+
"url",
|
|
163
|
+
"query"
|
|
164
|
+
],
|
|
165
|
+
clusterKey: [
|
|
166
|
+
"searchAppearance",
|
|
167
|
+
"url",
|
|
168
|
+
"query",
|
|
169
|
+
"date"
|
|
170
|
+
],
|
|
171
|
+
version: 1
|
|
172
|
+
},
|
|
108
173
|
hourly_pages: {
|
|
109
174
|
sortKey: [
|
|
110
175
|
"date",
|
|
@@ -149,6 +214,9 @@ const METRIC_TABLES = [
|
|
|
149
214
|
"page_queries",
|
|
150
215
|
"dates",
|
|
151
216
|
"search_appearance",
|
|
217
|
+
"search_appearance_pages",
|
|
218
|
+
"search_appearance_queries",
|
|
219
|
+
"search_appearance_page_queries",
|
|
152
220
|
"hourly_pages"
|
|
153
221
|
];
|
|
154
222
|
const SCHEMAS = Object.fromEntries(METRIC_TABLES.map((t) => [t, tableSchemaFrom(t)]));
|
|
@@ -165,12 +233,12 @@ function inferTable(dimensions) {
|
|
|
165
233
|
const dims = new Set(dimensions);
|
|
166
234
|
const hasPage = dims.has("page");
|
|
167
235
|
const hasQuery = dims.has("query");
|
|
236
|
+
if (dims.has("searchAppearance")) return hasPage && hasQuery ? "search_appearance_page_queries" : hasPage ? "search_appearance_pages" : hasQuery ? "search_appearance_queries" : "search_appearance";
|
|
168
237
|
if (hasPage && hasQuery) return "page_queries";
|
|
169
238
|
if (hasQuery) return "queries";
|
|
170
239
|
if (hasPage) return "pages";
|
|
171
240
|
if (dims.has("country")) return "countries";
|
|
172
241
|
if (dims.has("device")) return "dates";
|
|
173
|
-
if (dims.has("searchAppearance")) return "search_appearance";
|
|
174
242
|
return "dates";
|
|
175
243
|
}
|
|
176
244
|
function naturalKeyColumns(table) {
|
|
@@ -192,4 +260,4 @@ function dimensionToColumn(dim, _table) {
|
|
|
192
260
|
if (dim === "queryCanonical") return "query_canonical";
|
|
193
261
|
return dim;
|
|
194
262
|
}
|
|
195
|
-
export {
|
|
263
|
+
export { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, dates, dedupeByNaturalKey, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, naturalKeyColumns, page_queries, pages, queries, schemaFor, search_appearance, search_appearance_page_queries, search_appearance_pages, search_appearance_queries };
|
package/dist/_chunks/sink.d.mts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Row as Row$1, SearchType, TenantCtx as TenantCtx$1 } from "./storage.mjs";
|
|
2
2
|
import { icebergAppend, restCatalogConnect, s3SignedResolver } from "icebird";
|
|
3
3
|
import { TableName } from "@gscdump/contracts";
|
|
4
|
-
/** The
|
|
5
|
-
type IcebergTableName = Extract<TableName, 'pages' | 'queries' | 'countries' | 'page_queries' | 'dates'>;
|
|
6
|
-
/** The
|
|
4
|
+
/** The 9 fact tables that exist as global Iceberg tables. */
|
|
5
|
+
type IcebergTableName = Extract<TableName, 'pages' | 'queries' | 'countries' | 'page_queries' | 'dates' | 'search_appearance' | 'search_appearance_pages' | 'search_appearance_queries' | 'search_appearance_page_queries'>;
|
|
6
|
+
/** The Iceberg table names, in canonical order. */
|
|
7
7
|
declare const ICEBERG_TABLES: readonly IcebergTableName[];
|
|
8
8
|
/**
|
|
9
9
|
* Iceberg-native column type. Superset-mapped from the engine `ColumnType`;
|
|
@@ -216,6 +216,43 @@ declare function ensureIcebergNamespace(conn: IcebergConnection): Promise<void>;
|
|
|
216
216
|
declare function createIcebergTables(conn: IcebergConnection, tables?: readonly IcebergTableName[]): Promise<IcebergTableOpResult[]>;
|
|
217
217
|
/** List the table names currently in the catalog namespace. */
|
|
218
218
|
declare function listIcebergTables(conn: IcebergConnection): Promise<string[]>;
|
|
219
|
+
/** A data file in the current snapshot's manifest, scoped to one partition. */
|
|
220
|
+
interface IcebergListedDataFile {
|
|
221
|
+
/** Raw Iceberg `data_file.file_path` (e.g. `s3://gscdump-analytics/.../x.parquet`). */
|
|
222
|
+
filePath: string;
|
|
223
|
+
/** Object key relative to the warehouse bucket (the part after `s3://<bucket>/`). */
|
|
224
|
+
objectKey: string;
|
|
225
|
+
/** File size, converted to a number from the manifest entry's `data_file.file_size_in_bytes`. */
bytes: number;
|
|
226
|
+
/** Row count, converted to a number from the manifest entry's `data_file.record_count`. */
rowCount: number;
|
|
227
|
+
}
|
|
228
|
+
interface ListIcebergDataFilesOptions {
|
|
229
|
+
/** Name of the table passed to the catalog's load-table call. */
table: IcebergTableName;
|
|
230
|
+
/** Partition identity column. */
|
|
231
|
+
siteId: string;
|
|
232
|
+
/** Partition identity column. */
|
|
233
|
+
searchType: string;
|
|
234
|
+
/**
|
|
235
|
+
* Inclusive date range. Every month touched by `[start, end]` is scanned;
|
|
236
|
+
* `month(date)` is the third partition transform.
|
|
237
|
+
*/
|
|
238
|
+
range: {
|
|
239
|
+
/** Lower bound; only its first two `-`-separated fields (year, month) are read. */
start: string;
|
|
240
|
+
/** Upper bound; only its first two `-`-separated fields (year, month) are read. */
end: string;
|
|
241
|
+
};
|
|
242
|
+
}
|
|
243
|
+
/**
|
|
244
|
+
* List the parquet data files in the current snapshot of `table`, filtered
|
|
245
|
+
* to a single partition slice `(siteId, searchType, month(date) ∈ range)`.
|
|
246
|
+
*
|
|
247
|
+
* Cost: 1 REST `loadTable` + N manifest fetches (typically 1–10 small Avro
|
|
248
|
+
* files). Iceberg returns the manifest list embedded in `metadata`, so a
|
|
249
|
+
* cached `metadata` would let callers skip the REST call entirely.
|
|
250
|
+
*
|
|
251
|
+
* Skips deleted entries (status=2) and non-data file types (delete files).
|
|
252
|
+
* Returns object keys + bytes + rowCount so the caller can build presigned
|
|
253
|
+
* URLs without re-walking the catalog.
|
|
254
|
+
*/
|
|
255
|
+
declare function listIcebergDataFiles(conn: IcebergConnection, opts: ListIcebergDataFilesOptions): Promise<IcebergListedDataFile[]>;
|
|
219
256
|
/**
|
|
220
257
|
* Drop tables from the catalog namespace, purging their data objects.
|
|
221
258
|
* Defaults to every table currently in the namespace — used to clear the
|
|
@@ -245,13 +282,15 @@ interface SinkWriteResult {
|
|
|
245
282
|
bytes?: number;
|
|
246
283
|
}
|
|
247
284
|
/**
|
|
248
|
-
* Static description of a sink.
|
|
249
|
-
* stability-cutoff model
|
|
250
|
-
*
|
|
285
|
+
* Static description of a sink. Production ingestion is append-only under the
|
|
286
|
+
* v5 stability-cutoff model. Test and revision-path adapters may expose an
|
|
287
|
+
* overwrite capability explicitly.
|
|
251
288
|
*/
|
|
252
289
|
interface SinkCapabilities {
|
|
253
|
-
/**
|
|
254
|
-
appendOnly:
|
|
290
|
+
/** When true, re-emitting a slice accumulates duplicate rows. */
|
|
291
|
+
appendOnly: boolean;
|
|
292
|
+
/** Whether the sink exposes partition overwrite semantics. */
|
|
293
|
+
canOverwrite?: boolean;
|
|
255
294
|
}
|
|
256
295
|
/**
|
|
257
296
|
* Outcome of `Sink.close()` — which tables' buffered rows reached durable
|
|
@@ -326,4 +365,4 @@ interface LocalIcebergSinkOptions extends SinkOptions {
|
|
|
326
365
|
/** S3-compatible warehouse location (POC: MinIO). */
|
|
327
366
|
warehouse: string;
|
|
328
367
|
}
|
|
329
|
-
export {
|
|
368
|
+
export { CommitRetryOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, LocalIcebergSinkOptions, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, listIcebergDataFiles, listIcebergTables };
|
|
@@ -550,4 +550,4 @@ declare function dayPartition(date: string): string;
|
|
|
550
550
|
*/
|
|
551
551
|
declare function hourPartition(date: string): string;
|
|
552
552
|
declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
|
|
553
|
-
export {
|
|
553
|
+
export { CodecCtx, CompactionThresholds, CompactionTier, DEFAULT_SEARCH_TYPE, DataSource, EngineOptions, FileSetRef, GcCtx, type Grain$1 as Grain, ListLiveFilter, LockScope, ManifestEntry, ManifestPurgeResult, ManifestStore, ParquetCodec, PurgeFilter, PurgeResult, PurgeUrlsResult, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, RAW_DAILY_COMPACT_THRESHOLD, type Row$1 as Row, RunSQLOptions, type SearchType$1 as SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, type TableName$1 as TableName, type TenantCtx$1 as TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult, countRawDailies, dayPartition, dedupeOverlappingTiers, enumeratePartitions, hourPartition, inferLegacyTier, inferSearchType, objectKey, splitOverlappingTiers };
|
package/dist/_chunks/storage.mjs
CHANGED
|
@@ -39,4 +39,4 @@ function objectKey(ctx, table, partition, version, searchType) {
|
|
|
39
39
|
function tenantPrefix(ctx) {
|
|
40
40
|
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/` : `u_${ctx.userId}/`;
|
|
41
41
|
}
|
|
42
|
-
export {
|
|
42
|
+
export { DEFAULT_SEARCH_TYPE, dayPartition, hourPartition, inferLegacyTier, inferSearchType, mondayOfWeek, monthPartition, objectKey, quarterOfMonth, quarterPartition, tenantPrefix, weekPartition };
|
package/dist/_chunks/types.d.mts
CHANGED
|
@@ -51,4 +51,4 @@ interface ExtraQuery {
|
|
|
51
51
|
sql: string;
|
|
52
52
|
params: unknown[];
|
|
53
53
|
}
|
|
54
|
-
export {
|
|
54
|
+
export { ComparisonFilter, ExtraQuery, ResolvedComparisonSQL, ResolvedSQL, ResolvedSQLOptimized, ResolverAdapter, ResolverOptions };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { inferLegacyTier, inferSearchType } from "../_chunks/storage.mjs";
|
|
2
2
|
import { dirname, join, resolve } from "node:path";
|
|
3
3
|
import { Buffer } from "node:buffer";
|
|
4
4
|
import { randomBytes } from "node:crypto";
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { CodecCtx, DataSource, ParquetCodec, Row, TableName } from "../_chunks/storage.mjs";
|
|
2
|
+
import { ColumnDef } from "../_chunks/schema.mjs";
|
|
3
3
|
import { ParquetQueryFilter } from "hyparquet";
|
|
4
4
|
declare function encodeRowsToParquet(table: TableName, rows: readonly Row[]): Uint8Array;
|
|
5
5
|
interface EncodeFlexOptions {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { SCHEMAS, TABLE_METADATA, dedupeByNaturalKey } from "../_chunks/schema.mjs";
|
|
2
2
|
import { parquetReadObjects } from "hyparquet";
|
|
3
3
|
import { parquetWriteBuffer } from "hyparquet-writer";
|
|
4
4
|
const ROW_GROUP_SIZE = 25e3;
|
package/dist/adapters/node.d.mts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { DataSource, StorageEngine } from "../_chunks/storage.mjs";
|
|
2
2
|
import { NodeDuckDBOptions, createNodeDuckDBHandle, resetNodeDuckDB } from "./duckdb-node.mjs";
|
|
3
|
-
import {
|
|
3
|
+
import { SnapshotIndex } from "../_chunks/snapshot.mjs";
|
|
4
4
|
import { Row, TableName } from "@gscdump/contracts";
|
|
5
5
|
import { SearchType } from "gscdump/query";
|
|
6
6
|
interface NodeHarnessOptions {
|
package/dist/adapters/node.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { createDuckDBCodec, createDuckDBExecutor, createStorageEngine } from "../_chunks/engine.mjs";
|
|
2
2
|
import { createNodeDuckDBHandle, resetNodeDuckDB } from "./duckdb-node.mjs";
|
|
3
3
|
import { createFilesystemDataSource, createFilesystemManifestStore } from "./filesystem.mjs";
|
|
4
4
|
import path from "node:path";
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { inferLegacyTier, inferSearchType } from "../_chunks/storage.mjs";
|
|
2
2
|
const SHARD_RE = /^u_[^/]+\/manifest\/(?<siteId>[^/]+)\/(?<table>[^/]+)\/HEAD$/;
|
|
3
3
|
function defaultSnapshotId() {
|
|
4
4
|
return `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
|
package/dist/adapters/r2.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { AnalysisParams, AnalysisResult, AnalysisTool, num } from "./_chunks/analysis-types.mjs";
|
|
2
2
|
export { AnalysisParams, AnalysisResult, AnalysisTool, num };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { AnalysisParams, AnalysisResult } from "../_chunks/analysis-types.mjs";
|
|
2
|
+
import { AnalysisQuerySource, Analyzer, AnalyzerRegistry, AnalyzerRegistryInit, AnalyzerVariants, BuildContext, DefineAnalyzerOptions, DefinedAnalyzer, Plan, ReduceContext, ReduceCtx, Reducer, RequiredCapability, RowQueriesPlan, SqlExtraQuery, SqlPlan, SqlPlanSpec, TypedRowQuery, createAnalyzerRegistry, defineAnalyzer, requireAdapter } from "../_chunks/registry.mjs";
|
|
3
3
|
declare class AnalyzerCapabilityError extends Error {
|
|
4
4
|
readonly tool: string;
|
|
5
5
|
readonly missing: readonly RequiredCapability[];
|
package/dist/analyzer/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { AnalyzerCapabilityError, runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
|
|
2
2
|
const DEFAULT_SQL_REQUIRES = ["executeSql", "fileSets"];
|
|
3
3
|
function defineAnalyzer(opts) {
|
|
4
4
|
const { id, reduce, reduceSql, reduceRows, buildSql, buildRows, sqlRequires = DEFAULT_SQL_REQUIRES, rowsRequires = [] } = opts;
|
package/dist/contracts.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { CodecCtx, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, ListLiveFilter, LockScope, ManifestEntry, ManifestStore, ParquetCodec, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult } from "./_chunks/storage.mjs";
|
|
2
2
|
export type { CodecCtx, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, ListLiveFilter, LockScope, ManifestEntry, ManifestStore, ParquetCodec, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult };
|
package/dist/entities.d.mts
CHANGED
package/dist/index.d.mts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
1
|
+
import { CodecCtx, CompactionThresholds, CompactionTier, DEFAULT_SEARCH_TYPE, DataSource, EngineOptions, FileSetRef, GcCtx, Grain, ListLiveFilter, LockScope, ManifestEntry, ManifestPurgeResult, ManifestStore, ParquetCodec, PurgeFilter, PurgeResult, PurgeUrlsResult, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, RAW_DAILY_COMPACT_THRESHOLD, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult, countRawDailies, dayPartition, dedupeOverlappingTiers, enumeratePartitions, hourPartition, inferLegacyTier, inferSearchType, objectKey, splitOverlappingTiers } from "./_chunks/storage.mjs";
|
|
2
|
+
import { DuckDBFactory, DuckDBHandle, canonicalEmptyParquetSchema, createDuckDBCodec, createDuckDBExecutor } from "./_chunks/duckdb.mjs";
|
|
3
|
+
import { ColumnDef, ColumnType, DrizzleSchema, SCHEMAS, TABLE_METADATA, TableSchema, allTables, countries, currentSchemaVersion, dates, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, page_queries, pages, queries } from "./_chunks/schema.mjs";
|
|
4
4
|
import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
|
|
5
|
-
import {
|
|
5
|
+
import { CommitRetryOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, LocalIcebergSinkOptions, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, listIcebergDataFiles, listIcebergTables } from "./_chunks/sink.mjs";
|
|
6
6
|
import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
7
|
-
import {
|
|
7
|
+
import { FILES_PLACEHOLDER, ResolvedQuery, resolveParquetSQL, substituteNamedFiles } from "./_chunks/planner.mjs";
|
|
8
8
|
import { rebuildDailyFromHourly } from "./rollups.mjs";
|
|
9
9
|
import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
|
|
10
10
|
import { Grain as Grain$1, Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
|
|
@@ -179,4 +179,4 @@ declare const MIN_SYNC_IMPRESSIONS = 1;
|
|
|
179
179
|
declare const MIN_COUNTRY_IMPRESSIONS = 10;
|
|
180
180
|
declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
|
|
181
181
|
declare const MAX_TRACKED_URLS_PER_SITE = 200000;
|
|
182
|
-
export { type CodecCtx, type ColumnDef, type ColumnType, type CommitRetryOptions, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, type IcebergAppendSink, type IcebergAppendSinkOptions, type IcebergCatalogConfig, type IcebergColumn, type IcebergColumnType, type IcebergConnection, type IcebergPartitionField, type IcebergPartitionSpec, type IcebergPartitionSpecField, type IcebergPartitionTransform, type IcebergPrimitiveType, type IcebergS3Config, type IcebergSchema, type IcebergSchemaField, type IcebergTableName, type IcebergTableOpResult, type IcebergTableSpec, type InMemorySink, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LocalIcebergSinkOptions, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type Sink, type SinkCapabilities, type SinkCloseResult, type SinkOptions, type SinkSlice, type SinkWriteResult, type StorageEngine, type StoredRow, type SyncState, type SyncStateDetail, type 
SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
|
182
|
+
export { type CodecCtx, type ColumnDef, type ColumnType, type CommitRetryOptions, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, type IcebergAppendSink, type IcebergAppendSinkOptions, type IcebergCatalogConfig, type IcebergColumn, type IcebergColumnType, type IcebergConnection, type IcebergListedDataFile, type IcebergPartitionField, type IcebergPartitionSpec, type IcebergPartitionSpecField, type IcebergPartitionTransform, type IcebergPrimitiveType, type IcebergS3Config, type IcebergSchema, type IcebergSchemaField, type IcebergTableName, type IcebergTableOpResult, type IcebergTableSpec, type InMemorySink, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListIcebergDataFilesOptions, type ListLiveFilter, type LocalIcebergSinkOptions, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type Sink, type SinkCapabilities, type SinkCloseResult, type SinkOptions, type SinkSlice, type SinkWriteResult, type StorageEngine, type 
StoredRow, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergDataFiles, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
package/dist/index.mjs
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
1
|
+
import { coerceRow, coerceRows } from "./_chunks/coerce.mjs";
|
|
2
|
+
import { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, dates, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, page_queries, pages, queries } from "./_chunks/schema.mjs";
|
|
3
|
+
import { DEFAULT_SEARCH_TYPE, dayPartition, hourPartition, inferLegacyTier, inferSearchType, objectKey } from "./_chunks/storage.mjs";
|
|
4
|
+
import { FILES_PLACEHOLDER, RAW_DAILY_COMPACT_THRESHOLD, countRawDailies, dedupeOverlappingTiers, enumeratePartitions, resolveParquetSQL, splitOverlappingTiers, substituteNamedFiles } from "./_chunks/parquet-plan.mjs";
|
|
5
5
|
import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
6
|
+
import { MAX_DAY_BYTES, canonicalEmptyParquetSchema, createDuckDBCodec, createDuckDBExecutor, createStorageEngine, gcOrphansImpl } from "./_chunks/engine.mjs";
|
|
7
|
+
import { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, icebergTableSpec } from "./_chunks/iceberg-schema.mjs";
|
|
8
8
|
import { assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
9
9
|
import "./planner.mjs";
|
|
10
10
|
import { rebuildDailyFromHourly } from "./rollups.mjs";
|
|
11
11
|
import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
|
|
12
|
-
import { icebergAppend, icebergCreateTable, icebergDropTable, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, s3SignedResolver } from "icebird";
|
|
12
|
+
import { icebergAppend, icebergCreateTable, icebergDropTable, icebergManifests, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, restCatalogLoadTable, s3SignedResolver } from "icebird";
|
|
13
13
|
const ICEBERG_TYPE_MAP = {
|
|
14
14
|
STRING: "string",
|
|
15
15
|
INT: "int",
|
|
@@ -109,6 +109,62 @@ async function createIcebergTables(conn, tables = ICEBERG_TABLES) {
|
|
|
109
109
|
async function listIcebergTables(conn) {
|
|
110
110
|
return restCatalogListTables(conn.catalog, { namespace: conn.namespace }).then((list) => list.map((t) => t.name).sort(), () => []);
|
|
111
111
|
}
|
|
112
|
+
/**
 * Enumerate every calendar month touched by an inclusive date range.
 *
 * Only the first two `-`-separated fields of each bound (year and month) are
 * read, so both `YYYY-MM` and `YYYY-MM-DD` strings are accepted.
 *
 * @param {{ start: string, end: string }} range - Inclusive bounds.
 * @returns {string[]} Month keys formatted `YYYY-MM`, in ascending order.
 *   Empty when `start` falls after `end`, or when either bound does not parse
 *   to an integer year and a month in 1..12.
 */
function monthsInRange(range) {
  const [startYear, startMonth] = range.start.split("-").map(Number);
  const [endYear, endMonth] = range.end.split("-").map(Number);
  // Reject malformed bounds up front: without this an out-of-range month would
  // be emitted verbatim (e.g. "2024-13") and a non-finite end year would make
  // the loop below run without bound.
  const isYearMonth = (year, month) => Number.isInteger(year) && Number.isInteger(month) && month >= 1 && month <= 12;
  if (!isYearMonth(startYear, startMonth) || !isYearMonth(endYear, endMonth)) return [];
  const months = [];
  let year = startYear;
  let month = startMonth;
  while (year < endYear || (year === endYear && month <= endMonth)) {
    months.push(`${year}-${String(month).padStart(2, "0")}`);
    month++;
    if (month > 12) {
      // Roll over from December into January of the next year.
      month = 1;
      year++;
    }
  }
  return months;
}
|
|
128
|
+
/**
 * Convert a `YYYY-MM` label into the number of whole months elapsed since
 * January 1970, so "1970-01" maps to 0 and "1971-01" maps to 12.
 *
 * @param ym String whose first two "-"-separated parts are year and month.
 * @returns Month offset from 1970-01; `NaN` when a part is missing or not
 *   numeric.
 */
function monthsSinceEpoch(ym) {
	const parts = ym.split("-");
	const year = Number(parts[0]);
	const month = Number(parts[1]);
	const wholeYears = year - 1970;
	// Months are 1-based in the label but 0-based in the offset.
	return wholeYears * 12 + (month - 1);
}
|
|
132
|
+
/**
 * Reduce an `s3://bucket/key` URI to just its key.
 *
 * Paths that do not start with "s3://" are returned unchanged. When the URI
 * has no "/" after the bucket name, everything after the scheme is returned.
 *
 * @param filePath File path or `s3://` URI.
 * @returns The portion after the first "/" that follows the scheme, or the
 *   input itself for non-`s3://` paths.
 */
function stripBucket(filePath) {
	const scheme = "s3://";
	if (filePath.startsWith(scheme)) {
		const withoutScheme = filePath.slice(scheme.length);
		const separatorAt = withoutScheme.indexOf("/");
		if (separatorAt < 0) return withoutScheme;
		return withoutScheme.slice(separatorAt + 1);
	}
	return filePath;
}
|
|
138
|
+
/**
 * List the live data files of one Iceberg table that belong to a given site,
 * search type and date range.
 *
 * Loads the table metadata from the REST catalog, reads its manifests, and
 * keeps the entries whose partition tuple matches `opts.siteId`,
 * `opts.searchType` and one of the months covered by `opts.range`.
 *
 * @param conn Connection object; `conn.catalog`, `conn.namespace` and
 *   `conn.resolver` are read.
 * @param opts Selection: `table` (table name), `siteId`, `searchType`, and
 *   `range` (`{ start, end }` strings beginning with `YYYY-MM`).
 * @returns Promise of `{ filePath, objectKey, bytes, rowCount }` records, one
 *   per matching data file; `[]` when the table has no current snapshot.
 */
async function listIcebergDataFiles(conn, opts) {
	const { metadata } = await restCatalogLoadTable(conn.catalog, {
		namespace: conn.namespace,
		table: opts.table
	});
	// No current snapshot: nothing to list.
	if (metadata["current-snapshot-id"] == null) return [];
	// Partition values are compared as month offsets from 1970-01
	// (presumably the Iceberg `month` transform; verify against the partition spec).
	const wantedMonths = new Set(monthsInRange(opts.range).map(monthsSinceEpoch));
	const manifests = await icebergManifests({
		metadata,
		resolver: conn.resolver
	});
	const out = [];
	for (const manifest of manifests) {
		for (const entry of manifest.entries) {
			// Manifest entry status 2 is DELETED in the Iceberg spec.
			if (entry.status === 2) continue;
			const df = entry.data_file;
			// `content` is a format-v2 field (0 = data, non-zero = delete files).
			// v1 manifests omit it and every v1 file is a data file, so a missing
			// value must count as data rather than being filtered out.
			if ((df.content ?? 0) !== 0) continue;
			const part = df.partition;
			if (part.site_id !== opts.siteId) continue;
			if (part.search_type !== opts.searchType) continue;
			const month = part.date_month;
			if (typeof month !== "number" || !wantedMonths.has(month)) continue;
			out.push({
				filePath: df.file_path,
				objectKey: stripBucket(df.file_path),
				// Coerced with Number(); presumably these arrive as bigint from the
				// manifest reader — TODO confirm.
				bytes: Number(df.file_size_in_bytes),
				rowCount: Number(df.record_count)
			});
		}
	}
	return out;
}
|
|
112
168
|
async function dropIcebergTables(conn, tables) {
|
|
113
169
|
const targets = tables ?? await restCatalogListTables(conn.catalog, { namespace: conn.namespace }).then((list) => list.map((t) => t.name), () => []);
|
|
114
170
|
const results = [];
|
|
@@ -473,4 +529,4 @@ const MIN_SYNC_IMPRESSIONS = 1;
|
|
|
473
529
|
const MIN_COUNTRY_IMPRESSIONS = 10;
|
|
474
530
|
const MAX_SITEMAP_URLS_PER_SITE = 5e4;
|
|
475
531
|
const MAX_TRACKED_URLS_PER_SITE = 2e5;
|
|
476
|
-
export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
|
532
|
+
export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergDataFiles, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
package/dist/ingest.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Row, TableName } from "./_chunks/storage.mjs";
|
|
2
2
|
/**
|
|
3
3
|
* Canonical GSC API dimension order per table. Consumers hitting the raw
|
|
4
4
|
* `searchanalytics.query` endpoint must request dimensions in this order so
|
|
@@ -23,6 +23,10 @@ interface IngestOptions {
|
|
|
23
23
|
* `page_queries` tables only.
|
|
24
24
|
*/
|
|
25
25
|
normalizeQuery?: (query: string) => string | null | undefined;
|
|
26
|
+
/** Date for one-day `searchAppearance` total queries, whose keys omit date. */
|
|
27
|
+
date?: string;
|
|
28
|
+
/** Search appearance filter used for contextual second-step rows. */
|
|
29
|
+
searchAppearance?: string;
|
|
26
30
|
}
|
|
27
31
|
/**
|
|
28
32
|
* Strip a GSC URL to its pathname. Core analytics stores pages by path so
|
package/dist/ingest.mjs
CHANGED
|
@@ -9,6 +9,13 @@ const TABLE_DIMS = {
|
|
|
9
9
|
"date"
|
|
10
10
|
],
|
|
11
11
|
search_appearance: ["searchAppearance", "date"],
|
|
12
|
+
search_appearance_pages: ["page", "date"],
|
|
13
|
+
search_appearance_queries: ["query", "date"],
|
|
14
|
+
search_appearance_page_queries: [
|
|
15
|
+
"page",
|
|
16
|
+
"query",
|
|
17
|
+
"date"
|
|
18
|
+
],
|
|
12
19
|
hourly_pages: ["hour", "page"]
|
|
13
20
|
};
|
|
14
21
|
function toPath(gscUrl) {
|
|
@@ -84,7 +91,7 @@ function transformGscRow(table, apiRow, options = {}) {
|
|
|
84
91
|
};
|
|
85
92
|
}
|
|
86
93
|
if (table === "search_appearance") {
|
|
87
|
-
const date = String(keys[1] ?? "");
|
|
94
|
+
const date = String(keys[1] ?? options.date ?? "");
|
|
88
95
|
return {
|
|
89
96
|
date,
|
|
90
97
|
row: {
|
|
@@ -96,6 +103,55 @@ function transformGscRow(table, apiRow, options = {}) {
|
|
|
96
103
|
}
|
|
97
104
|
};
|
|
98
105
|
}
|
|
106
|
+
if (table === "search_appearance_pages") {
|
|
107
|
+
const date = String(keys[1] ?? "");
|
|
108
|
+
return {
|
|
109
|
+
date,
|
|
110
|
+
row: {
|
|
111
|
+
searchAppearance: String(options.searchAppearance ?? ""),
|
|
112
|
+
url: toPath(String(keys[0] ?? "")),
|
|
113
|
+
date,
|
|
114
|
+
clicks,
|
|
115
|
+
impressions,
|
|
116
|
+
sum_position
|
|
117
|
+
}
|
|
118
|
+
};
|
|
119
|
+
}
|
|
120
|
+
if (table === "search_appearance_queries") {
|
|
121
|
+
const query = String(keys[0] ?? "");
|
|
122
|
+
const date = String(keys[1] ?? "");
|
|
123
|
+
const query_canonical = options.normalizeQuery?.(query) ?? null;
|
|
124
|
+
return {
|
|
125
|
+
date,
|
|
126
|
+
row: {
|
|
127
|
+
searchAppearance: String(options.searchAppearance ?? ""),
|
|
128
|
+
query,
|
|
129
|
+
query_canonical,
|
|
130
|
+
date,
|
|
131
|
+
clicks,
|
|
132
|
+
impressions,
|
|
133
|
+
sum_position
|
|
134
|
+
}
|
|
135
|
+
};
|
|
136
|
+
}
|
|
137
|
+
if (table === "search_appearance_page_queries") {
|
|
138
|
+
const query = String(keys[1] ?? "");
|
|
139
|
+
const date = String(keys[2] ?? "");
|
|
140
|
+
const query_canonical = options.normalizeQuery?.(query) ?? null;
|
|
141
|
+
return {
|
|
142
|
+
date,
|
|
143
|
+
row: {
|
|
144
|
+
searchAppearance: String(options.searchAppearance ?? ""),
|
|
145
|
+
url: toPath(String(keys[0] ?? "")),
|
|
146
|
+
query,
|
|
147
|
+
query_canonical,
|
|
148
|
+
date,
|
|
149
|
+
clicks,
|
|
150
|
+
impressions,
|
|
151
|
+
sum_position
|
|
152
|
+
}
|
|
153
|
+
};
|
|
154
|
+
}
|
|
99
155
|
if (table === "dates") throw new Error("`dates` rows must be built via assembleDatesRow, not transformGscRow");
|
|
100
156
|
const query = String(keys[1] ?? "");
|
|
101
157
|
const date = String(keys[2] ?? "");
|
package/dist/period/index.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod } from "../_chunks/index.mjs";
|
|
2
2
|
export { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod };
|
package/dist/planner.d.mts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { enumeratePartitions } from "./_chunks/storage.mjs";
|
|
2
|
+
import { FILES_PLACEHOLDER, ResolvedQuery, compileLogicalQueryPlan, resolveParquetSQL, substituteNamedFiles } from "./_chunks/planner.mjs";
|
|
3
3
|
export { FILES_PLACEHOLDER, type ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveParquetSQL, substituteNamedFiles };
|
package/dist/planner.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveParquetSQL, substituteNamedFiles } from "./_chunks/parquet-plan.mjs";
|
|
2
2
|
export { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveParquetSQL, substituteNamedFiles };
|