@gscdump/engine 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -77,20 +77,6 @@ interface CreateInspectionStoreOptions {
77
77
  now?: () => number;
78
78
  }
79
79
  declare function createInspectionStore(opts: CreateInspectionStoreOptions): InspectionStore;
80
- interface InspectionSqlDriver {
81
- exec: (sql: string) => void | Promise<void>;
82
- run: (sql: string, params: unknown[]) => void | Promise<void>;
83
- all: (sql: string, params: unknown[]) => unknown[] | Promise<unknown[]>;
84
- serialize: () => Uint8Array | Promise<Uint8Array>;
85
- close: () => void | Promise<void>;
86
- }
87
- interface CreateInspectionStoreSqliteOptions {
88
- dataSource: DataSource;
89
- openDriver: (bytes: Uint8Array | undefined) => InspectionSqlDriver | Promise<InspectionSqlDriver>;
90
- hash?: (url: string) => string;
91
- }
92
- declare function inspectionSqliteKey(ctx: TenantCtx): string;
93
- declare function createInspectionStoreSqlite(opts: CreateInspectionStoreSqliteOptions): InspectionStore;
94
80
  /** GSC sitemap record we persist. Matches `Schema$WmxSitemap` but as plain JSON. */
95
81
  interface SitemapRecord {
96
82
  /** The sitemap URL (feedpath) as returned by GSC. */
@@ -189,4 +175,4 @@ interface CreateEmptyTypesStoreOptions {
189
175
  now?: () => number;
190
176
  }
191
177
  declare function createEmptyTypesStore(opts: CreateEmptyTypesStoreOptions): EmptyTypesStore;
192
- export { CreateEmptyTypesStoreOptions, CreateIndexingMetadataStoreOptions, CreateInspectionStoreOptions, CreateInspectionStoreSqliteOptions, CreateSitemapStoreOptions, EmptyTypesDoc, EmptyTypesStore, IndexingMetadataIndex, IndexingMetadataRecord, IndexingMetadataStore, InspectionIndex, InspectionRecord, InspectionSqlDriver, InspectionStore, SitemapHistoryDoc, SitemapIndex, SitemapRecord, SitemapStore, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createInspectionStoreSqlite, createSitemapStore, emptyTypesKey, hashUrl, indexingMetadataIndexKey, inspectionHistoryKey, inspectionIndexKey, inspectionSqliteKey, sitemapHistoryKey, sitemapIndexKey };
178
+ export { CreateEmptyTypesStoreOptions, CreateIndexingMetadataStoreOptions, CreateInspectionStoreOptions, CreateSitemapStoreOptions, EmptyTypesDoc, EmptyTypesStore, IndexingMetadataIndex, IndexingMetadataRecord, IndexingMetadataStore, InspectionIndex, InspectionRecord, InspectionStore, SitemapHistoryDoc, SitemapIndex, SitemapRecord, SitemapStore, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, indexingMetadataIndexKey, inspectionHistoryKey, inspectionIndexKey, sitemapHistoryKey, sitemapIndexKey };
package/dist/entities.mjs CHANGED
@@ -78,156 +78,6 @@ function createInspectionStore(opts) {
78
78
  }
79
79
  };
80
80
  }
81
- function inspectionSqliteKey(ctx) {
82
- return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/inspections.db` : `u_${ctx.userId}/entities/inspections/inspections.db`;
83
- }
84
- const INSPECTION_SCHEMA_SQL = `
85
- CREATE TABLE IF NOT EXISTS inspections (
86
- url_hash TEXT PRIMARY KEY,
87
- url TEXT NOT NULL,
88
- inspected_at TEXT NOT NULL,
89
- index_status TEXT,
90
- last_crawl_time TEXT,
91
- google_canonical TEXT,
92
- user_canonical TEXT,
93
- coverage_state TEXT,
94
- robots_txt_state TEXT,
95
- indexing_state TEXT,
96
- page_fetch_state TEXT,
97
- mobile_usability_verdict TEXT,
98
- rich_results_verdict TEXT,
99
- raw TEXT
100
- );
101
- CREATE TABLE IF NOT EXISTS inspection_history (
102
- year_month TEXT NOT NULL,
103
- url_hash TEXT NOT NULL,
104
- url TEXT NOT NULL,
105
- inspected_at TEXT NOT NULL,
106
- payload TEXT NOT NULL,
107
- PRIMARY KEY (year_month, url_hash, inspected_at)
108
- );
109
- CREATE INDEX IF NOT EXISTS inspection_history_by_month ON inspection_history(year_month);
110
- `;
111
- function rowToRecord(r) {
112
- const out = {
113
- url: r.url,
114
- inspectedAt: r.inspected_at
115
- };
116
- if (r.index_status != null) out.indexStatus = r.index_status;
117
- if (r.last_crawl_time != null) out.lastCrawlTime = r.last_crawl_time;
118
- if (r.google_canonical != null) out.googleCanonical = r.google_canonical;
119
- if (r.user_canonical != null) out.userCanonical = r.user_canonical;
120
- if (r.coverage_state != null) out.coverageState = r.coverage_state;
121
- if (r.robots_txt_state != null) out.robotsTxtState = r.robots_txt_state;
122
- if (r.indexing_state != null) out.indexingState = r.indexing_state;
123
- if (r.page_fetch_state != null) out.pageFetchState = r.page_fetch_state;
124
- if (r.mobile_usability_verdict != null) out.mobileUsabilityVerdict = r.mobile_usability_verdict;
125
- if (r.rich_results_verdict != null) out.richResultsVerdict = r.rich_results_verdict;
126
- if (r.raw != null) out.raw = JSON.parse(r.raw);
127
- return out;
128
- }
129
- function shardForRecord(record) {
130
- const m = YEAR_MONTH_RE.exec(record.inspectedAt);
131
- return m ? `${m[1]}-${m[2]}` : "unknown";
132
- }
133
- function createInspectionStoreSqlite(opts) {
134
- const ds = opts.dataSource;
135
- const hash = opts.hash ?? hashUrl;
136
- async function withDriver(ctx, fn, persist) {
137
- const key = inspectionSqliteKey(ctx);
138
- const bytes = await ds.read(key).catch(() => void 0);
139
- const driver = await opts.openDriver(bytes);
140
- await driver.exec(INSPECTION_SCHEMA_SQL);
141
- const result = await fn(driver);
142
- if (persist) {
143
- const out = await driver.serialize();
144
- await ds.write(key, out);
145
- }
146
- await driver.close();
147
- return result;
148
- }
149
- return {
150
- async writeBatch(ctx, records) {
151
- if (records.length === 0) return;
152
- await withDriver(ctx, async (driver) => {
153
- for (const r of records) {
154
- const h = hash(r.url);
155
- await driver.run(`INSERT INTO inspections (
156
- url_hash, url, inspected_at, index_status, last_crawl_time,
157
- google_canonical, user_canonical, coverage_state, robots_txt_state,
158
- indexing_state, page_fetch_state, mobile_usability_verdict,
159
- rich_results_verdict, raw
160
- ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
161
- ON CONFLICT(url_hash) DO UPDATE SET
162
- url = excluded.url,
163
- inspected_at = excluded.inspected_at,
164
- index_status = excluded.index_status,
165
- last_crawl_time = excluded.last_crawl_time,
166
- google_canonical = excluded.google_canonical,
167
- user_canonical = excluded.user_canonical,
168
- coverage_state = excluded.coverage_state,
169
- robots_txt_state = excluded.robots_txt_state,
170
- indexing_state = excluded.indexing_state,
171
- page_fetch_state = excluded.page_fetch_state,
172
- mobile_usability_verdict = excluded.mobile_usability_verdict,
173
- rich_results_verdict = excluded.rich_results_verdict,
174
- raw = excluded.raw`, [
175
- h,
176
- r.url,
177
- r.inspectedAt,
178
- r.indexStatus ?? null,
179
- r.lastCrawlTime ?? null,
180
- r.googleCanonical ?? null,
181
- r.userCanonical ?? null,
182
- r.coverageState ?? null,
183
- r.robotsTxtState ?? null,
184
- r.indexingState ?? null,
185
- r.pageFetchState ?? null,
186
- r.mobileUsabilityVerdict ?? null,
187
- r.richResultsVerdict ?? null,
188
- r.raw === void 0 ? null : JSON.stringify(r.raw)
189
- ]);
190
- await driver.run(`INSERT OR REPLACE INTO inspection_history
191
- (year_month, url_hash, url, inspected_at, payload)
192
- VALUES (?,?,?,?,?)`, [
193
- shardForRecord(r),
194
- h,
195
- r.url,
196
- r.inspectedAt,
197
- JSON.stringify(r)
198
- ]);
199
- }
200
- }, true);
201
- },
202
- async getLatest(ctx, url) {
203
- return await withDriver(ctx, async (driver) => {
204
- const rows = await driver.all("SELECT * FROM inspections WHERE url_hash = ? LIMIT 1", [hash(url)]);
205
- return rows.length === 0 ? void 0 : rowToRecord(rows[0]);
206
- }, false);
207
- },
208
- async loadIndex(ctx) {
209
- return await withDriver(ctx, async (driver) => {
210
- const rows = await driver.all("SELECT * FROM inspections", []);
211
- const records = {};
212
- for (const r of rows) records[r.url_hash] = rowToRecord(r);
213
- return {
214
- version: 1,
215
- records
216
- };
217
- }, false);
218
- },
219
- async loadHistory(ctx, yearMonth) {
220
- return await withDriver(ctx, async (driver) => {
221
- const rows = await driver.all("SELECT * FROM inspection_history WHERE year_month = ? ORDER BY inspected_at ASC", [yearMonth]);
222
- if (rows.length === 0) return void 0;
223
- return {
224
- version: 1,
225
- records: rows.map((r) => JSON.parse(r.payload))
226
- };
227
- }, false);
228
- }
229
- };
230
- }
231
81
  function sitemapIndexKey(ctx) {
232
82
  return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/sitemaps/index.json` : `u_${ctx.userId}/entities/sitemaps/index.json`;
233
83
  }
@@ -356,4 +206,4 @@ function createEmptyTypesStore(opts) {
356
206
  }
357
207
  };
358
208
  }
359
- export { createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createInspectionStoreSqlite, createSitemapStore, emptyTypesKey, hashUrl, indexingMetadataIndexKey, inspectionHistoryKey, inspectionIndexKey, inspectionSqliteKey, sitemapHistoryKey, sitemapIndexKey };
209
+ export { createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, indexingMetadataIndexKey, inspectionHistoryKey, inspectionIndexKey, sitemapHistoryKey, sitemapIndexKey };
package/dist/index.d.mts CHANGED
@@ -1,7 +1,6 @@
1
1
  import { A as SyncStateFilter, B as dayPartition, C as QueryResult, D as StorageEngine, E as SearchType, F as Watermark, G as objectKey, H as inferSearchType, I as WatermarkFilter, J as weekPartition, K as quarterOfMonth, L as WatermarkScope, M as SyncStateScope, N as TableName, O as SyncState, P as TenantCtx, R as WriteCtx, S as QueryExecutor, T as RunSQLOptions, U as mondayOfWeek, V as inferLegacyTier, W as monthPartition, X as enumeratePartitions, Y as CompactionThresholds, _ as PurgeResult, a as DataSource, b as QueryExecuteOptions, c as FileSetRef, d as LockScope, f as ManifestEntry, g as PurgeFilter, h as ParquetCodec, i as DEFAULT_SEARCH_TYPE, j as SyncStateKind, k as SyncStateDetail, l as GcCtx, m as ManifestStore, n as CompactionTier, o as EngineOptions, p as ManifestPurgeResult, q as quarterPartition, r as ComparisonResult, s as ExtraResult, t as CodecCtx, u as ListLiveFilter, v as PurgeUrlsResult, w as Row, x as QueryExecuteResult, y as QueryCtx, z as WriteResult } from "./_chunks/storage.mjs";
2
2
  import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
3
3
  import { _ as pages, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as page_keywords, h as keywords, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema } from "./_chunks/schema.mjs";
4
- import { CreateInspectionStoreSqliteOptions, InspectionSqlDriver, createInspectionStoreSqlite, inspectionSqliteKey } from "./entities.mjs";
5
4
  import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
6
5
  import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
7
6
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
@@ -10,4 +9,4 @@ declare function coerceRow(row: Row$1): Row$1;
10
9
  declare function coerceRows(rows: readonly Row$1[]): Row$1[];
11
10
  declare const MAX_DAY_BYTES: number;
12
11
  declare function createStorageEngine(opts: EngineOptions): StorageEngine;
13
- export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type ComparisonResult, type CreateInspectionStoreSqliteOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, type ExtraResult, FILES_PLACEHOLDER, type FileSetRef, type GcCtx, type GscApiRow, type IngestOptions, type InspectionSqlDriver, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, TABLE_METADATA, type TableName, type TableSchema, type TenantCtx, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createInspectionStoreSqlite, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, formatLiteral, inferLegacyTier, inferSearchType, inferTable, inspectionSqliteKey, keywords, mondayOfWeek, monthPartition, objectKey, page_keywords, pages, quarterOfMonth, quarterPartition, resolveToSQL, substituteNamedFiles, toPath, toSumPosition, transformGscRow, weekPartition };
12
+ export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type ComparisonResult, DEFAULT_SEARCH_TYPE, type DataSource, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, type ExtraResult, FILES_PLACEHOLDER, type FileSetRef, type GcCtx, type GscApiRow, type IngestOptions, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, TABLE_METADATA, type TableName, type TableSchema, type TenantCtx, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, formatLiteral, inferLegacyTier, inferSearchType, inferTable, keywords, mondayOfWeek, monthPartition, objectKey, page_keywords, pages, quarterOfMonth, quarterPartition, resolveToSQL, substituteNamedFiles, toPath, toSumPosition, transformGscRow, weekPartition };