@gscdump/engine 0.7.4 → 0.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/entities.d.mts +1 -15
- package/dist/entities.mjs +1 -151
- package/dist/index.d.mts +1 -2
- package/dist/index.mjs +1 -2
- package/package.json +3 -27
- package/dist/adapters/http.d.mts +0 -35
- package/dist/adapters/http.mjs +0 -115
- package/dist/adapters/inspection-sqlite-browser.d.mts +0 -3
- package/dist/adapters/inspection-sqlite-browser.mjs +0 -42
- package/dist/adapters/inspection-sqlite-node.d.mts +0 -3
- package/dist/adapters/inspection-sqlite-node.mjs +0 -32
package/dist/entities.d.mts
CHANGED
|
@@ -77,20 +77,6 @@ interface CreateInspectionStoreOptions {
|
|
|
77
77
|
now?: () => number;
|
|
78
78
|
}
|
|
79
79
|
declare function createInspectionStore(opts: CreateInspectionStoreOptions): InspectionStore;
|
|
80
|
-
interface InspectionSqlDriver {
|
|
81
|
-
exec: (sql: string) => void | Promise<void>;
|
|
82
|
-
run: (sql: string, params: unknown[]) => void | Promise<void>;
|
|
83
|
-
all: (sql: string, params: unknown[]) => unknown[] | Promise<unknown[]>;
|
|
84
|
-
serialize: () => Uint8Array | Promise<Uint8Array>;
|
|
85
|
-
close: () => void | Promise<void>;
|
|
86
|
-
}
|
|
87
|
-
interface CreateInspectionStoreSqliteOptions {
|
|
88
|
-
dataSource: DataSource;
|
|
89
|
-
openDriver: (bytes: Uint8Array | undefined) => InspectionSqlDriver | Promise<InspectionSqlDriver>;
|
|
90
|
-
hash?: (url: string) => string;
|
|
91
|
-
}
|
|
92
|
-
declare function inspectionSqliteKey(ctx: TenantCtx): string;
|
|
93
|
-
declare function createInspectionStoreSqlite(opts: CreateInspectionStoreSqliteOptions): InspectionStore;
|
|
94
80
|
/** GSC sitemap record we persist. Matches `Schema$WmxSitemap` but as plain JSON. */
|
|
95
81
|
interface SitemapRecord {
|
|
96
82
|
/** The sitemap URL (feedpath) as returned by GSC. */
|
|
@@ -189,4 +175,4 @@ interface CreateEmptyTypesStoreOptions {
|
|
|
189
175
|
now?: () => number;
|
|
190
176
|
}
|
|
191
177
|
declare function createEmptyTypesStore(opts: CreateEmptyTypesStoreOptions): EmptyTypesStore;
|
|
192
|
-
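export { CreateEmptyTypesStoreOptions, CreateIndexingMetadataStoreOptions, CreateInspectionStoreOptions, CreateInspectionStoreSqliteOptions, CreateSitemapStoreOptions, EmptyTypesDoc, EmptyTypesStore, IndexingMetadataIndex, IndexingMetadataRecord, IndexingMetadataStore, InspectionIndex, InspectionRecord, InspectionSqlDriver, InspectionStore, SitemapHistoryDoc, SitemapIndex, SitemapRecord, SitemapStore, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createInspectionStoreSqlite, createSitemapStore, emptyTypesKey, hashUrl, indexingMetadataIndexKey, inspectionHistoryKey, inspectionIndexKey, inspectionSqliteKey, sitemapHistoryKey, sitemapIndexKey };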
|
|
178
|
+
export { CreateEmptyTypesStoreOptions, CreateIndexingMetadataStoreOptions, CreateInspectionStoreOptions, CreateSitemapStoreOptions, EmptyTypesDoc, EmptyTypesStore, IndexingMetadataIndex, IndexingMetadataRecord, IndexingMetadataStore, InspectionIndex, InspectionRecord, InspectionStore, SitemapHistoryDoc, SitemapIndex, SitemapRecord, SitemapStore, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, indexingMetadataIndexKey, inspectionHistoryKey, inspectionIndexKey, sitemapHistoryKey, sitemapIndexKey };
|
package/dist/entities.mjs
CHANGED
|
@@ -78,156 +78,6 @@ function createInspectionStore(opts) {
|
|
|
78
78
|
}
|
|
79
79
|
};
|
|
80
80
|
}
|
|
81
|
-
function inspectionSqliteKey(ctx) {
|
|
82
|
-
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/inspections.db` : `u_${ctx.userId}/entities/inspections/inspections.db`;
|
|
83
|
-
}
|
|
84
|
-
const INSPECTION_SCHEMA_SQL = `
|
|
85
|
-
CREATE TABLE IF NOT EXISTS inspections (
|
|
86
|
-
url_hash TEXT PRIMARY KEY,
|
|
87
|
-
url TEXT NOT NULL,
|
|
88
|
-
inspected_at TEXT NOT NULL,
|
|
89
|
-
index_status TEXT,
|
|
90
|
-
last_crawl_time TEXT,
|
|
91
|
-
google_canonical TEXT,
|
|
92
|
-
user_canonical TEXT,
|
|
93
|
-
coverage_state TEXT,
|
|
94
|
-
robots_txt_state TEXT,
|
|
95
|
-
indexing_state TEXT,
|
|
96
|
-
page_fetch_state TEXT,
|
|
97
|
-
mobile_usability_verdict TEXT,
|
|
98
|
-
rich_results_verdict TEXT,
|
|
99
|
-
raw TEXT
|
|
100
|
-
);
|
|
101
|
-
CREATE TABLE IF NOT EXISTS inspection_history (
|
|
102
|
-
year_month TEXT NOT NULL,
|
|
103
|
-
url_hash TEXT NOT NULL,
|
|
104
|
-
url TEXT NOT NULL,
|
|
105
|
-
inspected_at TEXT NOT NULL,
|
|
106
|
-
payload TEXT NOT NULL,
|
|
107
|
-
PRIMARY KEY (year_month, url_hash, inspected_at)
|
|
108
|
-
);
|
|
109
|
-
CREATE INDEX IF NOT EXISTS inspection_history_by_month ON inspection_history(year_month);
|
|
110
|
-
`;
|
|
111
|
-
function rowToRecord(r) {
|
|
112
|
-
const out = {
|
|
113
|
-
url: r.url,
|
|
114
|
-
inspectedAt: r.inspected_at
|
|
115
|
-
};
|
|
116
|
-
if (r.index_status != null) out.indexStatus = r.index_status;
|
|
117
|
-
if (r.last_crawl_time != null) out.lastCrawlTime = r.last_crawl_time;
|
|
118
|
-
if (r.google_canonical != null) out.googleCanonical = r.google_canonical;
|
|
119
|
-
if (r.user_canonical != null) out.userCanonical = r.user_canonical;
|
|
120
|
-
if (r.coverage_state != null) out.coverageState = r.coverage_state;
|
|
121
|
-
if (r.robots_txt_state != null) out.robotsTxtState = r.robots_txt_state;
|
|
122
|
-
if (r.indexing_state != null) out.indexingState = r.indexing_state;
|
|
123
|
-
if (r.page_fetch_state != null) out.pageFetchState = r.page_fetch_state;
|
|
124
|
-
if (r.mobile_usability_verdict != null) out.mobileUsabilityVerdict = r.mobile_usability_verdict;
|
|
125
|
-
if (r.rich_results_verdict != null) out.richResultsVerdict = r.rich_results_verdict;
|
|
126
|
-
if (r.raw != null) out.raw = JSON.parse(r.raw);
|
|
127
|
-
return out;
|
|
128
|
-
}
|
|
129
|
-
function shardForRecord(record) {
|
|
130
|
-
const m = YEAR_MONTH_RE.exec(record.inspectedAt);
|
|
131
|
-
return m ? `${m[1]}-${m[2]}` : "unknown";
|
|
132
|
-
}
|
|
133
|
-
function createInspectionStoreSqlite(opts) {
|
|
134
|
-
const ds = opts.dataSource;
|
|
135
|
-
const hash = opts.hash ?? hashUrl;
|
|
136
|
-
async function withDriver(ctx, fn, persist) {
|
|
137
|
-
const key = inspectionSqliteKey(ctx);
|
|
138
|
-
const bytes = await ds.read(key).catch(() => void 0);
|
|
139
|
-
const driver = await opts.openDriver(bytes);
|
|
140
|
-
await driver.exec(INSPECTION_SCHEMA_SQL);
|
|
141
|
-
const result = await fn(driver);
|
|
142
|
-
if (persist) {
|
|
143
|
-
const out = await driver.serialize();
|
|
144
|
-
await ds.write(key, out);
|
|
145
|
-
}
|
|
146
|
-
await driver.close();
|
|
147
|
-
return result;
|
|
148
|
-
}
|
|
149
|
-
return {
|
|
150
|
-
async writeBatch(ctx, records) {
|
|
151
|
-
if (records.length === 0) return;
|
|
152
|
-
await withDriver(ctx, async (driver) => {
|
|
153
|
-
for (const r of records) {
|
|
154
|
-
const h = hash(r.url);
|
|
155
|
-
await driver.run(`INSERT INTO inspections (
|
|
156
|
-
url_hash, url, inspected_at, index_status, last_crawl_time,
|
|
157
|
-
google_canonical, user_canonical, coverage_state, robots_txt_state,
|
|
158
|
-
indexing_state, page_fetch_state, mobile_usability_verdict,
|
|
159
|
-
rich_results_verdict, raw
|
|
160
|
-
) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
|
|
161
|
-
ON CONFLICT(url_hash) DO UPDATE SET
|
|
162
|
-
url = excluded.url,
|
|
163
|
-
inspected_at = excluded.inspected_at,
|
|
164
|
-
index_status = excluded.index_status,
|
|
165
|
-
last_crawl_time = excluded.last_crawl_time,
|
|
166
|
-
google_canonical = excluded.google_canonical,
|
|
167
|
-
user_canonical = excluded.user_canonical,
|
|
168
|
-
coverage_state = excluded.coverage_state,
|
|
169
|
-
robots_txt_state = excluded.robots_txt_state,
|
|
170
|
-
indexing_state = excluded.indexing_state,
|
|
171
|
-
page_fetch_state = excluded.page_fetch_state,
|
|
172
|
-
mobile_usability_verdict = excluded.mobile_usability_verdict,
|
|
173
|
-
rich_results_verdict = excluded.rich_results_verdict,
|
|
174
|
-
raw = excluded.raw`, [
|
|
175
|
-
h,
|
|
176
|
-
r.url,
|
|
177
|
-
r.inspectedAt,
|
|
178
|
-
r.indexStatus ?? null,
|
|
179
|
-
r.lastCrawlTime ?? null,
|
|
180
|
-
r.googleCanonical ?? null,
|
|
181
|
-
r.userCanonical ?? null,
|
|
182
|
-
r.coverageState ?? null,
|
|
183
|
-
r.robotsTxtState ?? null,
|
|
184
|
-
r.indexingState ?? null,
|
|
185
|
-
r.pageFetchState ?? null,
|
|
186
|
-
r.mobileUsabilityVerdict ?? null,
|
|
187
|
-
r.richResultsVerdict ?? null,
|
|
188
|
-
r.raw === void 0 ? null : JSON.stringify(r.raw)
|
|
189
|
-
]);
|
|
190
|
-
await driver.run(`INSERT OR REPLACE INTO inspection_history
|
|
191
|
-
(year_month, url_hash, url, inspected_at, payload)
|
|
192
|
-
VALUES (?,?,?,?,?)`, [
|
|
193
|
-
shardForRecord(r),
|
|
194
|
-
h,
|
|
195
|
-
r.url,
|
|
196
|
-
r.inspectedAt,
|
|
197
|
-
JSON.stringify(r)
|
|
198
|
-
]);
|
|
199
|
-
}
|
|
200
|
-
}, true);
|
|
201
|
-
},
|
|
202
|
-
async getLatest(ctx, url) {
|
|
203
|
-
return await withDriver(ctx, async (driver) => {
|
|
204
|
-
const rows = await driver.all("SELECT * FROM inspections WHERE url_hash = ? LIMIT 1", [hash(url)]);
|
|
205
|
-
return rows.length === 0 ? void 0 : rowToRecord(rows[0]);
|
|
206
|
-
}, false);
|
|
207
|
-
},
|
|
208
|
-
async loadIndex(ctx) {
|
|
209
|
-
return await withDriver(ctx, async (driver) => {
|
|
210
|
-
const rows = await driver.all("SELECT * FROM inspections", []);
|
|
211
|
-
const records = {};
|
|
212
|
-
for (const r of rows) records[r.url_hash] = rowToRecord(r);
|
|
213
|
-
return {
|
|
214
|
-
version: 1,
|
|
215
|
-
records
|
|
216
|
-
};
|
|
217
|
-
}, false);
|
|
218
|
-
},
|
|
219
|
-
async loadHistory(ctx, yearMonth) {
|
|
220
|
-
return await withDriver(ctx, async (driver) => {
|
|
221
|
-
const rows = await driver.all("SELECT * FROM inspection_history WHERE year_month = ? ORDER BY inspected_at ASC", [yearMonth]);
|
|
222
|
-
if (rows.length === 0) return void 0;
|
|
223
|
-
return {
|
|
224
|
-
version: 1,
|
|
225
|
-
records: rows.map((r) => JSON.parse(r.payload))
|
|
226
|
-
};
|
|
227
|
-
}, false);
|
|
228
|
-
}
|
|
229
|
-
};
|
|
230
|
-
}
|
|
231
81
|
function sitemapIndexKey(ctx) {
|
|
232
82
|
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/sitemaps/index.json` : `u_${ctx.userId}/entities/sitemaps/index.json`;
|
|
233
83
|
}
|
|
@@ -356,4 +206,4 @@ function createEmptyTypesStore(opts) {
|
|
|
356
206
|
}
|
|
357
207
|
};
|
|
358
208
|
}
|
|
359
|
-
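export { createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createInspectionStoreSqlite, createSitemapStore, emptyTypesKey, hashUrl, indexingMetadataIndexKey, inspectionHistoryKey, inspectionIndexKey, inspectionSqliteKey, sitemapHistoryKey, sitemapIndexKey };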
|
|
209
|
+
export { createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, indexingMetadataIndexKey, inspectionHistoryKey, inspectionIndexKey, sitemapHistoryKey, sitemapIndexKey };
|
package/dist/index.d.mts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { A as SyncStateFilter, B as dayPartition, C as QueryResult, D as StorageEngine, E as SearchType, F as Watermark, G as objectKey, H as inferSearchType, I as WatermarkFilter, J as weekPartition, K as quarterOfMonth, L as WatermarkScope, M as SyncStateScope, N as TableName, O as SyncState, P as TenantCtx, R as WriteCtx, S as QueryExecutor, T as RunSQLOptions, U as mondayOfWeek, V as inferLegacyTier, W as monthPartition, X as enumeratePartitions, Y as CompactionThresholds, _ as PurgeResult, a as DataSource, b as QueryExecuteOptions, c as FileSetRef, d as LockScope, f as ManifestEntry, g as PurgeFilter, h as ParquetCodec, i as DEFAULT_SEARCH_TYPE, j as SyncStateKind, k as SyncStateDetail, l as GcCtx, m as ManifestStore, n as CompactionTier, o as EngineOptions, p as ManifestPurgeResult, q as quarterPartition, r as ComparisonResult, s as ExtraResult, t as CodecCtx, u as ListLiveFilter, v as PurgeUrlsResult, w as Row, x as QueryExecuteResult, y as QueryCtx, z as WriteResult } from "./_chunks/storage.mjs";
|
|
2
2
|
import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
|
|
3
3
|
import { _ as pages, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as page_keywords, h as keywords, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema } from "./_chunks/schema.mjs";
|
|
4
|
-
import { CreateInspectionStoreSqliteOptions, InspectionSqlDriver, createInspectionStoreSqlite, inspectionSqliteKey } from "./entities.mjs";
|
|
5
4
|
import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
6
5
|
import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
|
|
7
6
|
import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
|
|
@@ -10,4 +9,4 @@ declare function coerceRow(row: Row$1): Row$1;
|
|
|
10
9
|
declare function coerceRows(rows: readonly Row$1[]): Row$1[];
|
|
11
10
|
declare const MAX_DAY_BYTES: number;
|
|
12
11
|
declare function createStorageEngine(opts: EngineOptions): StorageEngine;
|
|
13
|
-
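export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type ComparisonResult, type CreateInspectionStoreSqliteOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, type ExtraResult, FILES_PLACEHOLDER, type FileSetRef, type GcCtx, type GscApiRow, type IngestOptions, type InspectionSqlDriver, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, TABLE_METADATA, type TableName, type TableSchema, type TenantCtx, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createInspectionStoreSqlite, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, formatLiteral, inferLegacyTier, inferSearchType, inferTable, inspectionSqliteKey, keywords, mondayOfWeek, monthPartition, objectKey, page_keywords, pages, quarterOfMonth, quarterPartition, resolveToSQL, substituteNamedFiles, toPath, toSumPosition, transformGscRow, weekPartition };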
|
|
12
|
+
export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type ComparisonResult, DEFAULT_SEARCH_TYPE, type DataSource, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, type ExtraResult, FILES_PLACEHOLDER, type FileSetRef, type GcCtx, type GscApiRow, type IngestOptions, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, TABLE_METADATA, type TableName, type TableSchema, type TenantCtx, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, formatLiteral, inferLegacyTier, inferSearchType, inferTable, keywords, mondayOfWeek, monthPartition, objectKey, page_keywords, pages, quarterOfMonth, quarterPartition, resolveToSQL, substituteNamedFiles, toPath, toSumPosition, transformGscRow, weekPartition };
|
package/dist/index.mjs
CHANGED
|
@@ -3,7 +3,6 @@ import { a as mondayOfWeek, c as quarterOfMonth, d as weekPartition, i as inferS
|
|
|
3
3
|
import { i as substituteNamedFiles, o as enumeratePartitions, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
|
|
4
4
|
import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
|
|
5
5
|
import { a as createDuckDBExecutor, i as createDuckDBCodec, n as createStorageEngine, r as canonicalEmptyParquetSchema, t as MAX_DAY_BYTES } from "./_chunks/engine.mjs";
|
|
6
|
-
import { createInspectionStoreSqlite, inspectionSqliteKey } from "./entities.mjs";
|
|
7
6
|
import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
8
7
|
import "./planner.mjs";
|
|
9
8
|
function coerceRow(row) {
|
|
@@ -19,4 +18,4 @@ function coerceRows(rows) {
|
|
|
19
18
|
for (let i = 0; i < rows.length; i++) out[i] = coerceRow(rows[i]);
|
|
20
19
|
return out;
|
|
21
20
|
}
|
|
22
|
-
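export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, SCHEMAS, TABLE_METADATA, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createInspectionStoreSqlite, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, formatLiteral, inferLegacyTier, inferSearchType, inferTable, inspectionSqliteKey, keywords, mondayOfWeek, monthPartition, objectKey, page_keywords, pages, quarterOfMonth, quarterPartition, resolveToSQL, substituteNamedFiles, toPath, toSumPosition, transformGscRow, weekPartition };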
|
|
21
|
+
export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, SCHEMAS, TABLE_METADATA, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, formatLiteral, inferLegacyTier, inferSearchType, inferTable, keywords, mondayOfWeek, monthPartition, objectKey, page_keywords, pages, quarterOfMonth, quarterPartition, resolveToSQL, substituteNamedFiles, toPath, toSumPosition, transformGscRow, weekPartition };
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.7.4",
|
|
4
|
+
"version": "0.7.6",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -81,11 +81,6 @@
|
|
|
81
81
|
"import": "./dist/adapters/filesystem.mjs",
|
|
82
82
|
"default": "./dist/adapters/filesystem.mjs"
|
|
83
83
|
},
|
|
84
|
-
"./http": {
|
|
85
|
-
"types": "./dist/adapters/http.d.mts",
|
|
86
|
-
"import": "./dist/adapters/http.mjs",
|
|
87
|
-
"default": "./dist/adapters/http.mjs"
|
|
88
|
-
},
|
|
89
84
|
"./hyparquet": {
|
|
90
85
|
"types": "./dist/adapters/hyparquet.d.mts",
|
|
91
86
|
"import": "./dist/adapters/hyparquet.mjs",
|
|
@@ -135,16 +130,6 @@
|
|
|
135
130
|
"types": "./dist/arrow-utils.d.mts",
|
|
136
131
|
"import": "./dist/arrow-utils.mjs",
|
|
137
132
|
"default": "./dist/arrow-utils.mjs"
|
|
138
|
-
},
|
|
139
|
-
"./inspection-sqlite-node": {
|
|
140
|
-
"types": "./dist/adapters/inspection-sqlite-node.d.mts",
|
|
141
|
-
"import": "./dist/adapters/inspection-sqlite-node.mjs",
|
|
142
|
-
"default": "./dist/adapters/inspection-sqlite-node.mjs"
|
|
143
|
-
},
|
|
144
|
-
"./inspection-sqlite-browser": {
|
|
145
|
-
"types": "./dist/adapters/inspection-sqlite-browser.d.mts",
|
|
146
|
-
"import": "./dist/adapters/inspection-sqlite-browser.mjs",
|
|
147
|
-
"default": "./dist/adapters/inspection-sqlite-browser.mjs"
|
|
148
133
|
}
|
|
149
134
|
},
|
|
150
135
|
"main": "./dist/index.mjs",
|
|
@@ -157,38 +142,29 @@
|
|
|
157
142
|
},
|
|
158
143
|
"peerDependencies": {
|
|
159
144
|
"@duckdb/duckdb-wasm": "^1.32.0",
|
|
160
|
-
"better-sqlite3": "^12.9.0",
|
|
161
145
|
"hyparquet": "^1.25.6",
|
|
162
|
-
"hyparquet-writer": "^0.14.0",
|
|
163
|
-
"wa-sqlite": "^1.0.0"
|
|
146
|
+
"hyparquet-writer": "^0.14.0"
|
|
164
147
|
},
|
|
165
148
|
"peerDependenciesMeta": {
|
|
166
149
|
"@duckdb/duckdb-wasm": {
|
|
167
150
|
"optional": true
|
|
168
151
|
},
|
|
169
|
-
"better-sqlite3": {
|
|
170
|
-
"optional": true
|
|
171
|
-
},
|
|
172
152
|
"hyparquet": {
|
|
173
153
|
"optional": true
|
|
174
154
|
},
|
|
175
155
|
"hyparquet-writer": {
|
|
176
156
|
"optional": true
|
|
177
|
-
},
|
|
178
|
-
"wa-sqlite": {
|
|
179
|
-
"optional": true
|
|
180
157
|
}
|
|
181
158
|
},
|
|
182
159
|
"dependencies": {
|
|
183
160
|
"drizzle-orm": "^0.45.2",
|
|
184
161
|
"proper-lockfile": "^4.1.2",
|
|
185
|
-
"gscdump": "0.7.4"
|
|
162
|
+
"gscdump": "0.7.6"
|
|
186
163
|
},
|
|
187
164
|
"devDependencies": {
|
|
188
165
|
"@duckdb/duckdb-wasm": "^1.32.0",
|
|
189
166
|
"@types/proper-lockfile": "^4.1.4",
|
|
190
167
|
"aws4fetch": "^1.0.20",
|
|
191
|
-
"better-sqlite3": "^12.9.0",
|
|
192
168
|
"hyparquet": "^1.25.6",
|
|
193
169
|
"hyparquet-writer": "^0.14.0",
|
|
194
170
|
"tsx": "^4.21.0",
|
package/dist/adapters/http.d.mts
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import { a as DataSource, m as ManifestStore } from "../_chunks/storage.mjs";
|
|
2
|
-
interface HttpDataSourceOptions {
|
|
3
|
-
/**
|
|
4
|
-
* Base URL to prefix each object key with. MUST NOT have a trailing slash.
|
|
5
|
-
* E.g. `https://pub-abcdef.r2.dev/gscdump-data`.
|
|
6
|
-
*/
|
|
7
|
-
baseUrl: string;
|
|
8
|
-
/**
|
|
9
|
-
* Optional transformer that produces the final URL for a key. Use this when
|
|
10
|
-
* keys need signing (pre-signed URLs, per-request tokens, etc.). If omitted,
|
|
11
|
-
* the default is `${baseUrl}/${encodeKey(key)}` where forward slashes in the
|
|
12
|
-
* key are preserved.
|
|
13
|
-
*/
|
|
14
|
-
signUrl?: (key: string) => string;
|
|
15
|
-
/**
|
|
16
|
-
* Whether `uri(key)` should return the HTTPS URL so DuckDB's httpfs can
|
|
17
|
-
* fetch directly. Default true (browser, Workers, anywhere httpfs is
|
|
18
|
-
* loaded). Set to false for environments where httpfs isn't available —
|
|
19
|
-
* the executor will fall back to `read(key)` and buffer the bytes itself.
|
|
20
|
-
*/
|
|
21
|
-
useDuckDBHttpfs?: boolean;
|
|
22
|
-
}
|
|
23
|
-
declare function createHttpDataSource(opts: HttpDataSourceOptions): DataSource;
|
|
24
|
-
interface HttpManifestStoreOptions {
|
|
25
|
-
/**
|
|
26
|
-
* URL of a JSON manifest snapshot. The response MUST be:
|
|
27
|
-
* { version: 1, entries: ManifestEntry[], watermarks?: Watermark[] }
|
|
28
|
-
* (Matches the on-disk layout produced by the filesystem adapter.)
|
|
29
|
-
*/
|
|
30
|
-
manifestUrl: string;
|
|
31
|
-
/** Override fetch for tests / custom origins. */
|
|
32
|
-
fetchImpl?: typeof fetch;
|
|
33
|
-
}
|
|
34
|
-
declare function createHttpManifestStore(opts: HttpManifestStoreOptions): ManifestStore;
|
|
35
|
-
export { HttpDataSourceOptions, HttpManifestStoreOptions, createHttpDataSource, createHttpManifestStore };
|
package/dist/adapters/http.mjs
DELETED
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
import { r as inferLegacyTier } from "../_chunks/storage.mjs";
|
|
2
|
-
function readOnly(name) {
|
|
3
|
-
throw new Error(`http adapter is read-only: ${name} is not supported`);
|
|
4
|
-
}
|
|
5
|
-
function encodeKey(key) {
|
|
6
|
-
return key.split("/").map(encodeURIComponent).join("/");
|
|
7
|
-
}
|
|
8
|
-
const TRAILING_SLASH = /\/$/;
|
|
9
|
-
function createHttpDataSource(opts) {
|
|
10
|
-
const base = opts.baseUrl.replace(TRAILING_SLASH, "");
|
|
11
|
-
const sign = opts.signUrl ?? ((key) => `${base}/${encodeKey(key)}`);
|
|
12
|
-
const useHttpfs = opts.useDuckDBHttpfs ?? true;
|
|
13
|
-
async function readBytes(key, range, signal) {
|
|
14
|
-
const url = sign(key);
|
|
15
|
-
const headers = {};
|
|
16
|
-
if (range) headers.Range = `bytes=${range.offset}-${range.offset + range.length - 1}`;
|
|
17
|
-
const res = await fetch(url, {
|
|
18
|
-
headers,
|
|
19
|
-
signal
|
|
20
|
-
});
|
|
21
|
-
if (!res.ok) throw new Error(`http read failed ${res.status} ${res.statusText} for ${url}`);
|
|
22
|
-
return new Uint8Array(await res.arrayBuffer());
|
|
23
|
-
}
|
|
24
|
-
return {
|
|
25
|
-
read: readBytes,
|
|
26
|
-
async write() {
|
|
27
|
-
readOnly("write");
|
|
28
|
-
},
|
|
29
|
-
async delete() {
|
|
30
|
-
readOnly("delete");
|
|
31
|
-
},
|
|
32
|
-
async list() {
|
|
33
|
-
readOnly("list");
|
|
34
|
-
},
|
|
35
|
-
async head(key) {
|
|
36
|
-
const res = await fetch(sign(key), { method: "HEAD" });
|
|
37
|
-
if (!res.ok) return void 0;
|
|
38
|
-
const len = res.headers.get("content-length");
|
|
39
|
-
return len == null ? void 0 : { bytes: Number(len) };
|
|
40
|
-
},
|
|
41
|
-
uri(key) {
|
|
42
|
-
return useHttpfs ? sign(key) : void 0;
|
|
43
|
-
}
|
|
44
|
-
};
|
|
45
|
-
}
|
|
46
|
-
function matchesFilter(entry, filter) {
|
|
47
|
-
if (entry.userId !== filter.userId) return false;
|
|
48
|
-
if (filter.siteId !== void 0 && entry.siteId !== filter.siteId) return false;
|
|
49
|
-
if (filter.table !== void 0 && entry.table !== filter.table) return false;
|
|
50
|
-
if (filter.partitions && !filter.partitions.includes(entry.partition)) return false;
|
|
51
|
-
if (filter.tier !== void 0 && inferLegacyTier(entry) !== filter.tier) return false;
|
|
52
|
-
return true;
|
|
53
|
-
}
|
|
54
|
-
function matchesWatermark(w, filter) {
|
|
55
|
-
if (w.userId !== filter.userId) return false;
|
|
56
|
-
if (filter.siteId !== void 0 && w.siteId !== filter.siteId) return false;
|
|
57
|
-
if (filter.table !== void 0 && w.table !== filter.table) return false;
|
|
58
|
-
return true;
|
|
59
|
-
}
|
|
60
|
-
function createHttpManifestStore(opts) {
|
|
61
|
-
const fetchImpl = opts.fetchImpl ?? fetch;
|
|
62
|
-
let cache = null;
|
|
63
|
-
async function load() {
|
|
64
|
-
if (!cache) cache = (async () => {
|
|
65
|
-
const res = await fetchImpl(opts.manifestUrl);
|
|
66
|
-
if (!res.ok) throw new Error(`manifest fetch failed ${res.status} ${res.statusText} for ${opts.manifestUrl}`);
|
|
67
|
-
const parsed = await res.json();
|
|
68
|
-
if (parsed.version !== 1) throw new Error(`unsupported manifest version ${parsed.version}`);
|
|
69
|
-
return parsed;
|
|
70
|
-
})();
|
|
71
|
-
return cache;
|
|
72
|
-
}
|
|
73
|
-
return {
|
|
74
|
-
async listLive(filter) {
|
|
75
|
-
const { entries } = await load();
|
|
76
|
-
return entries.filter((e) => e.retiredAt === void 0 && matchesFilter(e, filter));
|
|
77
|
-
},
|
|
78
|
-
async listAll(filter) {
|
|
79
|
-
const { entries } = await load();
|
|
80
|
-
return entries.filter((e) => matchesFilter(e, filter));
|
|
81
|
-
},
|
|
82
|
-
async getWatermarks(filter) {
|
|
83
|
-
const { watermarks = [] } = await load();
|
|
84
|
-
return watermarks.filter((w) => matchesWatermark(w, filter));
|
|
85
|
-
},
|
|
86
|
-
async getSyncStates(_filter) {
|
|
87
|
-
return [];
|
|
88
|
-
},
|
|
89
|
-
async listRetired() {
|
|
90
|
-
return [];
|
|
91
|
-
},
|
|
92
|
-
async registerVersion() {
|
|
93
|
-
readOnly("registerVersion");
|
|
94
|
-
},
|
|
95
|
-
async registerVersions() {
|
|
96
|
-
readOnly("registerVersions");
|
|
97
|
-
},
|
|
98
|
-
async delete() {
|
|
99
|
-
readOnly("delete");
|
|
100
|
-
},
|
|
101
|
-
async bumpWatermark() {
|
|
102
|
-
readOnly("bumpWatermark");
|
|
103
|
-
},
|
|
104
|
-
async setSyncState() {
|
|
105
|
-
readOnly("setSyncState");
|
|
106
|
-
},
|
|
107
|
-
async withLock(_, fn) {
|
|
108
|
-
return fn();
|
|
109
|
-
},
|
|
110
|
-
async purgeTenant() {
|
|
111
|
-
readOnly("purgeTenant");
|
|
112
|
-
}
|
|
113
|
-
};
|
|
114
|
-
}
|
|
115
|
-
export { createHttpDataSource, createHttpManifestStore };
|
|
package/dist/adapters/inspection-sqlite-browser.mjs
DELETED
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
async function createWaSqliteDriver(bytes) {
|
|
2
|
-
const [factoryMod, apiMod] = await Promise.all([import("wa-sqlite/dist/wa-sqlite.mjs"), import("wa-sqlite")]);
|
|
3
|
-
const wasmModule = await factoryMod.default();
|
|
4
|
-
const sqlite3 = apiMod.Factory(wasmModule);
|
|
5
|
-
const db = await sqlite3.open_v2(":memory:");
|
|
6
|
-
if (bytes && bytes.byteLength > 0) await sqlite3.deserialize(db, "main", bytes);
|
|
7
|
-
async function runStatement(sql, params) {
|
|
8
|
-
const prepared = await sqlite3.prepare_v2(db, sql);
|
|
9
|
-
if (!prepared) return [];
|
|
10
|
-
const { stmt } = prepared;
|
|
11
|
-
const rows = [];
|
|
12
|
-
sqlite3.bind_collection(stmt, params);
|
|
13
|
-
let rc = await sqlite3.step(stmt);
|
|
14
|
-
while (rc === sqlite3.SQLITE_ROW) {
|
|
15
|
-
const colCount = sqlite3.column_count(stmt);
|
|
16
|
-
const row = {};
|
|
17
|
-
for (let i = 0; i < colCount; i++) row[sqlite3.column_name(stmt, i)] = sqlite3.column(stmt, i);
|
|
18
|
-
rows.push(row);
|
|
19
|
-
rc = await sqlite3.step(stmt);
|
|
20
|
-
}
|
|
21
|
-
await sqlite3.finalize(stmt);
|
|
22
|
-
return rows;
|
|
23
|
-
}
|
|
24
|
-
return {
|
|
25
|
-
async exec(sql) {
|
|
26
|
-
await sqlite3.exec(db, sql);
|
|
27
|
-
},
|
|
28
|
-
async run(sql, params) {
|
|
29
|
-
await runStatement(sql, params);
|
|
30
|
-
},
|
|
31
|
-
async all(sql, params) {
|
|
32
|
-
return runStatement(sql, params);
|
|
33
|
-
},
|
|
34
|
-
async serialize() {
|
|
35
|
-
return sqlite3.serialize(db, "main");
|
|
36
|
-
},
|
|
37
|
-
async close() {
|
|
38
|
-
await sqlite3.close(db);
|
|
39
|
-
}
|
|
40
|
-
};
|
|
41
|
-
}
|
|
42
|
-
export { createWaSqliteDriver };
|
|
package/dist/adapters/inspection-sqlite-node.mjs
DELETED
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
import { createRequire } from "node:module";
|
|
2
|
-
import process from "node:process";
|
|
3
|
-
import { fileURLToPath } from "node:url";
|
|
4
|
-
import { Buffer } from "node:buffer";
|
|
5
|
-
const require_ = createRequire(typeof __filename !== "undefined" ? __filename : typeof import.meta !== "undefined" ? fileURLToPath(import.meta.url) : process.cwd());
|
|
6
|
-
function loadBetterSqlite() {
|
|
7
|
-
const mod = require_("better-sqlite3");
|
|
8
|
-
return typeof mod === "function" ? mod : mod.default;
|
|
9
|
-
}
|
|
10
|
-
function createBetterSqliteDriver(bytes) {
|
|
11
|
-
const Database = loadBetterSqlite();
|
|
12
|
-
const db = bytes ? new Database(Buffer.from(bytes.buffer, bytes.byteOffset, bytes.byteLength)) : new Database(":memory:");
|
|
13
|
-
return {
|
|
14
|
-
exec(sql) {
|
|
15
|
-
db.exec(sql);
|
|
16
|
-
},
|
|
17
|
-
run(sql, params) {
|
|
18
|
-
db.prepare(sql).run(...params);
|
|
19
|
-
},
|
|
20
|
-
all(sql, params) {
|
|
21
|
-
return db.prepare(sql).all(...params);
|
|
22
|
-
},
|
|
23
|
-
serialize() {
|
|
24
|
-
const buf = db.serialize();
|
|
25
|
-
return new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength);
|
|
26
|
-
},
|
|
27
|
-
close() {
|
|
28
|
-
db.close();
|
|
29
|
-
}
|
|
30
|
-
};
|
|
31
|
-
}
|
|
32
|
-
export { createBetterSqliteDriver };
|