@gscdump/engine-sqlite 0.6.3 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,9 +1,10 @@
1
1
  import { assertSchemaInSync, compileSqlite, compileSqlite as compileSqlite$1, createResolverAdapter, createSqlQuerySource } from "@gscdump/engine/resolver";
2
- import { and, eq, gte, lte, sql, sql as sql$1 } from "drizzle-orm";
3
- import { integer, real, sqliteTable, text } from "drizzle-orm/sqlite-core";
2
+ import { and, and as and$1, eq, eq as eq$1, gte, inArray, isNotNull, isNull, lt, lte, lte as lte$1, or, sql, sql as sql$1 } from "drizzle-orm";
3
+ import { index, integer, primaryKey, real, sqliteTable, text, unique } from "drizzle-orm/sqlite-core";
4
+ import { inferSearchType } from "@gscdump/engine";
4
5
  import { createScopedHelpers } from "@gscdump/engine/scope";
5
6
  import { drizzle } from "drizzle-orm/sqlite-proxy";
6
- import { resolveWindow } from "@gscdump/analysis/period";
7
+ import { resolveWindow } from "@gscdump/engine/period";
7
8
  function metricCols() {
8
9
  return {
9
10
  clicks: integer("clicks").notNull().default(0),
@@ -121,6 +122,346 @@ function aggCtr(t) {
/**
 * Builds the SQL fragment for the aggregated position metric of table `t`.
 *
 * The fragment divides the summed `sum_position` column by the summed
 * `impressions` column and adds 1. `NULLIF(..., 0)` turns a zero impression
 * total into NULL so the division yields NULL instead of dividing by zero.
 *
 * @param t Table object exposing `sum_position` and `impressions` columns.
 * @returns The result of the `sql` tagged template for that expression.
 */
function aggPosition(t) {
  const positionSum = t.sum_position;
  const impressionSum = t.impressions;
  return sql$1`SUM(${positionSum}) / NULLIF(SUM(${impressionSum}), 0) + 1`;
}
/**
 * Drizzle schema for the `r2_manifest` SQLite table.
 *
 * Rows are built by `toRow` (which stores the object key in both `id` and
 * `object_key`) and read back through `fromRow`. `retired_at` is nullable;
 * `listByFilter` treats a NULL `retired_at` as a live entry. `tier`,
 * `search_type` and `schema_version` are nullable as well.
 */
const r2Manifest = sqliteTable(
  "r2_manifest",
  {
    id: text("id").primaryKey(),
    userId: integer("user_id").notNull(),
    siteId: text("site_id"),
    table: text("table").notNull(),
    partition: text("partition").notNull(),
    objectKey: text("object_key").notNull(),
    rowCount: integer("row_count").notNull().default(0),
    bytes: integer("bytes").notNull().default(0),
    createdAt: integer("created_at").notNull(),
    retiredAt: integer("retired_at"),
    tier: text("tier"),
    searchType: text("search_type"),
    schemaVersion: integer("schema_version")
  },
  (cols) => [
    // Lookup of entries per user/site/table/partition, with retired_at last.
    index("idx_r2_manifest_live").on(cols.userId, cols.siteId, cols.table, cols.partition, cols.retiredAt),
    // Supports scans by retirement time (see listRetired).
    index("idx_r2_manifest_retired").on(cols.retiredAt),
    // Same shape as the live index but keyed by tier instead of partition.
    index("idx_r2_manifest_tier").on(cols.userId, cols.siteId, cols.table, cols.tier, cols.retiredAt),
    // The upsert in registerVersions targets this unique object_key constraint.
    unique("r2_manifest_object_key_unique").on(cols.objectKey)
  ]
);
/**
 * Drizzle schema for the `r2_write_errors` SQLite table.
 *
 * Only `id`, `user_id`, `error` and `created_at` are required; `site_id`,
 * `table` and `date` are nullable. `created_at` defaults to SQLite's
 * `unixepoch()`.
 * NOTE(review): `unixepoch()` yields seconds, while other timestamps in this
 * file come from `Date.now()` (milliseconds) — confirm readers expect seconds.
 */
const r2WriteErrors = sqliteTable(
  "r2_write_errors",
  {
    id: text("id").primaryKey(),
    userId: integer("user_id").notNull(),
    siteId: text("site_id"),
    table: text("table"),
    date: text("date"),
    error: text("error").notNull(),
    createdAt: integer("created_at").notNull().default(sql$1`(unixepoch())`)
  },
  (cols) => [
    index("idx_r2_write_errors_user").on(cols.userId, cols.createdAt),
    index("idx_r2_write_errors_created").on(cols.createdAt)
  ]
);
/**
 * Drizzle schema for the `r2_shadow_diffs` SQLite table.
 *
 * Each row carries a required `endpoint` and `diff` text for a user, with an
 * optional `site_id`. `created_at` defaults to SQLite's `unixepoch()`.
 * NOTE(review): `unixepoch()` yields seconds — confirm readers expect seconds
 * rather than the millisecond timestamps used elsewhere in this file.
 */
const r2ShadowDiffs = sqliteTable(
  "r2_shadow_diffs",
  {
    id: text("id").primaryKey(),
    userId: integer("user_id").notNull(),
    siteId: text("site_id"),
    endpoint: text("endpoint").notNull(),
    diff: text("diff").notNull(),
    createdAt: integer("created_at").notNull().default(sql$1`(unixepoch())`)
  },
  (cols) => [
    index("idx_r2_shadow_diffs_user").on(cols.userId, cols.createdAt),
    index("idx_r2_shadow_diffs_endpoint").on(cols.endpoint, cols.createdAt)
  ]
);
/**
 * Drizzle schema for the `r2_locks` SQLite table used by `withLock`.
 *
 * `scope` is the primary key, so there is at most one row per scope string
 * (built by `lockScopeKey`). `holder_id` identifies the current owner and
 * `expires_at` is compared against the current time when another caller
 * tries to take the lock over.
 */
const r2Locks = sqliteTable(
  "r2_locks",
  {
    scope: text("scope").primaryKey(),
    holderId: text("holder_id").notNull(),
    acquiredAt: integer("acquired_at").notNull(),
    expiresAt: integer("expires_at").notNull()
  },
  (cols) => [index("idx_r2_locks_expires").on(cols.expiresAt)]
);
/**
 * Drizzle schema for the `r2_watermarks` SQLite table.
 *
 * One row per (`user_id`, `site_id`, `table`) — the composite primary key.
 * `site_id` is NOT NULL with an empty-string default; writers in this file
 * map a missing site id to "" via `siteIdOf`, and readers map "" back to
 * `undefined`.
 */
const r2Watermarks = sqliteTable(
  "r2_watermarks",
  {
    userId: integer("user_id").notNull(),
    siteId: text("site_id").notNull().default(""),
    table: text("table").notNull(),
    newestDateSynced: text("newest_date_synced").notNull(),
    oldestDateSynced: text("oldest_date_synced").notNull(),
    lastSyncAt: integer("last_sync_at").notNull()
  },
  (cols) => [primaryKey({ columns: [cols.userId, cols.siteId, cols.table] })]
);
/**
 * Drizzle schema for the `r2_sync_states` SQLite table.
 *
 * One row per (`user_id`, `site_id`, `table`, `date`, `search_type`) — the
 * composite primary key. `site_id` and `search_type` are NOT NULL with an
 * empty-string default; `setSyncState` writes "" for a missing site id and
 * for an unspecified or "web" search type. `state` is restricted to
 * pending / inflight / done / failed.
 */
const r2SyncStates = sqliteTable(
  "r2_sync_states",
  {
    userId: integer("user_id").notNull(),
    siteId: text("site_id").notNull().default(""),
    table: text("table").notNull(),
    date: text("date").notNull(),
    searchType: text("search_type").notNull().default(""),
    state: text("state", { enum: ["pending", "inflight", "done", "failed"] }).notNull(),
    updatedAt: integer("updated_at").notNull(),
    attempts: integer("attempts").notNull().default(0),
    error: text("error")
  },
  (cols) => [
    primaryKey({
      columns: [cols.userId, cols.siteId, cols.table, cols.date, cols.searchType]
    }),
    index("idx_r2_sync_states_state").on(cols.state)
  ]
);
/**
 * Converts a manifest entry into the column values of an `r2_manifest` row.
 *
 * The object key doubles as the row id, `userId` is coerced with `Number`,
 * and every optional field that is `null` or `undefined` is stored as `null`.
 *
 * @param e Manifest entry (reads userId, siteId, table, partition, objectKey,
 *   rowCount, bytes, createdAt, retiredAt, tier, searchType, schemaVersion).
 * @returns Plain object keyed by the `r2Manifest` column property names.
 */
function toRow(e) {
  const orNull = (value) => value ?? null;
  const { objectKey } = e;
  return {
    id: objectKey,
    userId: Number(e.userId),
    siteId: orNull(e.siteId),
    table: e.table,
    partition: e.partition,
    objectKey,
    rowCount: e.rowCount,
    bytes: e.bytes,
    createdAt: e.createdAt,
    retiredAt: orNull(e.retiredAt),
    tier: orNull(e.tier),
    searchType: orNull(e.searchType),
    schemaVersion: orNull(e.schemaVersion)
  };
}
/**
 * Converts an `r2_manifest` row back into a manifest entry.
 *
 * `userId` is returned as a string. `siteId` and `retiredAt` are always
 * present on the result and become `undefined` when the column is null.
 * `tier`, `searchType` and `schemaVersion` are only added to the result when
 * the column value is not `null`.
 *
 * @param r Row object using the `r2Manifest` column property names.
 * @returns Manifest entry object.
 */
function fromRow(r) {
  const entry = {
    userId: String(r.userId),
    siteId: r.siteId ?? undefined,
    table: r.table,
    partition: r.partition,
    objectKey: r.objectKey,
    rowCount: r.rowCount,
    bytes: r.bytes,
    createdAt: r.createdAt,
    retiredAt: r.retiredAt ?? undefined
  };
  // These keys are omitted entirely (not set to undefined) for null columns.
  for (const key of ["tier", "searchType", "schemaVersion"]) {
    if (r[key] !== null) entry[key] = r[key];
  }
  return entry;
}
/** Maps a missing (`null`/`undefined`) site id to "", the value stored in NOT NULL `site_id` columns. */
const siteIdOf = (s) => (s === null || s === undefined ? "" : s);
/** Maps an unspecified (`undefined`) or "web" search type to ""; any other value is returned unchanged. */
const searchTypeOf = (s) => {
  if (s === undefined || s === "web") return "";
  return s;
};
// How long an acquired lock row stays valid; withLock sets expires_at = now + this.
const LOCK_TTL_MS = 30000;
// Upper bound on how long withLock keeps retrying before it throws.
const LOCK_ACQUIRE_TIMEOUT_MS = 5000;
// Bounds of the randomized pause between acquisition attempts (see jitterDelay).
const LOCK_RETRY_MIN_MS = 25;
const LOCK_RETRY_MAX_MS = 150;
/**
 * Builds the string stored in `r2_locks.scope` for a lock scope.
 *
 * The key is `userId|siteId|table|partition`, with a missing site id
 * rendered as an empty segment.
 *
 * @param scope Object with `userId`, optional `siteId`, `table`, `partition`.
 * @returns The pipe-separated scope key.
 */
function lockScopeKey(scope) {
  const { userId, siteId, table, partition } = scope;
  const siteSegment = siteIdOf(siteId);
  return `${userId}|${siteSegment}|${table}|${partition}`;
}
/**
 * Picks a random whole-millisecond pause between lock acquisition attempts.
 *
 * @returns An integer in [LOCK_RETRY_MIN_MS, LOCK_RETRY_MAX_MS).
 */
function jitterDelay() {
  const spread = LOCK_RETRY_MAX_MS - LOCK_RETRY_MIN_MS;
  const offset = Math.floor(Math.random() * spread);
  return LOCK_RETRY_MIN_MS + offset;
}
/**
 * Builds the WHERE condition that selects manifest rows belonging to a tier.
 *
 * Every target matches rows whose `tier` column equals it. For "raw" and
 * "d30" the condition additionally matches rows whose `tier` is NULL and
 * whose partition starts with `daily/` or `monthly/` respectively
 * (presumably rows written before the tier column was populated — confirm).
 *
 * @param target Tier name to match.
 * @returns A drizzle condition.
 */
function tierMatchCond(target) {
  const explicit = eq$1(r2Manifest.tier, target);
  let partitionFallback;
  switch (target) {
    case "raw":
      partitionFallback = sql$1`${r2Manifest.partition} LIKE 'daily/%'`;
      break;
    case "d30":
      partitionFallback = sql$1`${r2Manifest.partition} LIKE 'monthly/%'`;
      break;
    default:
      // No partition-based fallback for other tiers.
      return explicit;
  }
  return or(explicit, and$1(isNull(r2Manifest.tier), partitionFallback));
}
253
+ function createD1ManifestStore(db) {
/**
 * Lists manifest entries for one user, narrowed by the optional filter fields.
 *
 * `filter.userId` is always applied. `siteId`, `table` and `tier` are applied
 * when they are not `undefined`; `partitions` is applied when it is a
 * non-empty list.
 *
 * @param filter Object with `userId` and optional `siteId`, `table`,
 *   `partitions`, `tier`.
 * @param liveOnly When truthy, only rows with a NULL `retired_at` are returned.
 * @returns Promise of manifest entries (rows converted with `fromRow`).
 */
async function listByFilter(filter, liveOnly) {
  const { siteId, table, partitions, tier } = filter;
  const conds = [eq$1(r2Manifest.userId, Number(filter.userId))];
  if (liveOnly) {
    conds.push(isNull(r2Manifest.retiredAt));
  }
  if (siteId !== undefined) {
    conds.push(eq$1(r2Manifest.siteId, siteId));
  }
  if (table !== undefined) {
    conds.push(eq$1(r2Manifest.table, table));
  }
  if (partitions?.length > 0) {
    conds.push(inArray(r2Manifest.partition, partitions));
  }
  if (tier !== undefined) {
    const tierCond = tierMatchCond(tier);
    if (tierCond) conds.push(tierCond);
  }
  const rows = await db.select().from(r2Manifest).where(and$1(...conds));
  return rows.map(fromRow);
}
/** Lists entries matching `filter` whose `retired_at` is NULL. */
const listLive = (filter) => {
  return listByFilter(filter, true);
};
/** Lists entries matching `filter` regardless of `retired_at`. */
const listAll = (filter) => {
  return listByFilter(filter, false);
};
/**
 * Registers new manifest entries and retires the entries they supersede.
 *
 * Statements are queued in this order: first UPDATEs that set `retired_at`
 * on still-live superseded rows, then one upsert per new entry keyed on
 * `object_key`. The queue is then sent through `db.batch` in slices.
 * NOTE(review): each slice is a separate `db.batch` call, so more than 95
 * statements are presumably not applied as one atomic unit — confirm.
 *
 * @param newEntries Entries to insert or overwrite. The first entry's
 *   `createdAt` is used as the retirement timestamp (falls back to
 *   `Date.now()` when it is missing).
 * @param superseding Optional list of entries (only `objectKey` is read)
 *   to mark as retired.
 * @returns Promise resolving once every batch has run; resolves immediately
 *   when there is nothing to write.
 */
async function registerVersions(newEntries, superseding) {
  // Chunk sizes presumably keep each statement/batch under backend limits — confirm.
  const KEYS_PER_RETIRE_STATEMENT = 90;
  const STATEMENTS_PER_BATCH = 95;
  const supersededAt = newEntries[0]?.createdAt ?? Date.now();

  const retireKeys = superseding && superseding.length > 0
    ? superseding.map((s) => s.objectKey)
    : [];

  const statements = [];
  for (let start = 0; start < retireKeys.length; start += KEYS_PER_RETIRE_STATEMENT) {
    const keySlice = retireKeys.slice(start, start + KEYS_PER_RETIRE_STATEMENT);
    // Only rows that are still live get stamped; already-retired rows keep their timestamp.
    const stillLive = and$1(inArray(r2Manifest.objectKey, keySlice), isNull(r2Manifest.retiredAt));
    statements.push(db.update(r2Manifest).set({ retiredAt: supersededAt }).where(stillLive));
  }

  for (const entry of newEntries) {
    const upsert = db.insert(r2Manifest).values(toRow(entry)).onConflictDoUpdate({
      target: r2Manifest.objectKey,
      // On a key collision every non-key column is overwritten with the incoming row.
      set: {
        userId: sql$1`excluded.user_id`,
        siteId: sql$1`excluded.site_id`,
        table: sql$1`excluded."table"`,
        partition: sql$1`excluded.partition`,
        rowCount: sql$1`excluded.row_count`,
        bytes: sql$1`excluded.bytes`,
        createdAt: sql$1`excluded.created_at`,
        retiredAt: sql$1`excluded.retired_at`,
        tier: sql$1`excluded.tier`,
        searchType: sql$1`excluded.search_type`,
        schemaVersion: sql$1`excluded.schema_version`
      }
    });
    statements.push(upsert);
  }

  if (statements.length === 0) return;
  for (let start = 0; start < statements.length; start += STATEMENTS_PER_BATCH) {
    await db.batch(statements.slice(start, start + STATEMENTS_PER_BATCH));
  }
}
/**
 * Lists manifest entries that were retired at or before a cutoff.
 *
 * @param olderThan Cutoff compared against `retired_at` with `<=`.
 * @returns Promise of manifest entries whose `retired_at` is set and
 *   not later than the cutoff.
 */
async function listRetired(olderThan) {
  const retiredByCutoff = and$1(
    isNotNull(r2Manifest.retiredAt),
    lte$1(r2Manifest.retiredAt, olderThan)
  );
  const rows = await db.select().from(r2Manifest).where(retiredByCutoff);
  return rows.map(fromRow);
}
/**
 * Deletes manifest rows by object key.
 *
 * Keys are deleted in groups of 90 per DELETE statement, one statement
 * after another (presumably to stay under a bound-parameter limit — confirm).
 *
 * @param entries Entries to remove; only `objectKey` is read.
 * @returns Promise resolving when all deletes have run; no-op for an empty list.
 */
async function deleteEntries(entries) {
  const KEYS_PER_DELETE = 90;
  if (entries.length === 0) return;
  const keys = entries.map(({ objectKey }) => objectKey);
  let offset = 0;
  while (offset < keys.length) {
    const keySlice = keys.slice(offset, offset + KEYS_PER_DELETE);
    await db.delete(r2Manifest).where(inArray(r2Manifest.objectKey, keySlice));
    offset += KEYS_PER_DELETE;
  }
}
/**
 * Reads watermark rows for one user, optionally narrowed by site and table.
 *
 * @param filter Object with `userId` and optional `siteId` / `table`
 *   (each applied only when not `undefined`).
 * @returns Promise of watermark objects. `userId` is returned as a string
 *   and an empty-string `site_id` is reported as `undefined`.
 */
async function getWatermarks(filter) {
  const { siteId, table } = filter;
  const conds = [eq$1(r2Watermarks.userId, Number(filter.userId))];
  if (siteId !== undefined) {
    conds.push(eq$1(r2Watermarks.siteId, siteId));
  }
  if (table !== undefined) {
    conds.push(eq$1(r2Watermarks.table, table));
  }
  const rows = await db.select().from(r2Watermarks).where(and$1(...conds));
  return rows.map((row) => {
    // "" is the stored stand-in for "no site id".
    const site = row.siteId === "" ? undefined : row.siteId;
    return {
      userId: String(row.userId),
      siteId: site,
      table: row.table,
      newestDateSynced: row.newestDateSynced,
      oldestDateSynced: row.oldestDateSynced,
      lastSyncAt: row.lastSyncAt
    };
  });
}
/**
 * Records that `date` has been synced for a (user, site, table) scope.
 *
 * Inserts a watermark row with `date` as both newest and oldest synced date.
 * If the row already exists, `newest_date_synced` only moves forward,
 * `oldest_date_synced` only moves backward (both by SQL string comparison),
 * and `last_sync_at` is always overwritten.
 *
 * @param scope Object with `userId`, optional `siteId`, and `table`.
 * @param date Date string to fold into the watermark range.
 * @param at Optional sync timestamp; defaults to `Date.now()`.
 * @returns Promise resolving when the upsert has run.
 */
async function bumpWatermark(scope, date, at) {
  const lastSyncAt = at ?? Date.now();
  const row = {
    userId: Number(scope.userId),
    siteId: siteIdOf(scope.siteId),
    table: scope.table,
    newestDateSynced: date,
    oldestDateSynced: date,
    lastSyncAt
  };
  const onConflict = {
    target: [r2Watermarks.userId, r2Watermarks.siteId, r2Watermarks.table],
    set: {
      newestDateSynced: sql$1`CASE WHEN excluded.newest_date_synced > newest_date_synced THEN excluded.newest_date_synced ELSE newest_date_synced END`,
      oldestDateSynced: sql$1`CASE WHEN excluded.oldest_date_synced < oldest_date_synced THEN excluded.oldest_date_synced ELSE oldest_date_synced END`,
      lastSyncAt: sql$1`excluded.last_sync_at`
    }
  };
  await db.insert(r2Watermarks).values(row).onConflictDoUpdate(onConflict).run();
}
/**
 * Reads sync-state rows for one user, narrowed by the optional filter fields.
 *
 * @param filter Object with `userId` and optional `siteId`, `table`, `state`,
 *   `searchType` (each applied only when not `undefined`). The search type
 *   is normalized with `searchTypeOf` before comparison.
 * @returns Promise of sync-state objects. `userId` is a string, an
 *   empty-string `site_id` becomes `undefined`, a null `error` becomes
 *   `undefined`, and `searchType` is resolved through `inferSearchType`
 *   (given `undefined` when the stored value is "").
 */
async function getSyncStates(filter) {
  const { siteId, table, state, searchType } = filter;
  const conds = [eq$1(r2SyncStates.userId, Number(filter.userId))];
  if (siteId !== undefined) {
    conds.push(eq$1(r2SyncStates.siteId, siteId));
  }
  if (table !== undefined) {
    conds.push(eq$1(r2SyncStates.table, table));
  }
  if (state !== undefined) {
    conds.push(eq$1(r2SyncStates.state, state));
  }
  if (searchType !== undefined) {
    conds.push(eq$1(r2SyncStates.searchType, searchTypeOf(searchType)));
  }
  const rows = await db.select().from(r2SyncStates).where(and$1(...conds));
  return rows.map((row) => {
    const storedSearchType = row.searchType === "" ? undefined : row.searchType;
    return {
      userId: String(row.userId),
      siteId: row.siteId === "" ? undefined : row.siteId,
      table: row.table,
      date: row.date,
      searchType: inferSearchType({ searchType: storedSearchType }),
      state: row.state,
      updatedAt: row.updatedAt,
      attempts: row.attempts,
      error: row.error ?? undefined
    };
  });
}
/**
 * Upserts the sync state for one (user, site, table, date, searchType) scope.
 *
 * On first insert `attempts` starts at 1 for "inflight" and 0 otherwise.
 * On an existing row: `attempts` is incremented only when the new state is
 * "inflight"; `error` is cleared for "done", left untouched for "inflight",
 * and replaced with the supplied error (or NULL) for any other state.
 *
 * @param scope Object with `userId`, optional `siteId`, `table`, `date`,
 *   optional `searchType`.
 * @param state New state value.
 * @param detail Optional `{ at, error }`; `at` defaults to `Date.now()` and
 *   `error` to `null`.
 * @returns Promise resolving when the upsert has run.
 */
async function setSyncState(scope, state, detail) {
  const updatedAt = detail?.at ?? Date.now();
  const errorText = detail?.error ?? null;
  const initialAttempts = state === "inflight" ? 1 : 0;
  const row = {
    userId: Number(scope.userId),
    siteId: siteIdOf(scope.siteId),
    table: scope.table,
    date: scope.date,
    searchType: searchTypeOf(scope.searchType),
    state,
    updatedAt,
    attempts: initialAttempts,
    error: errorText
  };
  const onConflict = {
    target: [
      r2SyncStates.userId,
      r2SyncStates.siteId,
      r2SyncStates.table,
      r2SyncStates.date,
      r2SyncStates.searchType
    ],
    set: {
      state: sql$1`excluded.state`,
      updatedAt: sql$1`excluded.updated_at`,
      attempts: sql$1`CASE WHEN excluded.state = 'inflight' THEN attempts + 1 ELSE attempts END`,
      error: sql$1`CASE
        WHEN excluded.state = 'done' THEN NULL
        WHEN excluded.state = 'inflight' THEN error
        ELSE excluded.error
      END`
    }
  };
  await db.insert(r2SyncStates).values(row).onConflictDoUpdate(onConflict).run();
}
/**
 * Runs `fn` while holding the row-based lock for `scope`.
 *
 * Acquisition: each attempt inserts a lock row for the scope key with a
 * fresh random holder id; if a row already exists it is only overwritten
 * when its `expires_at` is earlier than the current time. The row is then
 * read back, and the lock is held when it carries this call's holder id.
 * Failed attempts pause for `jitterDelay()` ms and retry until
 * `LOCK_ACQUIRE_TIMEOUT_MS` has elapsed.
 *
 * The lock is not renewed: once `LOCK_TTL_MS` has passed, another caller can
 * take the row over even if `fn` is still running.
 *
 * Release: the row is deleted (only if it still carries this holder id)
 * after `fn` settles. This also happens when `fn` throws synchronously or
 * returns a non-promise value. Errors raised by the release itself are
 * deliberately ignored, since an unreleased row expires on its own.
 *
 * @param scope Object with `userId`, optional `siteId`, `table`, `partition`.
 * @param fn Callback to run under the lock; may be sync or async.
 * @returns Promise of whatever `fn` returns or resolves to.
 * @throws Error when the lock cannot be acquired before the timeout, or
 *   whatever `fn` throws or rejects with.
 */
async function withLock(scope, fn) {
  const key = lockScopeKey(scope);
  const holderId = crypto.randomUUID();
  const deadline = Date.now() + LOCK_ACQUIRE_TIMEOUT_MS;
  while (true) {
    const now = Date.now();
    const expiresAt = now + LOCK_TTL_MS;
    await db.insert(r2Locks).values({
      scope: key,
      holderId,
      acquiredAt: now,
      expiresAt
    }).onConflictDoUpdate({
      target: r2Locks.scope,
      set: {
        holderId: sql$1`excluded.holder_id`,
        acquiredAt: sql$1`excluded.acquired_at`,
        expiresAt: sql$1`excluded.expires_at`
      },
      // Only steal the row from a holder whose lease has already expired.
      setWhere: lt(r2Locks.expiresAt, now)
    }).run();
    // Read back to learn whether our insert/takeover actually won.
    const current = await db.select({ holderId: r2Locks.holderId }).from(r2Locks).where(eq$1(r2Locks.scope, key)).get();
    if (current?.holderId === holderId) break;
    if (Date.now() >= deadline) throw new Error(`withLock: timed out acquiring ${key} after ${LOCK_ACQUIRE_TIMEOUT_MS}ms`);
    await new Promise((resolve) => setTimeout(resolve, jitterDelay()));
  }
  try {
    // `await` inside try/finally covers sync throws and non-promise returns,
    // which a bare `fn().finally(...)` would not.
    return await fn();
  } finally {
    try {
      await db.delete(r2Locks).where(and$1(eq$1(r2Locks.scope, key), eq$1(r2Locks.holderId, holderId))).run();
    } catch {
      // Best-effort release: the row expires after LOCK_TTL_MS anyway.
    }
  }
}
/**
 * Deletes all manifest, watermark and sync-state rows for a user, or for a
 * single site of that user when `filter.siteId` is provided.
 *
 * The three tables are counted first (in parallel) and then deleted in one
 * `db.batch` call; the returned numbers are those pre-delete counts.
 * NOTE(review): counting and deleting are separate round trips, so the
 * counts can differ from what was actually deleted under concurrent writes.
 *
 * @param filter Object with `userId` and optional `siteId`.
 * @returns Promise of `{ entriesRemoved, watermarksRemoved, syncStatesRemoved }`.
 */
async function purgeTenant(filter) {
  const userIdNum = Number(filter.userId);
  // Same tenant condition for every table: user, plus site when one is given.
  const tenantCond = (tbl) => {
    const byUser = eq$1(tbl.userId, userIdNum);
    return filter.siteId !== undefined
      ? and$1(byUser, eq$1(tbl.siteId, filter.siteId))
      : byUser;
  };
  const entriesCond = tenantCond(r2Manifest);
  const watermarksCond = tenantCond(r2Watermarks);
  const syncStatesCond = tenantCond(r2SyncStates);

  const countIn = (tbl, cond) => db.select({ n: sql$1`count(*)` }).from(tbl).where(cond).all();
  const firstCount = (rows) => Number(rows[0]?.n ?? 0);

  const [entriesRows, watermarkRows, syncStateRows] = await Promise.all([
    countIn(r2Manifest, entriesCond),
    countIn(r2Watermarks, watermarksCond),
    countIn(r2SyncStates, syncStatesCond)
  ]);
  const removed = {
    entriesRemoved: firstCount(entriesRows),
    watermarksRemoved: firstCount(watermarkRows),
    syncStatesRemoved: firstCount(syncStateRows)
  };

  await db.batch([
    db.delete(r2Manifest).where(entriesCond),
    db.delete(r2Watermarks).where(watermarksCond),
    db.delete(r2SyncStates).where(syncStatesCond)
  ]);
  return removed;
}
450
+ return {
451
+ listLive,
452
+ listAll,
453
+ registerVersion: (entry, superseding) => registerVersions([entry], superseding),
454
+ registerVersions,
455
+ listRetired,
456
+ delete: deleteEntries,
457
+ getWatermarks,
458
+ bumpWatermark,
459
+ getSyncStates,
460
+ setSyncState,
461
+ withLock,
462
+ purgeTenant
463
+ };
464
+ }
124
465
  function createSqliteInsightRunner(opts) {
125
466
  const { executor, logger, rowsAsArrays, schema: schemaOverride } = opts;
126
467
  const callback = async (sql, params, method) => {
@@ -138,4 +479,4 @@ function createSqliteInsightRunner(opts) {
138
479
  }) };
139
480
  }
140
481
  const { scopeFor, mergeScope } = createScopedHelpers(schema);
141
- export { aggClicks, aggCtr, aggImpressions, aggPosition, and, compileSqlite, createEngine, createSqliteInsightRunner, createSqliteResolverAdapter, createSqliteResolverAdapterFromExecutor, eq, gsc_countries, gsc_devices, gsc_keywords, gsc_page_keywords, gsc_pages, gte, lte, mergeScope, probeSqliteRegex, resolveWindow, schema, scopeFor, sql, sqliteResolverAdapter };
482
+ export { aggClicks, aggCtr, aggImpressions, aggPosition, and, compileSqlite, createD1ManifestStore, createEngine, createSqliteInsightRunner, createSqliteResolverAdapter, createSqliteResolverAdapterFromExecutor, eq, gsc_countries, gsc_devices, gsc_keywords, gsc_page_keywords, gsc_pages, gte, lte, mergeScope, probeSqliteRegex, r2Locks, r2Manifest, r2ShadowDiffs, r2SyncStates, r2Watermarks, r2WriteErrors, resolveWindow, schema, scopeFor, sql, sqliteResolverAdapter };