@ainyc/canonry 2.2.3 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,8 +4,11 @@ import {
4
4
  agentSessions,
5
5
  apiKeys,
6
6
  auditLog,
7
+ backlinkDomains,
8
+ backlinkSummaries,
7
9
  bingCoverageSnapshots,
8
10
  bingUrlInspections,
11
+ ccReleaseSyncs,
9
12
  competitors,
10
13
  createLogger,
11
14
  dropLegacyCredentialColumns,
@@ -27,7 +30,7 @@ import {
27
30
  runs,
28
31
  schedules,
29
32
  usageCounters
30
- } from "./chunk-TAII35VC.js";
33
+ } from "./chunk-CW6CAPBQ.js";
31
34
 
32
35
  // src/config.ts
33
36
  import fs from "fs";
@@ -342,12 +345,12 @@ function printCliError(err, format) {
342
345
  }
343
346
 
344
347
  // src/server.ts
345
- import { createRequire as createRequire2 } from "module";
346
- import crypto24 from "crypto";
347
- import fs8 from "fs";
348
- import path9 from "path";
348
+ import { createRequire as createRequire3 } from "module";
349
+ import crypto27 from "crypto";
350
+ import fs12 from "fs";
351
+ import path15 from "path";
349
352
  import { fileURLToPath as fileURLToPath2 } from "url";
350
- import { eq as eq26 } from "drizzle-orm";
353
+ import { eq as eq29 } from "drizzle-orm";
351
354
  import Fastify from "fastify";
352
355
 
353
356
  // ../contracts/src/config-schema.ts
@@ -616,6 +619,9 @@ function agentBusy(projectName) {
616
619
  409
617
620
  );
618
621
  }
622
/**
 * Build the AppError raised when an optional runtime dependency is unavailable.
 *
 * @param {string} message - Human-readable explanation, passed through to AppError.
 * @param {unknown} [details] - Extra context forwarded unchanged to AppError.
 * @returns {AppError} Error constructed with code "MISSING_DEPENDENCY" and 422.
 */
function missingDependency(message, details) {
  const code = "MISSING_DEPENDENCY";
  const status = 422;
  return new AppError(code, message, status, details);
}
619
625
 
620
626
  // ../contracts/src/google.ts
621
627
  import { z as z5 } from "zod";
@@ -944,7 +950,8 @@ var runKindSchema = z8.enum([
944
950
  "gsc-sync",
945
951
  "inspect-sitemap",
946
952
  "ga-sync",
947
- "bing-inspect"
953
+ "bing-inspect",
954
+ "backlink-extract"
948
955
  ]);
949
956
  var RunKinds = runKindSchema.enum;
950
957
  var runTriggerSchema = z8.enum(["manual", "scheduled", "config-apply"]);
@@ -1431,6 +1438,83 @@ var agentMemoryDeleteRequestSchema = z13.object({
1431
1438
  key: z13.string().min(1).max(AGENT_MEMORY_KEY_MAX_LENGTH)
1432
1439
  });
1433
1440
 
1441
+ // ../contracts/src/backlinks.ts
1442
+ import { z as z14 } from "zod";
1443
// Lifecycle states of a Common Crawl release sync, plus the enum lookup object.
var ccReleaseSyncStatusSchema = z14.enum(["queued", "downloading", "querying", "ready", "failed"]);
var CcReleaseSyncStatuses = ccReleaseSyncStatusSchema.enum;

// Field factories for the DTO schemas below. Each call builds a fresh zod
// schema, exactly like the inline chains they stand in for.
var backlinkDtoText = () => z14.string();
var backlinkDtoInt = () => z14.number().int();
var backlinkDtoNullableText = () => z14.string().nullable();
var backlinkDtoNullableInt = () => z14.number().int().nullable();
var backlinkDtoOptionalText = () => z14.string().nullable().optional();
var backlinkDtoOptionalInt = () => z14.number().int().nullable().optional();

// One sync row as exposed to clients.
var ccReleaseSyncDtoSchema = z14.object({
  id: backlinkDtoText(),
  release: backlinkDtoText(),
  status: ccReleaseSyncStatusSchema,
  phaseDetail: backlinkDtoOptionalText(),
  vertexPath: backlinkDtoOptionalText(),
  edgesPath: backlinkDtoOptionalText(),
  vertexSha256: backlinkDtoOptionalText(),
  edgesSha256: backlinkDtoOptionalText(),
  vertexBytes: backlinkDtoOptionalInt(),
  edgesBytes: backlinkDtoOptionalInt(),
  projectsProcessed: backlinkDtoOptionalInt(),
  domainsDiscovered: backlinkDtoOptionalInt(),
  downloadStartedAt: backlinkDtoOptionalText(),
  downloadFinishedAt: backlinkDtoOptionalText(),
  queryStartedAt: backlinkDtoOptionalText(),
  queryFinishedAt: backlinkDtoOptionalText(),
  error: backlinkDtoOptionalText(),
  createdAt: backlinkDtoText(),
  updatedAt: backlinkDtoText()
});

// A single linking domain with its host count.
var backlinkDomainDtoSchema = z14.object({
  linkingDomain: backlinkDtoText(),
  numHosts: backlinkDtoInt()
});

// Per-project, per-release backlink summary.
var backlinkSummaryDtoSchema = z14.object({
  projectId: backlinkDtoText(),
  release: backlinkDtoText(),
  targetDomain: backlinkDtoText(),
  totalLinkingDomains: backlinkDtoInt(),
  totalHosts: backlinkDtoInt(),
  top10HostsShare: backlinkDtoText(),
  queriedAt: backlinkDtoText()
});

// Paginated domain listing together with its (possibly absent) summary.
var backlinkListResponseSchema = z14.object({
  summary: backlinkSummaryDtoSchema.nullable(),
  total: backlinkDtoInt(),
  rows: z14.array(backlinkDomainDtoSchema)
});

// One point in a project's per-release history.
var backlinkHistoryEntrySchema = z14.object({
  release: backlinkDtoText(),
  totalLinkingDomains: backlinkDtoInt(),
  totalHosts: backlinkDtoInt(),
  top10HostsShare: backlinkDtoText(),
  queriedAt: backlinkDtoText()
});

// DuckDB plugin install status.
var backlinksInstallStatusDtoSchema = z14.object({
  duckdbInstalled: z14.boolean(),
  duckdbVersion: backlinkDtoOptionalText(),
  duckdbSpec: backlinkDtoText(),
  pluginDir: backlinkDtoText()
});

// Outcome of an install request.
var backlinksInstallResultDtoSchema = z14.object({
  installed: z14.boolean(),
  version: backlinkDtoText(),
  path: backlinkDtoText(),
  alreadyPresent: z14.boolean()
});

// A release that can be downloaded.
var ccAvailableReleaseSchema = z14.object({
  release: backlinkDtoText(),
  vertexUrl: backlinkDtoText(),
  edgesUrl: backlinkDtoText(),
  vertexBytes: backlinkDtoNullableInt(),
  edgesBytes: backlinkDtoNullableInt(),
  lastModified: backlinkDtoNullableText()
});

// A release present in the local cache.
var ccCachedReleaseSchema = z14.object({
  release: backlinkDtoText(),
  syncStatus: ccReleaseSyncStatusSchema.nullable(),
  bytes: backlinkDtoInt(),
  lastUsedAt: backlinkDtoNullableText()
});
1517
+
1434
1518
  // ../api-routes/src/auth.ts
1435
1519
  import crypto2 from "crypto";
1436
1520
  import { eq } from "drizzle-orm";
@@ -2463,7 +2547,7 @@ async function deliverWebhook(target, payload, webhookSecret) {
2463
2547
  const body = JSON.stringify(payload);
2464
2548
  const isHttps = target.url.protocol === "https:";
2465
2549
  const port = target.url.port ? Number(target.url.port) : isHttps ? 443 : 80;
2466
- const path10 = `${target.url.pathname}${target.url.search}`;
2550
+ const path16 = `${target.url.pathname}${target.url.search}`;
2467
2551
  const headers = {
2468
2552
  "Content-Length": String(Buffer.byteLength(body)),
2469
2553
  "Content-Type": "application/json",
@@ -2479,7 +2563,7 @@ async function deliverWebhook(target, payload, webhookSecret) {
2479
2563
  headers,
2480
2564
  hostname: target.address,
2481
2565
  method: "POST",
2482
- path: path10,
2566
+ path: path16,
2483
2567
  port,
2484
2568
  timeout: REQUEST_TIMEOUT_MS
2485
2569
  };
@@ -5657,6 +5741,171 @@ var routeCatalog = [
5657
5741
  200: { description: "Health history returned." },
5658
5742
  404: { description: "Project not found." }
5659
5743
  }
5744
+ },
5745
+ {
5746
+ method: "get",
5747
+ path: "/api/v1/backlinks/status",
5748
+ summary: "Get the Common Crawl DuckDB plugin install status",
5749
+ description: "Reports whether @duckdb/node-api is installed in the local plugin dir. Returns MISSING_DEPENDENCY (422) on deployments that cannot host the plugin (e.g. the cloud API).",
5750
+ tags: ["backlinks"],
5751
+ responses: {
5752
+ 200: { description: "Install status returned." },
5753
+ 422: { description: "Backlinks feature is not available on this deployment." }
5754
+ }
5755
+ },
5756
+ {
5757
+ method: "post",
5758
+ path: "/api/v1/backlinks/install",
5759
+ summary: "Install the @duckdb/node-api plugin",
5760
+ description: "Idempotently installs DuckDB into the canonry plugin dir. Returns MISSING_DEPENDENCY (422) when the host cannot perform the install.",
5761
+ tags: ["backlinks"],
5762
+ responses: {
5763
+ 200: { description: "Installed (or already present)." },
5764
+ 422: { description: "Backlinks feature is not available on this deployment." }
5765
+ }
5766
+ },
5767
+ {
5768
+ method: "post",
5769
+ path: "/api/v1/backlinks/syncs",
5770
+ summary: "Queue a workspace-wide Common Crawl release sync",
5771
+ description: "Creates a `cc_release_syncs` row and fires the sync callback. Idempotent: an existing in-flight row for the same release is returned.",
5772
+ tags: ["backlinks"],
5773
+ requestBody: {
5774
+ required: true,
5775
+ content: {
5776
+ "application/json": {
5777
+ schema: {
5778
+ type: "object",
5779
+ required: ["release"],
5780
+ properties: {
5781
+ release: stringSchema
5782
+ }
5783
+ }
5784
+ }
5785
+ }
5786
+ },
5787
+ responses: {
5788
+ 200: { description: "Existing in-flight sync returned." },
5789
+ 201: { description: "Sync queued." },
5790
+ 400: { description: "Invalid release id." },
5791
+ 422: { description: "Backlinks feature is not available on this deployment." }
5792
+ }
5793
+ },
5794
+ {
5795
+ method: "get",
5796
+ path: "/api/v1/backlinks/syncs",
5797
+ summary: "List Common Crawl release syncs",
5798
+ description: "Returns syncs ordered by updatedAt DESC \u2014 re-queued rows surface ahead of untouched newer rows.",
5799
+ tags: ["backlinks"],
5800
+ responses: {
5801
+ 200: { description: "Sync history returned." }
5802
+ }
5803
+ },
5804
+ {
5805
+ method: "get",
5806
+ path: "/api/v1/backlinks/syncs/latest",
5807
+ summary: "Get the most recently-updated Common Crawl release sync",
5808
+ tags: ["backlinks"],
5809
+ responses: {
5810
+ 200: { description: "Latest sync returned, or null when no sync exists." }
5811
+ }
5812
+ },
5813
+ {
5814
+ method: "get",
5815
+ path: "/api/v1/backlinks/releases",
5816
+ summary: "List cached Common Crawl releases on the local filesystem",
5817
+ tags: ["backlinks"],
5818
+ responses: {
5819
+ 200: { description: "Cached release metadata returned." }
5820
+ }
5821
+ },
5822
+ {
5823
+ method: "delete",
5824
+ path: "/api/v1/backlinks/cache/{release}",
5825
+ summary: "Prune a cached Common Crawl release",
5826
+ tags: ["backlinks"],
5827
+ parameters: [
5828
+ {
5829
+ name: "release",
5830
+ in: "path",
5831
+ required: true,
5832
+ description: "Release id (e.g. cc-main-2026-jan-feb-mar).",
5833
+ schema: stringSchema
5834
+ }
5835
+ ],
5836
+ responses: {
5837
+ 200: { description: "Cache pruned." },
5838
+ 400: { description: "Invalid release id." },
5839
+ 422: { description: "Backlinks feature is not available on this deployment." }
5840
+ }
5841
+ },
5842
+ {
5843
+ method: "post",
5844
+ path: "/api/v1/projects/{name}/backlinks/extract",
5845
+ summary: "Extract backlinks for a single project from a cached release",
5846
+ description: 'Creates a `runs` row with kind="backlink-extract" and fires the extract callback. Defaults to the most recent ready release when `release` is omitted.',
5847
+ tags: ["backlinks"],
5848
+ parameters: [nameParameter],
5849
+ requestBody: {
5850
+ required: false,
5851
+ content: {
5852
+ "application/json": {
5853
+ schema: {
5854
+ type: "object",
5855
+ properties: {
5856
+ release: stringSchema
5857
+ }
5858
+ }
5859
+ }
5860
+ }
5861
+ },
5862
+ responses: {
5863
+ 201: { description: "Extract run queued." },
5864
+ 400: { description: "Invalid release id." },
5865
+ 404: { description: "Project not found." },
5866
+ 422: { description: "Backlinks feature is not available on this deployment." }
5867
+ }
5868
+ },
5869
+ {
5870
+ method: "get",
5871
+ path: "/api/v1/projects/{name}/backlinks/summary",
5872
+ summary: "Get the latest backlink summary for a project",
5873
+ tags: ["backlinks"],
5874
+ parameters: [
5875
+ nameParameter,
5876
+ { name: "release", in: "query", description: "Release id filter.", schema: stringSchema }
5877
+ ],
5878
+ responses: {
5879
+ 200: { description: "Summary returned, or null when no backlinks exist." },
5880
+ 404: { description: "Project not found." }
5881
+ }
5882
+ },
5883
+ {
5884
+ method: "get",
5885
+ path: "/api/v1/projects/{name}/backlinks/domains",
5886
+ summary: "Paginate backlink domains for a project",
5887
+ tags: ["backlinks"],
5888
+ parameters: [
5889
+ nameParameter,
5890
+ { name: "release", in: "query", description: "Release id filter.", schema: stringSchema },
5891
+ { name: "limit", in: "query", description: "Max results (1-500).", schema: stringSchema },
5892
+ { name: "offset", in: "query", description: "Pagination offset.", schema: stringSchema }
5893
+ ],
5894
+ responses: {
5895
+ 200: { description: "Domain list returned." },
5896
+ 404: { description: "Project not found." }
5897
+ }
5898
+ },
5899
+ {
5900
+ method: "get",
5901
+ path: "/api/v1/projects/{name}/backlinks/history",
5902
+ summary: "Get per-release backlink summaries for a project",
5903
+ tags: ["backlinks"],
5904
+ parameters: [nameParameter],
5905
+ responses: {
5906
+ 200: { description: "History returned oldest-first by queriedAt." },
5907
+ 404: { description: "Project not found." }
5908
+ }
5660
5909
  }
5661
5910
  ];
5662
5911
  var canonryLocalRouteCatalog = [
@@ -5791,8 +6040,8 @@ async function openApiRoutes(app, opts = {}) {
5791
6040
  return reply.type("application/json").send(buildOpenApiDocument(opts));
5792
6041
  });
5793
6042
  }
5794
- function buildOperationId(method, path10) {
5795
- const parts = path10.split("/").filter(Boolean).map((part) => {
6043
+ function buildOperationId(method, path16) {
6044
+ const parts = path16.split("/").filter(Boolean).map((part) => {
5796
6045
  if (part.startsWith("{") && part.endsWith("}")) {
5797
6046
  return `by-${part.slice(1, -1)}`;
5798
6047
  }
@@ -9435,10 +9684,10 @@ function buildAuthErrorMessage(res, responseText) {
9435
9684
  }
9436
9685
  return "WordPress credentials are invalid or lack permission for this action";
9437
9686
  }
9438
- async function fetchJson(connection, siteUrl, path10, init) {
9687
+ async function fetchJson(connection, siteUrl, path16, init) {
9439
9688
  if (siteUrl.startsWith("http:")) {
9440
9689
  }
9441
- const res = await fetch(`${normalizeSiteUrl(siteUrl)}${path10}`, {
9690
+ const res = await fetch(`${normalizeSiteUrl(siteUrl)}${path16}`, {
9442
9691
  ...init,
9443
9692
  headers: {
9444
9693
  "Authorization": `Basic ${encodeBasicAuth(connection.username, connection.appPassword)}`,
@@ -10920,6 +11169,566 @@ async function wordpressRoutes(app, opts) {
10920
11169
  });
10921
11170
  }
10922
11171
 
11172
+ // ../api-routes/src/backlinks.ts
11173
+ import crypto18 from "crypto";
11174
+ import { and as and7, asc as asc2, desc as desc8, eq as eq18, sql as sql5 } from "drizzle-orm";
11175
+
11176
+ // ../integration-commoncrawl/src/constants.ts
11177
+ import os3 from "os";
11178
+ import path3 from "path";
11179
/**
 * Read an environment variable, treating an unset, empty, or whitespace-only
 * value as "not configured".
 *
 * `??` alone would accept an empty string: `CANONRY_CC_CACHE_DIR=""` would make
 * the cache root resolve relative to the current working directory, and the
 * prune path later removes directories under that root.
 *
 * @param {string} name - Environment variable name.
 * @param {string} fallback - Value used when the variable is not configured.
 * @returns {string} The raw configured value, or `fallback`.
 */
function readEnvOrDefault(name, fallback) {
  const value = process.env[name];
  return value === undefined || value.trim() === "" ? fallback : value;
}

// Base URL under which the per-release hyperlink-graph files are addressed.
var CC_BASE_URL = "https://data.commoncrawl.org/projects/hyperlinkgraph";
// Directory (and its package.json) that hosts the optional DuckDB plugin.
var PLUGIN_DIR = path3.join(os3.homedir(), ".canonry", "plugins");
var PLUGIN_PKG_JSON = path3.join(PLUGIN_DIR, "package.json");
// Package spec handed to the package manager; overridable via CANONRY_DUCKDB_SPEC.
var DUCKDB_SPEC = readEnvOrDefault("CANONRY_DUCKDB_SPEC", "@duckdb/node-api@1.4.4-r.3");
// Root of the release cache; overridable via CANONRY_CC_CACHE_DIR.
var CC_CACHE_DIR = readEnvOrDefault(
  "CANONRY_CC_CACHE_DIR",
  path3.join(os3.homedir(), ".canonry", "cache", "commoncrawl")
);
// Accepted release ids: cc-main-YYYY-<quarter>. Anchored at both ends, so a
// matching id cannot contain path separators.
var RELEASE_ID_REGEX = /^cc-main-(\d{4})-(jan-feb-mar|apr-may-jun|jul-aug-sep|oct-nov-dec)$/;
11185
/**
 * Derive the download URLs and file names of a release's domain-level
 * vertex and edge files.
 *
 * @param {string} release - Release id; interpolated as-is, not validated here.
 * @returns {{vertexUrl: string, edgesUrl: string, vertexFilename: string, edgesFilename: string}}
 */
function ccReleasePaths(release) {
  const filenameFor = (kind) => `${release}-domain-${kind}.txt.gz`;
  const urlFor = (filename) => `${CC_BASE_URL}/${release}/domain/${filename}`;
  const vertexFilename = filenameFor("vertices");
  const edgesFilename = filenameFor("edges");
  return {
    vertexUrl: urlFor(vertexFilename),
    edgesUrl: urlFor(edgesFilename),
    vertexFilename,
    edgesFilename
  };
}
11196
+
11197
+ // ../integration-commoncrawl/src/reverse-domain.ts
11198
/**
 * Flip the order of a domain's dot-separated labels
 * ("www.example.com" -> "com.example.www").
 *
 * @param {string} domain - Domain in its usual left-to-right form.
 * @returns {string} The same labels in reverse order.
 */
function reverseDomain(domain) {
  const labels = domain.split(".");
  labels.reverse();
  return labels.join(".");
}
11201
/**
 * Turn a reversed domain ("com.example.www") back into its usual form
 * ("www.example.com").
 *
 * @param {string} revDomain - Domain with its labels in reversed order.
 * @returns {string} The labels in forward order.
 */
function forwardDomain(revDomain) {
  const forward = [];
  for (const label of revDomain.split(".")) {
    // Prepending each label yields the reversed sequence.
    forward.unshift(label);
  }
  return forward.join(".");
}
11204
+
11205
+ // ../integration-commoncrawl/src/release-id.ts
11206
/**
 * Check whether a value matches RELEASE_ID_REGEX.
 *
 * @param {string} id - Candidate release id.
 * @returns {boolean} True when the id matches the release-id pattern.
 */
function isValidReleaseId(id) {
  return RELEASE_ID_REGEX.exec(id) !== null;
}
11209
+
11210
+ // ../integration-commoncrawl/src/downloader.ts
11211
+ import { createHash } from "crypto";
11212
+ import { createWriteStream } from "fs";
11213
+ import fs3 from "fs/promises";
11214
+ import path4 from "path";
11215
+ import { pipeline } from "stream/promises";
11216
+ import { Readable, Transform } from "stream";
11217
/**
 * Download `opts.url` to `opts.destPath`, hashing the bytes as they stream.
 *
 * If the destination file already exists it is reused: its SHA-256 comes from
 * the `<destPath>.sha256` sidecar, or is recomputed (and the sidecar written)
 * when the sidecar is missing or malformed. Otherwise the response is streamed
 * into `<destPath>.partial` and renamed into place once complete.
 *
 * @param {object} opts
 * @param {string} opts.url - Source URL.
 * @param {string} opts.destPath - Final location of the downloaded file.
 * @param {Function} [opts.fetchImpl] - fetch-compatible function; defaults to global fetch.
 * @param {(bytes: number, total: number|null) => void} [opts.onProgress] - Called per chunk.
 * @returns {Promise<{bytes: number, sha256: string, cached: boolean, elapsedMs: number}>}
 * @throws {Error} On a non-OK response, a missing body, or any stream/filesystem failure.
 */
async function downloadFile(opts) {
  const start = Date.now();
  const fetchImpl = opts.fetchImpl ?? fetch;
  const sidecarPath = `${opts.destPath}.sha256`;
  try {
    const stat = await fs3.stat(opts.destPath);
    const sidecar = await readSidecar(sidecarPath);
    const cachedSha256 = sidecar ?? await hashFile(opts.destPath);
    if (!sidecar) await writeSidecar(sidecarPath, cachedSha256);
    return { bytes: stat.size, sha256: cachedSha256, cached: true, elapsedMs: Date.now() - start };
  } catch {
    // Deliberately best-effort: any failure probing the cached copy (most
    // commonly "file does not exist") falls through to a fresh download.
  }
  const partialPath = `${opts.destPath}.partial`;
  await fs3.mkdir(path4.dirname(opts.destPath), { recursive: true });
  await unlinkIfExists(partialPath);
  const res = await fetchImpl(opts.url);
  if (!res.ok || !res.body) {
    throw new Error(`HTTP ${res.status} ${res.statusText} for ${opts.url}`);
  }
  const total = parseContentLength(res.headers.get("content-length"));
  const hasher = createHash("sha256");
  let bytes = 0;
  // Pass-through stage: hash and count every chunk on its way to disk.
  const hashAndCount = new Transform({
    transform(chunk, _enc, cb) {
      hasher.update(chunk);
      bytes += chunk.length;
      opts.onProgress?.(bytes, total);
      cb(null, chunk);
    }
  });
  let sha256;
  try {
    await pipeline(
      Readable.fromWeb(res.body),
      hashAndCount,
      createWriteStream(partialPath)
    );
    sha256 = hasher.digest("hex");
    await fs3.rename(partialPath, opts.destPath);
  } catch (err) {
    // Do not leave a truncated `.partial` behind when the transfer or the
    // rename fails; the original error is what the caller needs to see.
    await unlinkIfExists(partialPath);
    throw err;
  }
  await writeSidecar(sidecarPath, sha256);
  return { bytes, sha256, cached: false, elapsedMs: Date.now() - start };
}
11257
/**
 * Compute the SHA-256 of a file by streaming its contents.
 *
 * @param {string} filePath - File to hash.
 * @returns {Promise<string>} Hex-encoded digest.
 */
async function hashFile(filePath) {
  const digest = createHash("sha256");
  const fileHandle = await fs3.open(filePath, "r");
  try {
    const source = fileHandle.createReadStream();
    for await (const piece of source) {
      digest.update(piece);
    }
  } finally {
    // Release the handle whether or not reading succeeded.
    await fileHandle.close();
  }
  return digest.digest("hex");
}
11268
/**
 * Read a `.sha256` sidecar file.
 *
 * @param {string} sidecarPath - Path of the sidecar.
 * @returns {Promise<string|null>} The lower-cased 64-hex-digit digest, or null
 *   when the file cannot be read or does not contain exactly one such digest.
 */
async function readSidecar(sidecarPath) {
  let contents;
  try {
    contents = await fs3.readFile(sidecarPath, "utf8");
  } catch {
    return null;
  }
  const candidate = contents.trim();
  if (!/^[0-9a-f]{64}$/i.test(candidate)) {
    return null;
  }
  return candidate.toLowerCase();
}
11277
/**
 * Write a digest to a sidecar file, followed by a single newline.
 *
 * @param {string} sidecarPath - Destination path.
 * @param {string} sha256 - Digest text to store.
 * @returns {Promise<void>}
 */
async function writeSidecar(sidecarPath, sha256) {
  const contents = `${sha256}\n`;
  await fs3.writeFile(sidecarPath, contents);
}
11281
/**
 * Best-effort file removal: any unlink failure is ignored.
 *
 * @param {string} p - Path to remove.
 * @returns {Promise<void>}
 */
async function unlinkIfExists(p) {
  await fs3.unlink(p).catch(() => {
    // Intentionally ignored: a missing file is the expected case here.
  });
}
11287
/**
 * Parse a Content-Length header value.
 *
 * @param {string|null|undefined} value - Raw header value.
 * @returns {number|null} The base-10 integer, or null when the header is
 *   absent/empty or does not parse to a finite number.
 */
function parseContentLength(value) {
  if (!value) {
    return null;
  }
  const parsed = Number.parseInt(value, 10);
  if (!Number.isFinite(parsed)) {
    return null;
  }
  return parsed;
}
11292
+
11293
+ // ../integration-commoncrawl/src/plugin-resolver.ts
11294
+ import fs4 from "fs";
11295
+ import { createRequire as createRequire2 } from "module";
11296
+ import path5 from "path";
11297
/**
 * Directory that contains the given plugin package.json.
 *
 * @param {string} pkgJson - Path to the plugin dir's package.json.
 * @returns {string} Its parent directory.
 */
function pluginDirFor(pkgJson) {
  const containingDir = path5.dirname(pkgJson);
  return containingDir;
}
11300
/**
 * Path of the installed @duckdb/node-api package.json inside a plugin dir.
 *
 * @param {string} pluginDir - Plugin directory.
 * @returns {string} `<pluginDir>/node_modules/@duckdb/node-api/package.json`.
 */
function duckdbPkgJsonFor(pluginDir) {
  const segments = ["node_modules", "@duckdb", "node-api", "package.json"];
  return path5.join(pluginDir, ...segments);
}
11303
/**
 * Load @duckdb/node-api from the plugin directory.
 *
 * @param {object} [opts]
 * @param {string} [opts.pluginPkgJson] - Plugin package.json path; defaults to PLUGIN_PKG_JSON.
 * @returns {object} The loaded @duckdb/node-api module.
 * @throws A `missingDependency` error when the package is not present, or when
 *   it is present but cannot be loaded. In the latter case `details.cause`
 *   carries the underlying error message so the failure can be diagnosed.
 */
function loadDuckdb(opts = {}) {
  const pkgJson = opts.pluginPkgJson ?? PLUGIN_PKG_JSON;
  const pluginDir = pluginDirFor(pkgJson);
  const duckdbPkg = duckdbPkgJsonFor(pluginDir);
  if (!fs4.existsSync(duckdbPkg)) {
    throw missingDependency(
      "@duckdb/node-api is not installed. Run `canonry backlinks install` to enable the backlinks feature.",
      { pluginDir }
    );
  }
  try {
    // Resolve relative to the plugin's own node_modules, not this bundle's.
    const pluginRequire = createRequire2(duckdbPkg);
    return pluginRequire("@duckdb/node-api");
  } catch (err) {
    // Keep the real reason instead of discarding it.
    const cause = err instanceof Error ? err.message : String(err);
    throw missingDependency(
      "@duckdb/node-api is installed but failed to load. Re-run `canonry backlinks install`.",
      { pluginDir, cause }
    );
  }
}
11323
/**
 * Report whether @duckdb/node-api's package.json exists under the plugin dir.
 *
 * @param {object} [opts]
 * @param {string} [opts.pluginPkgJson] - Plugin package.json path; defaults to PLUGIN_PKG_JSON.
 * @returns {boolean} True when the package manifest is present on disk.
 */
function isDuckdbInstalled(opts = {}) {
  const manifestPath = opts.pluginPkgJson ?? PLUGIN_PKG_JSON;
  const duckdbManifest = duckdbPkgJsonFor(pluginDirFor(manifestPath));
  return fs4.existsSync(duckdbManifest);
}
11327
/**
 * Read the `version` field of the installed @duckdb/node-api package.json.
 *
 * @param {object} [opts]
 * @param {string} [opts.pluginPkgJson] - Plugin package.json path; when absent, PLUGIN_DIR is used.
 * @returns {string|null} The version, or null when the manifest is missing,
 *   unreadable, unparsable, or has no version.
 */
function readInstalledVersion(opts = {}) {
  const baseDir = opts.pluginPkgJson ? pluginDirFor(opts.pluginPkgJson) : PLUGIN_DIR;
  try {
    const manifestPath = duckdbPkgJsonFor(baseDir);
    const manifest = JSON.parse(fs4.readFileSync(manifestPath, "utf8"));
    const installed = manifest.version;
    return installed ?? null;
  } catch {
    return null;
  }
}
11337
+
11338
+ // ../integration-commoncrawl/src/plugin-installer.ts
11339
+ import { spawn } from "child_process";
11340
+ import fs5 from "fs/promises";
11341
+ import path6 from "path";
11342
/**
 * Install @duckdb/node-api into the plugin directory unless it is already there.
 *
 * @param {object} [opts]
 * @param {string} [opts.pluginDir] - Target directory; defaults to PLUGIN_DIR.
 * @param {string} [opts.spec] - Package spec; defaults to DUCKDB_SPEC.
 * @param {string} [opts.packageManager] - Package manager binary; defaults to "npm".
 * @param {(line: string) => void} [opts.onLog] - Forwarded to the install step.
 * @returns {Promise<{alreadyPresent: boolean, version: string, path: string}>}
 *   `version` is "unknown" when it cannot be read.
 * @throws {Error} When the install step finishes but the package is still absent.
 */
async function installDuckdb(opts = {}) {
  const pluginDir = opts.pluginDir ?? PLUGIN_DIR;
  const pluginPkgJson = path6.join(pluginDir, "package.json");
  const spec = opts.spec ?? DUCKDB_SPEC;
  const pkgManager = opts.packageManager ?? "npm";
  const lookup = { pluginPkgJson };
  const describe = (alreadyPresent) => ({
    alreadyPresent,
    version: readInstalledVersion({ pluginPkgJson }) ?? "unknown",
    path: pluginDir
  });

  await ensurePluginDir(pluginDir, pluginPkgJson);
  if (isDuckdbInstalled(lookup)) {
    return describe(true);
  }

  await runInstall(pkgManager, spec, pluginDir, opts.onLog);
  if (isDuckdbInstalled(lookup)) {
    return describe(false);
  }
  throw new Error(`${pkgManager} install completed but @duckdb/node-api still cannot be resolved from ${pluginDir}`);
}
11359
/**
 * Make sure the plugin directory exists and contains a package.json.
 * An existing package.json is left untouched.
 *
 * @param {string} [pluginDir] - Directory to create; defaults to PLUGIN_DIR.
 * @param {string} [pluginPkgJson] - Manifest path; defaults to PLUGIN_PKG_JSON.
 * @returns {Promise<void>}
 */
async function ensurePluginDir(pluginDir = PLUGIN_DIR, pluginPkgJson = PLUGIN_PKG_JSON) {
  await fs5.mkdir(pluginDir, { recursive: true });
  const manifestAccessible = await fs5.access(pluginPkgJson).then(
    () => true,
    () => false
  );
  if (manifestAccessible) {
    return;
  }
  const manifest = { name: "canonry-plugins", private: true, dependencies: {} };
  await fs5.writeFile(pluginPkgJson, `${JSON.stringify(manifest, null, 2)}\n`);
}
11369
/**
 * Run the package manager to install `spec` into `pluginDir`.
 *
 * With `onLog`, stdout and stderr are captured and delivered one complete,
 * non-empty line at a time. "data" chunks are arbitrary slices of the stream,
 * so the trailing partial line of each chunk is buffered until its newline
 * arrives (or the stream ends). Without `onLog`, stdio is inherited.
 *
 * The promise settles on "close" rather than "exit", so all buffered output
 * has been delivered before the caller continues.
 *
 * @param {string} pkgManager - Binary to spawn; "pnpm" uses `add --dir`, anything else `install --prefix`.
 * @param {string} spec - Package spec to install.
 * @param {string} pluginDir - Target directory.
 * @param {(line: string) => void} [onLog] - Receives each output line.
 * @returns {Promise<void>}
 * @throws {Error} When the process cannot be spawned, exits non-zero, or is killed by a signal.
 */
async function runInstall(pkgManager, spec, pluginDir, onLog) {
  const args = pkgManager === "pnpm" ? ["add", spec, "--dir", pluginDir] : ["install", spec, "--prefix", pluginDir];
  await new Promise((resolve, reject) => {
    const child = spawn(pkgManager, args, {
      stdio: onLog ? ["ignore", "pipe", "pipe"] : "inherit"
    });
    // One flush callback per captured stream, run once the streams have closed.
    const flushers = [];
    if (onLog) {
      for (const stream of [child.stdout, child.stderr]) {
        if (!stream) continue;
        stream.setEncoding("utf8");
        let pending = "";
        stream.on("data", (chunk) => {
          const parts = (pending + chunk).split(/\r?\n/);
          // The last element is the (possibly empty) unterminated tail.
          pending = parts.pop() ?? "";
          for (const line of parts) {
            if (line.length > 0) onLog(line);
          }
        });
        flushers.push(() => {
          if (pending.length > 0) onLog(pending);
          pending = "";
        });
      }
    }
    child.on("error", reject);
    child.on("close", (code, signal) => {
      for (const flush of flushers) flush();
      if (code === 0) {
        resolve();
      } else if (code === null) {
        reject(new Error(`${pkgManager} install was terminated by signal ${signal}`));
      } else {
        reject(new Error(`${pkgManager} install exited with code ${code}`));
      }
    });
  });
}
11396
+
11397
+ // ../integration-commoncrawl/src/duckdb-query.ts
11398
/**
 * Find the domains that link to each target domain by querying a release's
 * vertex and edge files with an in-memory DuckDB instance.
 *
 * Targets are matched against the vertex file in reversed-label form, and
 * results are converted back to forward form before being returned.
 *
 * @param {object} opts
 * @param {object} opts.duckdb - Loaded @duckdb/node-api module.
 * @param {string[]} opts.targets - Target domains in forward form.
 * @param {string} opts.vertexPath - Path of the vertex file.
 * @param {string} opts.edgesPath - Path of the edge file.
 * @param {number} [opts.limitPerTarget] - When truthy, keep at most this many
 *   linking domains per target, highest `num_hosts` first.
 * @returns {Promise<Array<{targetDomain: string, linkingDomain: string, numHosts: number}>>}
 *   Empty when `targets` is empty.
 */
async function queryBacklinks(opts) {
  if (opts.targets.length === 0) return [];
  const duckdb = opts.duckdb;
  const reversed = opts.targets.map(reverseDomain);
  const targetList = reversed.map(quote).join(", ");
  // Math.floor yields a number (or NaN), so this cannot inject SQL text.
  const limitClause = opts.limitPerTarget ? `QUALIFY row_number() OVER (PARTITION BY t.target_rev_domain ORDER BY v.num_hosts DESC) <= ${Math.floor(opts.limitPerTarget)}` : "";
  // The delimiter is written as an explicit \t escape so it cannot be
  // silently turned into a space by editors or diff viewers.
  // NOTE(review): assumes the graph files are tab-separated — confirm against the data source.
  const querySql = `
    WITH vertices AS (
      SELECT * FROM read_csv(
        ${quote(opts.vertexPath)},
        delim='\t', header=false,
        columns={'id':'BIGINT','rev_domain':'VARCHAR','num_hosts':'BIGINT'}
      )
    ),
    targets AS (
      SELECT v.id AS target_id, v.rev_domain AS target_rev_domain
      FROM vertices v
      WHERE v.rev_domain IN (${targetList})
    ),
    inbound AS (
      SELECT e.from_id, e.to_id
      FROM read_csv(
        ${quote(opts.edgesPath)},
        delim='\t', header=false,
        columns={'from_id':'BIGINT','to_id':'BIGINT'}
      ) e
      WHERE e.to_id IN (SELECT target_id FROM targets)
    )
    SELECT
      t.target_rev_domain,
      v.rev_domain AS linking_rev_domain,
      v.num_hosts
    FROM inbound i
    JOIN targets t ON t.target_id = i.to_id
    JOIN vertices v ON v.id = i.from_id
    ${limitClause}
    ORDER BY t.target_rev_domain, v.num_hosts DESC
  `;
  const instance = await duckdb.DuckDBInstance.create(":memory:");
  const conn = await instance.connect();
  // Run every cleanup step even if an earlier one throws, and never let a
  // cleanup failure replace the query's own result or error.
  const releaseQuietly = (step) => {
    try {
      step();
    } catch {
      // Ignored on purpose: nothing useful can be done about a failed close.
    }
  };
  let rows;
  try {
    const reader = await conn.runAndReadAll(querySql);
    rows = reader.getRowObjects();
  } finally {
    releaseQuietly(() => conn.disconnectSync?.());
    releaseQuietly(() => conn.closeSync?.());
    releaseQuietly(() => instance.closeSync?.());
  }
  return rows.map((r) => ({
    targetDomain: forwardDomain(String(r["target_rev_domain"])),
    linkingDomain: forwardDomain(String(r["linking_rev_domain"])),
    numHosts: Number(r["num_hosts"])
  }));
}
11453
/**
 * Wrap a string in single quotes for use as a SQL string literal, doubling
 * any embedded single quote.
 *
 * @param {string} s - Raw text.
 * @returns {string} Quoted literal.
 */
function quote(s) {
  const escaped = s.split("'").join("''");
  return `'${escaped}'`;
}
11456
+
11457
+ // ../integration-commoncrawl/src/cache.ts
11458
+ import fs6 from "fs";
11459
+ import path7 from "path";
11460
/**
 * Resolve the cache root directory.
 *
 * @param {object} [opts]
 * @param {string} [opts.cacheDir] - Explicit override.
 * @returns {string} `opts.cacheDir` when given, otherwise CC_CACHE_DIR.
 */
function cacheRoot(opts = {}) {
  const { cacheDir } = opts;
  return cacheDir ?? CC_CACHE_DIR;
}
11463
/**
 * Total the size of every regular file under `dir` and find the most recent
 * access-or-modification time among them.
 *
 * Entries that cannot be stat'ed or listed are skipped silently.
 *
 * @param {string} dir - Directory (or file) to measure.
 * @returns {{bytes: number, lastUsedAt: string|null}} `lastUsedAt` is an ISO
 *   timestamp, or null when no file was found.
 */
function directoryBytesAndLastUsed(dir) {
  let totalBytes = 0;
  let newestMs = 0;
  // Explicit work list instead of recursion; visiting order does not affect
  // a sum or a maximum.
  const pending = [dir];
  while (pending.length > 0) {
    const current = pending.pop();
    let info;
    try {
      info = fs6.statSync(current);
    } catch {
      continue;
    }
    if (info.isDirectory()) {
      let names;
      try {
        names = fs6.readdirSync(current);
      } catch {
        continue;
      }
      for (const name of names) {
        pending.push(path7.join(current, name));
      }
      continue;
    }
    if (!info.isFile()) {
      continue;
    }
    totalBytes += info.size;
    newestMs = Math.max(newestMs, info.mtimeMs, info.atimeMs);
  }
  return {
    bytes: totalBytes,
    lastUsedAt: newestMs > 0 ? new Date(newestMs).toISOString() : null
  };
}
11493
/**
 * List the releases present in the cache root.
 *
 * Only directories whose name matches RELEASE_ID_REGEX are reported.
 *
 * @param {object} [opts] - Passed to cacheRoot().
 * @returns {Array<{release: string, bytes: number, lastUsedAt: string|null}>}
 *   Sorted by `lastUsedAt` descending, entries without a timestamp last;
 *   empty when the cache root does not exist.
 */
function listCachedReleases(opts = {}) {
  const root = cacheRoot(opts);
  if (!fs6.existsSync(root)) {
    return [];
  }
  const releases = fs6
    .readdirSync(root, { withFileTypes: true })
    .filter((entry) => entry.isDirectory() && RELEASE_ID_REGEX.test(entry.name))
    .map((entry) => {
      const { bytes, lastUsedAt } = directoryBytesAndLastUsed(path7.join(root, entry.name));
      return { release: entry.name, bytes, lastUsedAt };
    });
  const sortKey = (item) => item.lastUsedAt ?? "";
  releases.sort((a, b) => sortKey(b).localeCompare(sortKey(a)));
  return releases;
}
11508
/**
 * Delete a release's cache directory.
 *
 * The id is checked against RELEASE_ID_REGEX before anything is removed.
 * Removing a directory that does not exist is not an error.
 *
 * @param {string} release - Release id.
 * @param {object} [opts] - Passed to cacheRoot().
 * @throws {Error} When the release id does not match the expected pattern.
 */
function pruneCachedRelease(release, opts = {}) {
  const looksValid = RELEASE_ID_REGEX.test(release);
  if (looksValid) {
    const target = path7.join(cacheRoot(opts), release);
    fs6.rmSync(target, { recursive: true, force: true });
    return;
  }
  throw new Error(`Invalid release id: ${release}`);
}
11515
+
11516
+ // ../api-routes/src/backlinks.ts
11517
// Message used when a deployment provides no backlinks callbacks.
var BACKLINKS_UNSUPPORTED_MESSAGE = "Backlinks sync and install are only available from a local canonry install. Run `canonry backlinks install` locally to use this feature.";
// Sync statuses that mean a sync is still in flight.
var NON_TERMINAL_SYNC_STATUSES = /* @__PURE__ */ new Set(
  ["queued", "downloading", "querying"].map((key) => CcReleaseSyncStatuses[key])
);
11523
/**
 * Convert a `cc_release_syncs` row into its DTO shape.
 * Absent (null/undefined) optional columns are normalised to null.
 *
 * @param {object} row - Database row.
 * @returns {object} DTO with the same field names.
 */
function mapSyncRow(row) {
  // Optional columns, in the order they appear in the DTO.
  const optionalColumns = [
    "phaseDetail",
    "vertexPath",
    "edgesPath",
    "vertexSha256",
    "edgesSha256",
    "vertexBytes",
    "edgesBytes",
    "projectsProcessed",
    "domainsDiscovered",
    "downloadStartedAt",
    "downloadFinishedAt",
    "queryStartedAt",
    "queryFinishedAt",
    "error"
  ];
  const dto = { id: row.id, release: row.release, status: row.status };
  for (const column of optionalColumns) {
    dto[column] = row[column] ?? null;
  }
  dto.createdAt = row.createdAt;
  dto.updatedAt = row.updatedAt;
  return dto;
}
11546
/**
 * Convert a `backlink_summaries` row into its DTO shape, keeping only the
 * DTO's fields.
 *
 * @param {object} row - Database row.
 * @returns {object} Summary DTO.
 */
function mapSummaryRow(row) {
  const {
    projectId,
    release,
    targetDomain,
    totalLinkingDomains,
    totalHosts,
    top10HostsShare,
    queriedAt
  } = row;
  return {
    projectId,
    release,
    targetDomain,
    totalLinkingDomains,
    totalHosts,
    top10HostsShare,
    queriedAt
  };
}
11557
/**
 * Convert a `runs` row into the shape returned by the backlinks routes.
 * Absent (null/undefined) optional columns are normalised to null.
 *
 * @param {object} row - Database row.
 * @returns {object} Run DTO.
 */
function mapRunRow(row) {
  const { id, projectId, kind, status, trigger, createdAt } = row;
  const orNull = (value) => value ?? null;
  return {
    id,
    projectId,
    kind,
    status,
    trigger,
    location: orNull(row.location),
    startedAt: orNull(row.startedAt),
    finishedAt: orNull(row.finishedAt),
    error: orNull(row.error),
    createdAt
  };
}
11571
/**
 * Fetch the most recently queried backlink summary for a project.
 *
 * @param {object} db - Database handle exposing the select/from/where/orderBy/limit/get chain.
 * @param {string} projectId - Project to look up.
 * @param {string} [release] - When truthy, restrict the lookup to this release.
 * @returns {object|undefined} Whatever `.get()` yields for the newest matching row.
 */
function latestSummaryForProject(db, projectId, release) {
  const byProject = () => eq18(backlinkSummaries.projectId, projectId);
  const filter = release
    ? and7(byProject(), eq18(backlinkSummaries.release, release))
    : byProject();
  const newestFirst = desc8(backlinkSummaries.queriedAt);
  return db
    .select()
    .from(backlinkSummaries)
    .where(filter)
    .orderBy(newestFirst)
    .limit(1)
    .get();
}
11575
+ async function backlinksRoutes(app, opts) {
11576
+ app.get("/backlinks/status", async (_request, reply) => {
11577
+ if (!opts.getBacklinksStatus) {
11578
+ throw missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE);
11579
+ }
11580
+ return reply.send(opts.getBacklinksStatus());
11581
+ });
11582
+ app.post("/backlinks/install", async (_request, reply) => {
11583
+ if (!opts.onInstallBacklinks) {
11584
+ throw missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE);
11585
+ }
11586
+ const result = await opts.onInstallBacklinks();
11587
+ return reply.status(200).send(result);
11588
+ });
11589
// POST /backlinks/syncs
// Queues a sync for the release named in the request body.
//   - body.release must be present and pass isValidReleaseId, else validationError.
//   - opts.getBacklinksStatus and opts.onReleaseSyncRequested must both be supplied,
//     and getBacklinksStatus().duckdbInstalled must be truthy, else MISSING_DEPENDENCY.
// Replies:
//   - 200 with the existing row, untouched, when a sync for this release is still
//     in a non-terminal status (no second job is requested).
//   - 200 with the re-read row after an existing terminal sync has been re-queued.
//   - 201 with the new row when no sync for this release existed yet.
+ app.post("/backlinks/syncs", async (request, reply) => {
11590
+ const release = request.body?.release;
11591
+ if (!release || !isValidReleaseId(release)) {
11592
+ throw validationError("Invalid release id. Expected form: cc-main-YYYY-{jan-feb-mar,apr-may-jun,jul-aug-sep,oct-nov-dec}");
11593
+ }
11594
+ if (!opts.getBacklinksStatus || !opts.onReleaseSyncRequested) {
11595
+ throw missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE);
11596
+ }
11597
+ if (!opts.getBacklinksStatus().duckdbInstalled) {
11598
+ throw missingDependency(
11599
+ "@duckdb/node-api is not installed. Run `canonry backlinks install` to enable the backlinks feature."
11600
+ );
11601
+ }
// Look for an existing sync row with the same release id.
11602
+ const existing = app.db.select().from(ccReleaseSyncs).where(eq18(ccReleaseSyncs.release, release)).get();
11603
+ const now = (/* @__PURE__ */ new Date()).toISOString();
11604
+ if (existing) {
// A sync that is still in flight is returned as-is rather than restarted.
11605
+ if (NON_TERMINAL_SYNC_STATUSES.has(existing.status)) {
11606
+ return reply.status(200).send(mapSyncRow(existing));
11607
+ }
// Terminal sync: reuse the row, reset it to queued and clear the previous
// phaseDetail/error before requesting the job again.
11608
+ app.db.update(ccReleaseSyncs).set({
11609
+ status: CcReleaseSyncStatuses.queued,
11610
+ phaseDetail: null,
11611
+ error: null,
11612
+ updatedAt: now
11613
+ }).where(eq18(ccReleaseSyncs.id, existing.id)).run();
11614
+ opts.onReleaseSyncRequested(existing.id, release);
// Re-read after the callback so the reply reflects the row as currently stored.
11615
+ const refreshed = app.db.select().from(ccReleaseSyncs).where(eq18(ccReleaseSyncs.id, existing.id)).get();
11616
+ return reply.status(200).send(mapSyncRow(refreshed));
11617
+ }
// No prior sync for this release: insert a fresh queued row under a new UUID.
11618
+ const id = crypto18.randomUUID();
11619
+ app.db.insert(ccReleaseSyncs).values({
11620
+ id,
11621
+ release,
11622
+ status: CcReleaseSyncStatuses.queued,
11623
+ createdAt: now,
11624
+ updatedAt: now
11625
+ }).run();
11626
+ opts.onReleaseSyncRequested(id, release);
11627
+ const inserted = app.db.select().from(ccReleaseSyncs).where(eq18(ccReleaseSyncs.id, id)).get();
11628
+ return reply.status(201).send(mapSyncRow(inserted));
11629
+ });
11630
// GET /backlinks/syncs/latest
// Replies with the sync row that has the greatest updatedAt (mapped through
// mapSyncRow), or null when the ccReleaseSyncs table has no rows.
+ app.get("/backlinks/syncs/latest", async (_request, reply) => {
11631
+ const row = app.db.select().from(ccReleaseSyncs).orderBy(desc8(ccReleaseSyncs.updatedAt)).limit(1).get();
11632
+ return reply.send(row ? mapSyncRow(row) : null);
11633
+ });
11634
// GET /backlinks/syncs
// Replies with every sync row, ordered by updatedAt descending, each mapped
// through mapSyncRow. The query applies no limit.
+ app.get("/backlinks/syncs", async (_request, reply) => {
11635
+ const rows = app.db.select().from(ccReleaseSyncs).orderBy(desc8(ccReleaseSyncs.updatedAt)).all();
11636
+ return reply.send(rows.map(mapSyncRow));
11637
+ });
11638
// GET /backlinks/releases
// Replies with the result of opts.listCachedReleases().
// Unlike the other backlinks routes, a missing callback is not an error here:
// the reply falls back to an empty array (also used when the callback returns
// null or undefined).
+ app.get("/backlinks/releases", async (_request, reply) => {
11639
+ const releases = opts.listCachedReleases?.() ?? [];
11640
+ return reply.send(releases);
11641
+ });
11642
// DELETE /backlinks/cache/:release
// Validates the :release path parameter with isValidReleaseId, then calls
// opts.onBacklinksPruneCache(release) and replies { ok: true }.
// The callback's return value is ignored and it is not awaited.
// NOTE(review): if onBacklinksPruneCache is asynchronous, the reply is sent
// before pruning finishes — confirm against the caller.
+ app.delete("/backlinks/cache/:release", async (request, reply) => {
11643
+ const release = request.params.release;
11644
+ if (!isValidReleaseId(release)) {
11645
+ throw validationError("Invalid release id");
11646
+ }
11647
+ if (!opts.onBacklinksPruneCache) {
11648
+ throw missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE);
11649
+ }
11650
+ opts.onBacklinksPruneCache(release);
11651
+ return reply.send({ ok: true });
11652
+ });
11653
// POST /projects/:name/backlinks/extract
// Creates a queued, manually triggered "backlink-extract" run for the project
// and hands it to opts.onBacklinkExtractRequested(runId, projectId, release).
// Replies 201 with the run row (mapped through mapRunRow).
// Preconditions, checked in this order:
//   1. the project is resolved via resolveProject (presumably throws for an
//      unknown name — confirm against its definition);
//   2. opts.getBacklinksStatus and opts.onBacklinkExtractRequested are supplied;
//   3. getBacklinksStatus().duckdbInstalled is truthy;
//   4. body.release, when not undefined, passes isValidReleaseId.
+ app.post("/projects/:name/backlinks/extract", async (request, reply) => {
11654
+ const project = resolveProject(app.db, request.params.name);
11655
+ if (!opts.getBacklinksStatus || !opts.onBacklinkExtractRequested) {
11656
+ throw missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE);
11657
+ }
11658
+ if (!opts.getBacklinksStatus().duckdbInstalled) {
11659
+ throw missingDependency(
11660
+ "@duckdb/node-api is not installed. Run `canonry backlinks install` to enable the backlinks feature."
11661
+ );
11662
+ }
// release is optional; undefined is passed through to the callback unchanged.
11663
+ const release = request.body?.release;
11664
+ if (release !== void 0 && !isValidReleaseId(release)) {
11665
+ throw validationError("Invalid release id");
11666
+ }
11667
+ const now = (/* @__PURE__ */ new Date()).toISOString();
11668
+ const runId = crypto18.randomUUID();
// The run row is persisted before the callback is invoked.
11669
+ app.db.insert(runs).values({
11670
+ id: runId,
11671
+ projectId: project.id,
11672
+ kind: RunKinds["backlink-extract"],
11673
+ status: RunStatuses.queued,
11674
+ trigger: RunTriggers.manual,
11675
+ createdAt: now
11676
+ }).run();
11677
+ opts.onBacklinkExtractRequested(runId, project.id, release);
// Re-read so the reply reflects the row as stored after the callback ran.
11678
+ const run = app.db.select().from(runs).where(eq18(runs.id, runId)).get();
11679
+ return reply.status(201).send(mapRunRow(run));
11680
+ });
11681
// GET /projects/:name/backlinks/summary
// Replies with the project's backlink summary (mapped through mapSummaryRow),
// or null when latestSummaryForProject returns nothing.
// The optional ?release query value is forwarded to latestSummaryForProject
// without validation in this handler.
+ app.get(
11682
+ "/projects/:name/backlinks/summary",
11683
+ async (request, reply) => {
11684
+ const project = resolveProject(app.db, request.params.name);
11685
+ const row = latestSummaryForProject(app.db, project.id, request.query.release);
11686
+ return reply.send(row ? mapSummaryRow(row) : null);
11687
+ }
11688
+ );
11689
// GET /projects/:name/backlinks/domains
// Paginated list of linking domains for one release of a project.
// Query parameters:
//   release — optional; when absent, the release of the summary row returned by
//             latestSummaryForProject is used.
//   limit   — page size, default 50, clamped to 1..500.
//   offset  — rows to skip, default 0, never negative.
// Reply shape: { summary, total, rows }, where rows holds
// { linkingDomain, numHosts } ordered by numHosts descending and total is the
// unpaginated row count for the same project/release filter.
+ app.get("/projects/:name/backlinks/domains", async (request, reply) => {
11690
+ const project = resolveProject(app.db, request.params.name);
11691
+ const summaryRow = latestSummaryForProject(app.db, project.id, request.query.release);
11692
+ const targetRelease = request.query.release ?? summaryRow?.release;
// Neither an explicit release nor a stored summary: reply with an empty page.
11693
+ if (!targetRelease) {
11694
+ const response2 = { summary: null, total: 0, rows: [] };
11695
+ return reply.send(response2);
11696
+ }
// `|| 50` covers non-numeric input (NaN) but also maps an explicit limit of 0
// to the default 50, so the lower clamp of 1 only applies to negative values.
11697
+ const limit = Math.min(Math.max(parseInt(request.query.limit ?? "50", 10) || 50, 1), 500);
11698
+ const offset = Math.max(parseInt(request.query.offset ?? "0", 10) || 0, 0);
// The same filter is shared by the count query and the page query.
11699
+ const domainCondition = and7(
11700
+ eq18(backlinkDomains.projectId, project.id),
11701
+ eq18(backlinkDomains.release, targetRelease)
11702
+ );
11703
+ const totalRow = app.db.select({ count: sql5`count(*)` }).from(backlinkDomains).where(domainCondition).get();
11704
+ const rows = app.db.select({
11705
+ linkingDomain: backlinkDomains.linkingDomain,
11706
+ numHosts: backlinkDomains.numHosts
11707
+ }).from(backlinkDomains).where(domainCondition).orderBy(desc8(backlinkDomains.numHosts)).limit(limit).offset(offset).all();
11708
+ const response = {
11709
+ summary: summaryRow ? mapSummaryRow(summaryRow) : null,
// Number(...) normalises the driver's count value; a missing row counts as 0.
11710
+ total: Number(totalRow?.count ?? 0),
11711
+ rows
11712
+ };
11713
+ return reply.send(response);
11714
+ });
11715
// GET /projects/:name/backlinks/history
// Replies with every backlink summary stored for the project, ordered by
// queriedAt ascending. Each entry exposes only release, totalLinkingDomains,
// totalHosts, top10HostsShare and queriedAt.
+ app.get(
11716
+ "/projects/:name/backlinks/history",
11717
+ async (request, reply) => {
11718
+ const project = resolveProject(app.db, request.params.name);
11719
+ const rows = app.db.select().from(backlinkSummaries).where(eq18(backlinkSummaries.projectId, project.id)).orderBy(asc2(backlinkSummaries.queriedAt)).all();
// Project each stored row onto the public history fields.
11720
+ const response = rows.map((r) => ({
11721
+ release: r.release,
11722
+ totalLinkingDomains: r.totalLinkingDomains,
11723
+ totalHosts: r.totalHosts,
11724
+ top10HostsShare: r.top10HostsShare,
11725
+ queriedAt: r.queriedAt
11726
+ }));
11727
+ return reply.send(response);
11728
+ }
11729
+ );
11730
+ }
11731
+
10923
11732
  // ../api-routes/src/index.ts
10924
11733
  async function apiRoutes(app, opts) {
10925
11734
  app.decorate("db", opts.db);
@@ -11028,6 +11837,14 @@ async function apiRoutes(app, opts) {
11028
11837
  googleConnectionStore: opts.googleConnectionStore,
11029
11838
  getGoogleAuthConfig: opts.getGoogleAuthConfig
11030
11839
  });
11840
+ await api.register(backlinksRoutes, {
11841
+ getBacklinksStatus: opts.getBacklinksStatus,
11842
+ onInstallBacklinks: opts.onInstallBacklinks,
11843
+ onReleaseSyncRequested: opts.onReleaseSyncRequested,
11844
+ onBacklinkExtractRequested: opts.onBacklinkExtractRequested,
11845
+ onBacklinksPruneCache: opts.onBacklinksPruneCache,
11846
+ listCachedReleases: opts.listCachedReleases
11847
+ });
11031
11848
  if (opts.registerAuthenticatedRoutes) {
11032
11849
  await opts.registerAuthenticatedRoutes(api);
11033
11850
  }
@@ -11035,7 +11852,7 @@ async function apiRoutes(app, opts) {
11035
11852
  }
11036
11853
 
11037
11854
  // src/server.ts
11038
- import os5 from "os";
11855
+ import os6 from "os";
11039
11856
 
11040
11857
  // ../provider-gemini/src/normalize.ts
11041
11858
  import { GoogleGenAI } from "@google/genai";
@@ -12423,8 +13240,8 @@ var localAdapter = {
12423
13240
  };
12424
13241
 
12425
13242
  // ../provider-cdp/src/adapter.ts
12426
- import path4 from "path";
12427
- import os3 from "os";
13243
+ import path9 from "path";
13244
+ import os4 from "os";
12428
13245
 
12429
13246
  // ../provider-cdp/src/connection.ts
12430
13247
  import CDP from "chrome-remote-interface";
@@ -12788,12 +13605,12 @@ function sleep2(ms) {
12788
13605
  }
12789
13606
 
12790
13607
  // ../provider-cdp/src/screenshot.ts
12791
- import fs3 from "fs";
12792
- import path3 from "path";
13608
+ import fs7 from "fs";
13609
+ import path8 from "path";
12793
13610
  async function captureElementScreenshot(client, selector, outputPath) {
12794
- const dir = path3.dirname(outputPath);
12795
- if (!fs3.existsSync(dir)) {
12796
- fs3.mkdirSync(dir, { recursive: true });
13611
+ const dir = path8.dirname(outputPath);
13612
+ if (!fs7.existsSync(dir)) {
13613
+ fs7.mkdirSync(dir, { recursive: true });
12797
13614
  }
12798
13615
  let clip;
12799
13616
  try {
@@ -12827,7 +13644,7 @@ async function captureElementScreenshot(client, selector, outputPath) {
12827
13644
  }
12828
13645
  const { data } = await client.Page.captureScreenshot(screenshotParams);
12829
13646
  const buffer = Buffer.from(data, "base64");
12830
- fs3.writeFileSync(outputPath, buffer);
13647
+ fs7.writeFileSync(outputPath, buffer);
12831
13648
  return outputPath;
12832
13649
  }
12833
13650
 
@@ -12888,7 +13705,7 @@ function getConnection(config) {
12888
13705
  return conn;
12889
13706
  }
12890
13707
  function getScreenshotDir2() {
12891
- return path4.join(os3.homedir(), ".canonry", "screenshots");
13708
+ return path9.join(os4.homedir(), ".canonry", "screenshots");
12892
13709
  }
12893
13710
  var cdpChatgptAdapter = {
12894
13711
  name: "cdp:chatgpt",
@@ -12952,7 +13769,7 @@ var cdpChatgptAdapter = {
12952
13769
  const answerText = await target.extractAnswer(client);
12953
13770
  const groundingSources = await target.extractCitations(client);
12954
13771
  const screenshotId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
12955
- const screenshotPath = path4.join(getScreenshotDir2(), `${screenshotId}.png`);
13772
+ const screenshotPath = path9.join(getScreenshotDir2(), `${screenshotId}.png`);
12956
13773
  let capturedScreenshotPath;
12957
13774
  try {
12958
13775
  capturedScreenshotPath = await captureElementScreenshot(
@@ -13488,11 +14305,11 @@ function removeWordpressConnection(config, projectName) {
13488
14305
  }
13489
14306
 
13490
14307
  // src/job-runner.ts
13491
- import crypto18 from "crypto";
13492
- import fs4 from "fs";
13493
- import path5 from "path";
13494
- import os4 from "os";
13495
- import { and as and7, eq as eq18, inArray as inArray3, sql as sql5 } from "drizzle-orm";
14308
+ import crypto19 from "crypto";
14309
+ import fs8 from "fs";
14310
+ import path10 from "path";
14311
+ import os5 from "os";
14312
+ import { and as and8, eq as eq19, inArray as inArray3, sql as sql6 } from "drizzle-orm";
13496
14313
 
13497
14314
  // src/citation-utils.ts
13498
14315
  function domainMatches(domain, canonicalDomain) {
@@ -13728,7 +14545,7 @@ var JobRunner = class {
13728
14545
  if (stale.length === 0) return;
13729
14546
  const now = (/* @__PURE__ */ new Date()).toISOString();
13730
14547
  for (const run of stale) {
13731
- this.db.update(runs).set({ status: "failed", finishedAt: now, error: "Server restarted while run was in progress" }).where(eq18(runs.id, run.id)).run();
14548
+ this.db.update(runs).set({ status: "failed", finishedAt: now, error: "Server restarted while run was in progress" }).where(eq19(runs.id, run.id)).run();
13732
14549
  log.warn("run.recovered-stale", { runId: run.id, previousStatus: run.status });
13733
14550
  }
13734
14551
  }
@@ -13756,10 +14573,10 @@ var JobRunner = class {
13756
14573
  throw new Error(`Run ${runId} is not executable from status '${existingRun.status}'`);
13757
14574
  }
13758
14575
  if (existingRun.status === "queued") {
13759
- this.db.update(runs).set({ status: "running", startedAt: now }).where(and7(eq18(runs.id, runId), eq18(runs.status, "queued"))).run();
14576
+ this.db.update(runs).set({ status: "running", startedAt: now }).where(and8(eq19(runs.id, runId), eq19(runs.status, "queued"))).run();
13760
14577
  }
13761
14578
  this.throwIfRunCancelled(runId);
13762
- const project = this.db.select().from(projects).where(eq18(projects.id, projectId)).get();
14579
+ const project = this.db.select().from(projects).where(eq19(projects.id, projectId)).get();
13763
14580
  if (!project) {
13764
14581
  throw new Error(`Project ${projectId} not found`);
13765
14582
  }
@@ -13779,8 +14596,8 @@ var JobRunner = class {
13779
14596
  throw new Error("No providers configured. Add at least one provider API key.");
13780
14597
  }
13781
14598
  log.info("run.dispatch", { runId, providerCount: activeProviders.length, providers: activeProviders.map((p) => p.adapter.name) });
13782
- projectKeywords = this.db.select().from(keywords).where(eq18(keywords.projectId, projectId)).all();
13783
- const projectCompetitors = this.db.select().from(competitors).where(eq18(competitors.projectId, projectId)).all();
14599
+ projectKeywords = this.db.select().from(keywords).where(eq19(keywords.projectId, projectId)).all();
14600
+ const projectCompetitors = this.db.select().from(competitors).where(eq19(competitors.projectId, projectId)).all();
13784
14601
  const competitorDomains = projectCompetitors.map((c) => c.domain);
13785
14602
  const allDomains = effectiveDomains({
13786
14603
  canonicalDomain: project.canonicalDomain,
@@ -13796,7 +14613,7 @@ var JobRunner = class {
13796
14613
  const todayPeriod = getCurrentUsageDay();
13797
14614
  for (const p of activeProviders) {
13798
14615
  const providerScope = `${projectId}:${p.adapter.name}`;
13799
- const providerUsage = this.db.select().from(usageCounters).where(eq18(usageCounters.scope, providerScope)).all().filter((r) => r.period === todayPeriod && r.metric === "queries").reduce((sum, r) => sum + r.count, 0);
14616
+ const providerUsage = this.db.select().from(usageCounters).where(eq19(usageCounters.scope, providerScope)).all().filter((r) => r.period === todayPeriod && r.metric === "queries").reduce((sum, r) => sum + r.count, 0);
13800
14617
  const limit = p.config.quotaPolicy.maxRequestsPerDay;
13801
14618
  if (providerUsage + queriesPerProvider > limit) {
13802
14619
  throw new Error(
@@ -13855,12 +14672,12 @@ var JobRunner = class {
13855
14672
  competitorDomains
13856
14673
  );
13857
14674
  let screenshotRelPath = null;
13858
- if (raw.screenshotPath && fs4.existsSync(raw.screenshotPath)) {
13859
- const snapshotId = crypto18.randomUUID();
13860
- const screenshotDir = path5.join(os4.homedir(), ".canonry", "screenshots", runId);
13861
- if (!fs4.existsSync(screenshotDir)) fs4.mkdirSync(screenshotDir, { recursive: true });
13862
- const destPath = path5.join(screenshotDir, `${snapshotId}.png`);
13863
- fs4.renameSync(raw.screenshotPath, destPath);
14675
+ if (raw.screenshotPath && fs8.existsSync(raw.screenshotPath)) {
14676
+ const snapshotId = crypto19.randomUUID();
14677
+ const screenshotDir = path10.join(os5.homedir(), ".canonry", "screenshots", runId);
14678
+ if (!fs8.existsSync(screenshotDir)) fs8.mkdirSync(screenshotDir, { recursive: true });
14679
+ const destPath = path10.join(screenshotDir, `${snapshotId}.png`);
14680
+ fs8.renameSync(raw.screenshotPath, destPath);
13864
14681
  screenshotRelPath = `${runId}/${snapshotId}.png`;
13865
14682
  this.db.insert(querySnapshots).values({
13866
14683
  id: snapshotId,
@@ -13886,7 +14703,7 @@ var JobRunner = class {
13886
14703
  }).run();
13887
14704
  } else {
13888
14705
  this.db.insert(querySnapshots).values({
13889
- id: crypto18.randomUUID(),
14706
+ id: crypto19.randomUUID(),
13890
14707
  runId,
13891
14708
  keywordId: kw.id,
13892
14709
  provider: providerName,
@@ -13937,12 +14754,12 @@ var JobRunner = class {
13937
14754
  const someFailed = providerErrors.size > 0;
13938
14755
  if (allFailed) {
13939
14756
  const errorDetail = JSON.stringify(Object.fromEntries(providerErrors));
13940
- this.db.update(runs).set({ status: "failed", finishedAt: (/* @__PURE__ */ new Date()).toISOString(), error: errorDetail }).where(eq18(runs.id, runId)).run();
14757
+ this.db.update(runs).set({ status: "failed", finishedAt: (/* @__PURE__ */ new Date()).toISOString(), error: errorDetail }).where(eq19(runs.id, runId)).run();
13941
14758
  } else if (someFailed) {
13942
14759
  const errorDetail = JSON.stringify(Object.fromEntries(providerErrors));
13943
- this.db.update(runs).set({ status: "partial", finishedAt: (/* @__PURE__ */ new Date()).toISOString(), error: errorDetail }).where(eq18(runs.id, runId)).run();
14760
+ this.db.update(runs).set({ status: "partial", finishedAt: (/* @__PURE__ */ new Date()).toISOString(), error: errorDetail }).where(eq19(runs.id, runId)).run();
13944
14761
  } else {
13945
- this.db.update(runs).set({ status: "completed", finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq18(runs.id, runId)).run();
14762
+ this.db.update(runs).set({ status: "completed", finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq19(runs.id, runId)).run();
13946
14763
  }
13947
14764
  this.flushProviderUsage(projectId, providerDispatchCounts);
13948
14765
  const finalStatus = allFailed ? "failed" : someFailed ? "partial" : "completed";
@@ -13977,7 +14794,7 @@ var JobRunner = class {
13977
14794
  status: "failed",
13978
14795
  finishedAt: (/* @__PURE__ */ new Date()).toISOString(),
13979
14796
  error: errorMessage
13980
- }).where(eq18(runs.id, runId)).run();
14797
+ }).where(eq19(runs.id, runId)).run();
13981
14798
  this.flushProviderUsage(projectId, providerDispatchCounts);
13982
14799
  trackEvent("run.completed", {
13983
14800
  status: "failed",
@@ -13998,7 +14815,7 @@ var JobRunner = class {
13998
14815
  const now = (/* @__PURE__ */ new Date()).toISOString();
13999
14816
  const period = now.slice(0, 10);
14000
14817
  this.db.insert(usageCounters).values({
14001
- id: crypto18.randomUUID(),
14818
+ id: crypto19.randomUUID(),
14002
14819
  scope,
14003
14820
  period,
14004
14821
  metric,
@@ -14006,7 +14823,7 @@ var JobRunner = class {
14006
14823
  updatedAt: now
14007
14824
  }).onConflictDoUpdate({
14008
14825
  target: [usageCounters.scope, usageCounters.period, usageCounters.metric],
14009
- set: { count: sql5`${usageCounters.count} + ${count}`, updatedAt: now }
14826
+ set: { count: sql6`${usageCounters.count} + ${count}`, updatedAt: now }
14010
14827
  }).run();
14011
14828
  }
14012
14829
  flushProviderUsage(projectId, providerDispatchCounts) {
@@ -14020,7 +14837,7 @@ var JobRunner = class {
14020
14837
  status: runs.status,
14021
14838
  finishedAt: runs.finishedAt,
14022
14839
  error: runs.error
14023
- }).from(runs).where(eq18(runs.id, runId)).get();
14840
+ }).from(runs).where(eq19(runs.id, runId)).get();
14024
14841
  }
14025
14842
  isRunCancelled(runId) {
14026
14843
  return this.getRunState(runId)?.status === "cancelled";
@@ -14036,7 +14853,7 @@ var JobRunner = class {
14036
14853
  this.db.update(runs).set({
14037
14854
  finishedAt: (/* @__PURE__ */ new Date()).toISOString(),
14038
14855
  error: currentRun.error ?? "Cancelled by user"
14039
- }).where(eq18(runs.id, runId)).run();
14856
+ }).where(eq19(runs.id, runId)).run();
14040
14857
  }
14041
14858
  trackEvent("run.completed", {
14042
14859
  status: "cancelled",
@@ -14058,8 +14875,8 @@ function getCurrentUsageDay() {
14058
14875
  }
14059
14876
 
14060
14877
  // src/gsc-sync.ts
14061
- import crypto19 from "crypto";
14062
- import { eq as eq19, and as and8, sql as sql6 } from "drizzle-orm";
14878
+ import crypto20 from "crypto";
14879
+ import { eq as eq20, and as and9, sql as sql7 } from "drizzle-orm";
14063
14880
  var log2 = createLogger("GscSync");
14064
14881
  function formatDate2(d) {
14065
14882
  return d.toISOString().split("T")[0];
@@ -14071,13 +14888,13 @@ function daysAgo(n) {
14071
14888
  }
14072
14889
  async function executeGscSync(db, runId, projectId, opts) {
14073
14890
  const now = (/* @__PURE__ */ new Date()).toISOString();
14074
- db.update(runs).set({ status: "running", startedAt: now }).where(eq19(runs.id, runId)).run();
14891
+ db.update(runs).set({ status: "running", startedAt: now }).where(eq20(runs.id, runId)).run();
14075
14892
  try {
14076
14893
  const { clientId: googleClientId, clientSecret: googleClientSecret } = getGoogleAuthConfig(opts.config);
14077
14894
  if (!googleClientId || !googleClientSecret) {
14078
14895
  throw new Error("Google OAuth is not configured in the local Canonry config");
14079
14896
  }
14080
- const project = db.select().from(projects).where(eq19(projects.id, projectId)).get();
14897
+ const project = db.select().from(projects).where(eq20(projects.id, projectId)).get();
14081
14898
  if (!project) {
14082
14899
  throw new Error(`Project not found: ${projectId}`);
14083
14900
  }
@@ -14111,10 +14928,10 @@ async function executeGscSync(db, runId, projectId, opts) {
14111
14928
  });
14112
14929
  log2.info("fetch.complete", { runId, projectId, rowCount: rows.length });
14113
14930
  db.delete(gscSearchData).where(
14114
- and8(
14115
- eq19(gscSearchData.projectId, projectId),
14116
- sql6`${gscSearchData.date} >= ${startDate}`,
14117
- sql6`${gscSearchData.date} <= ${endDate}`
14931
+ and9(
14932
+ eq20(gscSearchData.projectId, projectId),
14933
+ sql7`${gscSearchData.date} >= ${startDate}`,
14934
+ sql7`${gscSearchData.date} <= ${endDate}`
14118
14935
  )
14119
14936
  ).run();
14120
14937
  const batchSize = 500;
@@ -14124,7 +14941,7 @@ async function executeGscSync(db, runId, projectId, opts) {
14124
14941
  for (const row of batch) {
14125
14942
  const [query, page, country, device, date] = row.keys;
14126
14943
  db.insert(gscSearchData).values({
14127
- id: crypto19.randomUUID(),
14944
+ id: crypto20.randomUUID(),
14128
14945
  projectId,
14129
14946
  syncRunId: runId,
14130
14947
  date: date ?? "",
@@ -14158,7 +14975,7 @@ async function executeGscSync(db, runId, projectId, opts) {
14158
14975
  const rich = ir.richResultsResult;
14159
14976
  const inspectedAt = (/* @__PURE__ */ new Date()).toISOString();
14160
14977
  db.insert(gscUrlInspections).values({
14161
- id: crypto19.randomUUID(),
14978
+ id: crypto20.randomUUID(),
14162
14979
  projectId,
14163
14980
  syncRunId: runId,
14164
14981
  url: pageUrl,
@@ -14179,7 +14996,7 @@ async function executeGscSync(db, runId, projectId, opts) {
14179
14996
  log2.error("inspect.url-failed", { runId, projectId, url: pageUrl, error: err instanceof Error ? err.message : String(err) });
14180
14997
  }
14181
14998
  }
14182
- const allInspections = db.select().from(gscUrlInspections).where(eq19(gscUrlInspections.projectId, projectId)).all();
14999
+ const allInspections = db.select().from(gscUrlInspections).where(eq20(gscUrlInspections.projectId, projectId)).all();
14183
15000
  const latestByUrl = /* @__PURE__ */ new Map();
14184
15001
  for (const row of allInspections) {
14185
15002
  const existing = latestByUrl.get(row.url);
@@ -14200,9 +15017,9 @@ async function executeGscSync(db, runId, projectId, opts) {
14200
15017
  }
14201
15018
  }
14202
15019
  const snapshotDate = formatDate2(/* @__PURE__ */ new Date());
14203
- db.delete(gscCoverageSnapshots).where(and8(eq19(gscCoverageSnapshots.projectId, projectId), eq19(gscCoverageSnapshots.date, snapshotDate))).run();
15020
+ db.delete(gscCoverageSnapshots).where(and9(eq20(gscCoverageSnapshots.projectId, projectId), eq20(gscCoverageSnapshots.date, snapshotDate))).run();
14204
15021
  db.insert(gscCoverageSnapshots).values({
14205
- id: crypto19.randomUUID(),
15022
+ id: crypto20.randomUUID(),
14206
15023
  projectId,
14207
15024
  syncRunId: runId,
14208
15025
  date: snapshotDate,
@@ -14211,19 +15028,19 @@ async function executeGscSync(db, runId, projectId, opts) {
14211
15028
  reasonBreakdown: JSON.stringify(reasonCounts),
14212
15029
  createdAt: (/* @__PURE__ */ new Date()).toISOString()
14213
15030
  }).run();
14214
- db.update(runs).set({ status: "completed", finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq19(runs.id, runId)).run();
15031
+ db.update(runs).set({ status: "completed", finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq20(runs.id, runId)).run();
14215
15032
  log2.info("sync.completed", { runId, projectId, searchDataRows: rows.length, urlInspections: topPages.length, indexed: snapIndexed, notIndexed: snapNotIndexed });
14216
15033
  } catch (err) {
14217
15034
  const errorMsg = err instanceof Error ? err.message : String(err);
14218
- db.update(runs).set({ status: "failed", error: errorMsg, finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq19(runs.id, runId)).run();
15035
+ db.update(runs).set({ status: "failed", error: errorMsg, finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq20(runs.id, runId)).run();
14219
15036
  log2.error("sync.failed", { runId, projectId, error: errorMsg });
14220
15037
  throw err;
14221
15038
  }
14222
15039
  }
14223
15040
 
14224
15041
  // src/gsc-inspect-sitemap.ts
14225
- import crypto20 from "crypto";
14226
- import { eq as eq20, and as and9 } from "drizzle-orm";
15042
+ import crypto21 from "crypto";
15043
+ import { eq as eq21, and as and10 } from "drizzle-orm";
14227
15044
 
14228
15045
  // src/sitemap-parser.ts
14229
15046
  var LOC_REGEX = /<loc>\s*([^<]+?)\s*<\/loc>/gi;
@@ -14292,13 +15109,13 @@ async function parseSitemapRecursive(url, urls, depth) {
14292
15109
  var log3 = createLogger("InspectSitemap");
14293
15110
  async function executeInspectSitemap(db, runId, projectId, opts) {
14294
15111
  const now = (/* @__PURE__ */ new Date()).toISOString();
14295
- db.update(runs).set({ status: "running", startedAt: now }).where(eq20(runs.id, runId)).run();
15112
+ db.update(runs).set({ status: "running", startedAt: now }).where(eq21(runs.id, runId)).run();
14296
15113
  try {
14297
15114
  const { clientId: googleClientId, clientSecret: googleClientSecret } = getGoogleAuthConfig(opts.config);
14298
15115
  if (!googleClientId || !googleClientSecret) {
14299
15116
  throw new Error("Google OAuth is not configured in the local Canonry config");
14300
15117
  }
14301
- const project = db.select().from(projects).where(eq20(projects.id, projectId)).get();
15118
+ const project = db.select().from(projects).where(eq21(projects.id, projectId)).get();
14302
15119
  if (!project) {
14303
15120
  throw new Error(`Project not found: ${projectId}`);
14304
15121
  }
@@ -14339,7 +15156,7 @@ async function executeInspectSitemap(db, runId, projectId, opts) {
14339
15156
  const rich = ir.richResultsResult;
14340
15157
  const inspectedAt = (/* @__PURE__ */ new Date()).toISOString();
14341
15158
  db.insert(gscUrlInspections).values({
14342
- id: crypto20.randomUUID(),
15159
+ id: crypto21.randomUUID(),
14343
15160
  projectId,
14344
15161
  syncRunId: runId,
14345
15162
  url: pageUrl,
@@ -14366,7 +15183,7 @@ async function executeInspectSitemap(db, runId, projectId, opts) {
14366
15183
  await new Promise((r) => setTimeout(r, 1e3));
14367
15184
  }
14368
15185
  }
14369
- const allInspections = db.select().from(gscUrlInspections).where(eq20(gscUrlInspections.projectId, projectId)).all();
15186
+ const allInspections = db.select().from(gscUrlInspections).where(eq21(gscUrlInspections.projectId, projectId)).all();
14370
15187
  const latestByUrl = /* @__PURE__ */ new Map();
14371
15188
  for (const row of allInspections) {
14372
15189
  const existing = latestByUrl.get(row.url);
@@ -14387,9 +15204,9 @@ async function executeInspectSitemap(db, runId, projectId, opts) {
14387
15204
  }
14388
15205
  }
14389
15206
  const snapshotDate = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
14390
- db.delete(gscCoverageSnapshots).where(and9(eq20(gscCoverageSnapshots.projectId, projectId), eq20(gscCoverageSnapshots.date, snapshotDate))).run();
15207
+ db.delete(gscCoverageSnapshots).where(and10(eq21(gscCoverageSnapshots.projectId, projectId), eq21(gscCoverageSnapshots.date, snapshotDate))).run();
14391
15208
  db.insert(gscCoverageSnapshots).values({
14392
- id: crypto20.randomUUID(),
15209
+ id: crypto21.randomUUID(),
14393
15210
  projectId,
14394
15211
  syncRunId: runId,
14395
15212
  date: snapshotDate,
@@ -14399,16 +15216,304 @@ async function executeInspectSitemap(db, runId, projectId, opts) {
14399
15216
  createdAt: (/* @__PURE__ */ new Date()).toISOString()
14400
15217
  }).run();
14401
15218
  const status = errors > 0 && inspected > 0 ? "partial" : errors === urls.length ? "failed" : "completed";
14402
- db.update(runs).set({ status, finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq20(runs.id, runId)).run();
15219
+ db.update(runs).set({ status, finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq21(runs.id, runId)).run();
14403
15220
  log3.info("inspect.completed", { runId, projectId, inspected, errors, total: urls.length, indexed: snapIndexed, notIndexed: snapNotIndexed });
14404
15221
  } catch (err) {
14405
15222
  const errorMsg = err instanceof Error ? err.message : String(err);
14406
- db.update(runs).set({ status: "failed", error: errorMsg, finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq20(runs.id, runId)).run();
15223
+ db.update(runs).set({ status: "failed", error: errorMsg, finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq21(runs.id, runId)).run();
14407
15224
  log3.error("inspect.failed", { runId, projectId, error: errorMsg });
14408
15225
  throw err;
14409
15226
  }
14410
15227
  }
14411
15228
 
15229
+ // src/commoncrawl-sync.ts
15230
+ import crypto22 from "crypto";
15231
+ import path11 from "path";
15232
+ import { and as and11, eq as eq22, sql as sql8 } from "drizzle-orm";
15233
+ var log4 = createLogger("CommonCrawlSync");
15234
+ var INSERT_CHUNK_SIZE = 1e4;
15235
/**
 * Assemble the default dependency bundle used by `executeReleaseSync`.
 *
 * Each entry can be overridden through `opts.deps` by the caller; the keys
 * are `downloadFile`, `queryBacklinks`, `loadDuckdb`, `now` and `cacheDir`.
 *
 * @returns {object} A fresh object on every call.
 */
function defaultDeps() {
  // Clock is injectable so callers can pin timestamps.
  const currentTime = () => new Date();
  const bundle = { downloadFile, queryBacklinks, loadDuckdb };
  bundle.now = currentTime;
  bundle.cacheDir = CC_CACHE_DIR;
  return bundle;
}
15244
/**
 * Run one Common Crawl release sync end to end.
 *
 * Steps, each reflected on the `ccReleaseSyncs` row identified by `syncId`:
 *   1. validate the release id and mark the row `downloading`;
 *   2. download the release's vertex and edge files in parallel;
 *   3. mark the row `querying` and query backlinks for the distinct
 *      `canonicalDomain` values of all projects;
 *   4. in a single transaction, replace this sync's `backlinkDomains` rows and
 *      upsert one `backlinkSummaries` row per project;
 *   5. mark the row `ready`.
 *
 * @param {object} db - Drizzle database handle.
 * @param {string} syncId - Id of the `ccReleaseSyncs` row tracking this sync.
 * @param {{ release: string, deps?: object }} opts - `release` is the release
 *   id to sync; `deps` overrides individual entries of `defaultDeps()`.
 * @returns {Promise<void>}
 * @throws Re-throws the original failure after marking the sync row `failed`.
 */
async function executeReleaseSync(db, syncId, opts) {
  const deps = { ...defaultDeps(), ...opts.deps };
  const release = opts.release;
  // Upper bound on bound parameters per statement. 32766 is SQLite's
  // documented default SQLITE_MAX_VARIABLE_NUMBER (3.32.0+).
  // NOTE(review): assumes the bundled SQLite build keeps that default — confirm.
  const SQLITE_MAX_BOUND_PARAMS = 32766;
  try {
    if (!isValidReleaseId(release)) {
      throw new Error(`Invalid release id: ${release}`);
    }
    const downloadStartedAt = deps.now().toISOString();
    db.update(ccReleaseSyncs).set({
      status: CcReleaseSyncStatuses.downloading,
      downloadStartedAt,
      phaseDetail: "downloading vertices + edges",
      updatedAt: downloadStartedAt,
      error: null
    }).where(eq22(ccReleaseSyncs.id, syncId)).run();
    const paths = ccReleasePaths(release);
    const releaseCacheDir = path11.join(deps.cacheDir, release);
    const vertexPath = path11.join(releaseCacheDir, paths.vertexFilename);
    const edgesPath = path11.join(releaseCacheDir, paths.edgesFilename);
    // The two files are independent, so fetch them concurrently.
    const [vertex, edges] = await Promise.all([
      deps.downloadFile({ url: paths.vertexUrl, destPath: vertexPath }),
      deps.downloadFile({ url: paths.edgesUrl, destPath: edgesPath })
    ]);
    const downloadFinishedAt = deps.now().toISOString();
    const queryStartedAt = downloadFinishedAt;
    db.update(ccReleaseSyncs).set({
      status: CcReleaseSyncStatuses.querying,
      downloadFinishedAt,
      queryStartedAt,
      phaseDetail: "querying backlinks",
      vertexPath,
      edgesPath,
      vertexBytes: vertex.bytes,
      edgesBytes: edges.bytes,
      vertexSha256: vertex.sha256,
      edgesSha256: edges.sha256,
      updatedAt: downloadFinishedAt
    }).where(eq22(ccReleaseSyncs.id, syncId)).run();
    const allProjects = db.select().from(projects).all();
    // Several projects may share a canonical domain; query each domain once.
    const targets = Array.from(new Set(allProjects.map((p) => p.canonicalDomain)));
    let rows = [];
    if (targets.length > 0) {
      const duckdb = deps.loadDuckdb();
      rows = await deps.queryBacklinks({ vertexPath, edgesPath, targets, duckdb });
    }
    // domain -> ids of every project that uses it as canonical domain.
    const projectsByDomain = /* @__PURE__ */ new Map();
    for (const p of allProjects) {
      const ids = projectsByDomain.get(p.canonicalDomain) ?? [];
      ids.push(p.id);
      projectsByDomain.set(p.canonicalDomain, ids);
    }
    const queriedAt = deps.now().toISOString();
    db.transaction((tx) => {
      // Clear anything a previous attempt of this same sync left behind.
      tx.delete(backlinkDomains).where(eq22(backlinkDomains.releaseSyncId, syncId)).run();
      tx.delete(backlinkSummaries).where(eq22(backlinkSummaries.releaseSyncId, syncId)).run();
      // Fan each result row out to every project that owns its target domain.
      const expanded = [];
      for (const r of rows) {
        const projectIds = projectsByDomain.get(r.targetDomain);
        if (!projectIds) continue;
        for (const projectId of projectIds) {
          expanded.push({
            id: crypto22.randomUUID(),
            projectId,
            releaseSyncId: syncId,
            release,
            targetDomain: r.targetDomain,
            linkingDomain: r.linkingDomain,
            numHosts: r.numHosts,
            createdAt: queriedAt
          });
        }
      }
      if (expanded.length > 0) {
        // A multi-row INSERT binds one parameter per column per row, so the
        // row count per statement must respect the parameter budget as well
        // as INSERT_CHUNK_SIZE.
        const columnsPerRow = Object.keys(expanded[0]).length;
        const rowsPerInsert = Math.max(
          1,
          Math.min(INSERT_CHUNK_SIZE, Math.floor(SQLITE_MAX_BOUND_PARAMS / columnsPerRow))
        );
        for (let i = 0; i < expanded.length; i += rowsPerInsert) {
          tx.insert(backlinkDomains).values(expanded.slice(i, i + rowsPerInsert)).run();
        }
      }
      const rowsByProject = groupByProject(rows, projectsByDomain);
      // Every project gets a summary row, including those with no backlinks.
      for (const p of allProjects) {
        const projectRows = rowsByProject.get(p.id) ?? [];
        const summary = computeSummary(projectRows);
        tx.insert(backlinkSummaries).values({
          id: crypto22.randomUUID(),
          projectId: p.id,
          releaseSyncId: syncId,
          release,
          targetDomain: p.canonicalDomain,
          totalLinkingDomains: summary.totalLinkingDomains,
          totalHosts: summary.totalHosts,
          top10HostsShare: summary.top10HostsShare,
          queriedAt,
          createdAt: queriedAt
        }).onConflictDoUpdate({
          target: [backlinkSummaries.projectId, backlinkSummaries.release],
          set: {
            releaseSyncId: syncId,
            targetDomain: p.canonicalDomain,
            totalLinkingDomains: summary.totalLinkingDomains,
            totalHosts: summary.totalHosts,
            top10HostsShare: summary.top10HostsShare,
            queriedAt
          }
        }).run();
      }
    });
    const finishedAt = deps.now().toISOString();
    db.update(ccReleaseSyncs).set({
      status: CcReleaseSyncStatuses.ready,
      queryFinishedAt: finishedAt,
      phaseDetail: null,
      projectsProcessed: allProjects.length,
      domainsDiscovered: rows.length,
      updatedAt: finishedAt,
      error: null
    }).where(eq22(ccReleaseSyncs.id, syncId)).run();
    log4.info("sync.completed", {
      syncId,
      release,
      projectsProcessed: allProjects.length,
      domainsDiscovered: rows.length
    });
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    try {
      const finishedAt = deps.now().toISOString();
      db.update(ccReleaseSyncs).set({
        status: CcReleaseSyncStatuses.failed,
        error: errorMsg,
        phaseDetail: null,
        updatedAt: finishedAt
      }).where(eq22(ccReleaseSyncs.id, syncId)).run();
    } catch (persistErr) {
      // Recording the failure must never replace the error that caused it.
      log4.error("sync.failed-status-write", {
        syncId,
        release,
        error: persistErr instanceof Error ? persistErr.message : String(persistErr)
      });
    }
    log4.error("sync.failed", { syncId, release, error: errorMsg });
    throw err;
  }
}
15377
/**
 * Bucket backlink rows by the projects that own their target domain.
 *
 * @param {Iterable<object>} rows - Rows carrying a `targetDomain` property.
 * @param {Map<string, string[]>} projectsByDomain - Domain -> project ids.
 * @returns {Map<string, object[]>} Project id -> rows for that project. Rows
 *   whose domain has no entry in `projectsByDomain` are dropped; a row shared
 *   by several projects appears (by reference) in each of their buckets.
 */
function groupByProject(rows, projectsByDomain) {
  const grouped = /* @__PURE__ */ new Map();
  for (const entry of rows) {
    const owners = projectsByDomain.get(entry.targetDomain);
    if (owners) {
      for (const ownerId of owners) {
        const existing = grouped.get(ownerId);
        if (existing == null) {
          grouped.set(ownerId, [entry]);
        } else {
          existing.push(entry);
        }
      }
    }
  }
  return grouped;
}
15390
/**
 * Aggregate backlink rows into summary figures.
 *
 * @param {Array<{numHosts: number}>} rows - Backlink rows; not mutated.
 * @returns {{totalLinkingDomains: number, totalHosts: number, top10HostsShare: string}}
 *   `top10HostsShare` is the fraction of all hosts contributed by the ten
 *   rows with the highest `numHosts`, formatted with six decimals ("0" when
 *   `rows` is empty).
 */
function computeSummary(rows) {
  if (rows.length === 0) {
    return { totalLinkingDomains: 0, totalHosts: 0, top10HostsShare: "0" };
  }
  // Rank a copy so the caller's array keeps its order.
  const ranked = Array.from(rows);
  ranked.sort((left, right) => right.numHosts - left.numHosts);
  let hostsOverall = 0;
  let hostsInTopTen = 0;
  ranked.forEach((entry, rank) => {
    hostsOverall += entry.numHosts;
    if (rank < 10) hostsInTopTen += entry.numHosts;
  });
  let ratio = 0;
  if (hostsOverall > 0) {
    ratio = hostsInTopTen / hostsOverall;
  }
  return {
    totalLinkingDomains: rows.length,
    totalHosts: hostsOverall,
    top10HostsShare: ratio.toFixed(6)
  };
}
15404
+
15405
+ // src/backlink-extract.ts
15406
+ import crypto23 from "crypto";
15407
+ import { and as and12, desc as desc9, eq as eq23 } from "drizzle-orm";
15408
+ var log5 = createLogger("BacklinkExtract");
15409
/**
 * Assemble the default dependency bundle used by `executeBacklinkExtract`.
 *
 * Each entry can be overridden through `opts.deps` by the caller; the keys
 * are `queryBacklinks`, `loadDuckdb` and `now`.
 *
 * @returns {object} A fresh object on every call.
 */
function defaultDeps2() {
  // Clock is injectable so callers can pin timestamps.
  const clock = () => new Date();
  return { queryBacklinks, loadDuckdb, now: clock };
}
15416
/**
 * Re-extract backlinks for a single project from an already-synced release.
 *
 * Marks the run `running`, picks the release sync (the one matching
 * `opts.release`, otherwise the most recently created `ready` sync), queries
 * backlinks for the project's canonical domain from the sync's cached files,
 * then — in one transaction — replaces the project's `backlinkDomains` rows
 * for that release and upserts its `backlinkSummaries` row. Finally marks the
 * run `completed`.
 *
 * @param {object} db - Drizzle database handle.
 * @param {string} runId - Id of the `runs` row tracking this extraction.
 * @param {string} projectId - Project whose backlinks are extracted.
 * @param {{ release?: string, deps?: object }} [opts] - Optional release id
 *   and overrides for individual entries of `defaultDeps2()`.
 * @returns {Promise<void>}
 * @throws Re-throws the original failure after marking the run `failed`.
 */
async function executeBacklinkExtract(db, runId, projectId, opts = {}) {
  const deps = { ...defaultDeps2(), ...opts.deps };
  // Upper bound on bound parameters per statement. 32766 is SQLite's
  // documented default SQLITE_MAX_VARIABLE_NUMBER (3.32.0+).
  // NOTE(review): assumes the bundled SQLite build keeps that default — confirm.
  const SQLITE_MAX_BOUND_PARAMS = 32766;
  const startedAt = deps.now().toISOString();
  db.update(runs).set({ status: RunStatuses.running, startedAt }).where(eq23(runs.id, runId)).run();
  try {
    const project = db.select().from(projects).where(eq23(projects.id, projectId)).get();
    if (!project) {
      throw new Error(`Project not found: ${projectId}`);
    }
    const sync = opts.release ? db.select().from(ccReleaseSyncs).where(eq23(ccReleaseSyncs.release, opts.release)).get() : db.select().from(ccReleaseSyncs).where(eq23(ccReleaseSyncs.status, CcReleaseSyncStatuses.ready)).orderBy(desc9(ccReleaseSyncs.createdAt)).limit(1).get();
    if (!sync) {
      throw new Error("No ready release sync available \u2014 run `canonry backlinks sync` first");
    }
    // An explicitly requested release may exist but still be in progress or failed.
    if (sync.status !== CcReleaseSyncStatuses.ready) {
      throw new Error(`Release ${sync.release} is not ready (status=${sync.status})`);
    }
    if (!sync.vertexPath || !sync.edgesPath) {
      throw new Error(`Release ${sync.release} is missing cached file paths`);
    }
    const duckdb = deps.loadDuckdb();
    const rows = await deps.queryBacklinks({
      vertexPath: sync.vertexPath,
      edgesPath: sync.edgesPath,
      targets: [project.canonicalDomain],
      duckdb
    });
    const queriedAt = deps.now().toISOString();
    const syncId = sync.id;
    const release = sync.release;
    const targetDomain = project.canonicalDomain;
    db.transaction((tx) => {
      // Replace, rather than append to, this project's rows for the release.
      tx.delete(backlinkDomains).where(
        and12(eq23(backlinkDomains.projectId, projectId), eq23(backlinkDomains.release, release))
      ).run();
      if (rows.length > 0) {
        const values = rows.map((r) => ({
          id: crypto23.randomUUID(),
          projectId,
          releaseSyncId: syncId,
          release,
          targetDomain,
          linkingDomain: r.linkingDomain,
          numHosts: r.numHosts,
          createdAt: queriedAt
        }));
        // A multi-row INSERT binds one parameter per column per row; split
        // the rows so a heavily linked domain cannot exceed the budget.
        const columnsPerRow = Object.keys(values[0]).length;
        const rowsPerInsert = Math.max(1, Math.floor(SQLITE_MAX_BOUND_PARAMS / columnsPerRow));
        for (let i = 0; i < values.length; i += rowsPerInsert) {
          tx.insert(backlinkDomains).values(values.slice(i, i + rowsPerInsert)).run();
        }
      }
      const summary = computeSummary2(rows);
      tx.insert(backlinkSummaries).values({
        id: crypto23.randomUUID(),
        projectId,
        releaseSyncId: syncId,
        release,
        targetDomain,
        totalLinkingDomains: summary.totalLinkingDomains,
        totalHosts: summary.totalHosts,
        top10HostsShare: summary.top10HostsShare,
        queriedAt,
        createdAt: queriedAt
      }).onConflictDoUpdate({
        target: [backlinkSummaries.projectId, backlinkSummaries.release],
        set: {
          releaseSyncId: syncId,
          targetDomain,
          totalLinkingDomains: summary.totalLinkingDomains,
          totalHosts: summary.totalHosts,
          top10HostsShare: summary.top10HostsShare,
          queriedAt
        }
      }).run();
    });
    const finishedAt = deps.now().toISOString();
    db.update(runs).set({ status: RunStatuses.completed, finishedAt }).where(eq23(runs.id, runId)).run();
    log5.info("extract.completed", { runId, projectId, release, rows: rows.length });
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    try {
      const finishedAt = deps.now().toISOString();
      db.update(runs).set({
        status: RunStatuses.failed,
        error: errorMsg,
        finishedAt
      }).where(eq23(runs.id, runId)).run();
    } catch (persistErr) {
      // Recording the failure must never replace the error that caused it.
      log5.error("extract.failed-status-write", {
        runId,
        projectId,
        error: persistErr instanceof Error ? persistErr.message : String(persistErr)
      });
    }
    log5.error("extract.failed", { runId, projectId, error: errorMsg });
    throw err;
  }
}
15502
/**
 * Aggregate one project's backlink rows into summary figures.
 *
 * @param {Array<{numHosts: number}>} rows - Backlink rows; not mutated.
 * @returns {{totalLinkingDomains: number, totalHosts: number, top10HostsShare: string}}
 *   `top10HostsShare` is the share of all hosts held by the ten rows with the
 *   most hosts, as a six-decimal string ("0" when `rows` is empty).
 */
function computeSummary2(rows) {
  const linkingDomainCount = rows.length;
  if (linkingDomainCount === 0) {
    return { totalLinkingDomains: 0, totalHosts: 0, top10HostsShare: "0" };
  }
  const addHosts = (running, entry) => running + entry.numHosts;
  // Work on a copy so the caller's ordering is left untouched.
  const byHostsDesc = Array.from(rows);
  byHostsDesc.sort((x, y) => y.numHosts - x.numHosts);
  const hostSum = byHostsDesc.reduce(addHosts, 0);
  const leaderSum = byHostsDesc.slice(0, 10).reduce(addHosts, 0);
  const ratio = hostSum > 0 ? leaderSum / hostSum : 0;
  return {
    totalLinkingDomains: linkingDomainCount,
    totalHosts: hostSum,
    top10HostsShare: ratio.toFixed(6)
  };
}
15516
+
14412
15517
  // src/provider-registry.ts
14413
15518
  var ProviderRegistry = class {
14414
15519
  providers = /* @__PURE__ */ new Map();
@@ -14462,8 +15567,8 @@ var ProviderRegistry = class {
14462
15567
 
14463
15568
  // src/scheduler.ts
14464
15569
  import cron from "node-cron";
14465
- import { eq as eq21 } from "drizzle-orm";
14466
- var log4 = createLogger("Scheduler");
15570
+ import { eq as eq24 } from "drizzle-orm";
15571
+ var log6 = createLogger("Scheduler");
14467
15572
  var Scheduler = class {
14468
15573
  db;
14469
15574
  callbacks;
@@ -14474,16 +15579,16 @@ var Scheduler = class {
14474
15579
  }
14475
15580
  /** Load all enabled schedules from DB and register cron jobs. */
14476
15581
  start() {
14477
- const allSchedules = this.db.select().from(schedules).where(eq21(schedules.enabled, 1)).all();
15582
+ const allSchedules = this.db.select().from(schedules).where(eq24(schedules.enabled, 1)).all();
14478
15583
  for (const schedule of allSchedules) {
14479
15584
  const missedRunAt = schedule.nextRunAt;
14480
15585
  this.registerCronTask(schedule);
14481
15586
  if (missedRunAt && new Date(missedRunAt) < /* @__PURE__ */ new Date()) {
14482
- log4.info("run.catch-up", { projectId: schedule.projectId, missedRunAt });
15587
+ log6.info("run.catch-up", { projectId: schedule.projectId, missedRunAt });
14483
15588
  this.triggerRun(schedule.id, schedule.projectId);
14484
15589
  }
14485
15590
  }
14486
- log4.info("started", { scheduleCount: allSchedules.length });
15591
+ log6.info("started", { scheduleCount: allSchedules.length });
14487
15592
  }
14488
15593
  /** Stop all cron tasks for graceful shutdown. */
14489
15594
  stop() {
@@ -14499,7 +15604,7 @@ var Scheduler = class {
14499
15604
  this.stopTask(projectId, existing, "Stopped");
14500
15605
  this.tasks.delete(projectId);
14501
15606
  }
14502
- const schedule = this.db.select().from(schedules).where(eq21(schedules.projectId, projectId)).get();
15607
+ const schedule = this.db.select().from(schedules).where(eq24(schedules.projectId, projectId)).get();
14503
15608
  if (schedule && schedule.enabled === 1) {
14504
15609
  this.registerCronTask(schedule);
14505
15610
  }
@@ -14515,12 +15620,12 @@ var Scheduler = class {
14515
15620
  stopTask(projectId, task, verb) {
14516
15621
  task.stop();
14517
15622
  task.destroy();
14518
- log4.info(`task.${verb.toLowerCase()}`, { projectId });
15623
+ log6.info(`task.${verb.toLowerCase()}`, { projectId });
14519
15624
  }
14520
15625
  registerCronTask(schedule) {
14521
15626
  const { id: scheduleId, projectId, cronExpr, timezone } = schedule;
14522
15627
  if (!cron.validate(cronExpr)) {
14523
- log4.error("cron.invalid", { projectId, cronExpr });
15628
+ log6.error("cron.invalid", { projectId, cronExpr });
14524
15629
  return;
14525
15630
  }
14526
15631
  const task = cron.schedule(cronExpr, () => {
@@ -14532,24 +15637,24 @@ var Scheduler = class {
14532
15637
  this.db.update(schedules).set({
14533
15638
  nextRunAt: task.getNextRun()?.toISOString() ?? null,
14534
15639
  updatedAt: (/* @__PURE__ */ new Date()).toISOString()
14535
- }).where(eq21(schedules.id, scheduleId)).run();
15640
+ }).where(eq24(schedules.id, scheduleId)).run();
14536
15641
  const label = schedule.preset ?? cronExpr;
14537
- log4.info("cron.registered", { projectId, schedule: label, timezone });
15642
+ log6.info("cron.registered", { projectId, schedule: label, timezone });
14538
15643
  }
14539
15644
  triggerRun(scheduleId, projectId) {
14540
15645
  try {
14541
15646
  const now = (/* @__PURE__ */ new Date()).toISOString();
14542
- const currentSchedule = this.db.select().from(schedules).where(eq21(schedules.id, scheduleId)).get();
15647
+ const currentSchedule = this.db.select().from(schedules).where(eq24(schedules.id, scheduleId)).get();
14543
15648
  if (!currentSchedule || currentSchedule.enabled !== 1) {
14544
- log4.warn("schedule.stale", { scheduleId, projectId, msg: "schedule no longer exists or is disabled" });
15649
+ log6.warn("schedule.stale", { scheduleId, projectId, msg: "schedule no longer exists or is disabled" });
14545
15650
  this.remove(projectId);
14546
15651
  return;
14547
15652
  }
14548
15653
  const task = this.tasks.get(projectId);
14549
15654
  const nextRunAt = task?.getNextRun()?.toISOString() ?? null;
14550
- const project = this.db.select().from(projects).where(eq21(projects.id, projectId)).get();
15655
+ const project = this.db.select().from(projects).where(eq24(projects.id, projectId)).get();
14551
15656
  if (!project) {
14552
- log4.error("project.not-found", { projectId, msg: "skipping scheduled run" });
15657
+ log6.error("project.not-found", { projectId, msg: "skipping scheduled run" });
14553
15658
  this.remove(projectId);
14554
15659
  return;
14555
15660
  }
@@ -14558,7 +15663,7 @@ var Scheduler = class {
14558
15663
  if (project.defaultLocation) {
14559
15664
  const loc = projectLocations.find((l) => l.label === project.defaultLocation);
14560
15665
  if (!loc) {
14561
- log4.warn("default-location.stale", { scheduleId, projectId, label: project.defaultLocation });
15666
+ log6.warn("default-location.stale", { scheduleId, projectId, label: project.defaultLocation });
14562
15667
  return;
14563
15668
  }
14564
15669
  resolvedLocation = loc;
@@ -14572,11 +15677,11 @@ var Scheduler = class {
14572
15677
  location: locationLabel
14573
15678
  });
14574
15679
  if (queueResult.conflict) {
14575
- log4.info("run.skipped-active", { projectName: project.name, activeRunId: queueResult.activeRunId });
15680
+ log6.info("run.skipped-active", { projectName: project.name, activeRunId: queueResult.activeRunId });
14576
15681
  this.db.update(schedules).set({
14577
15682
  nextRunAt,
14578
15683
  updatedAt: now
14579
- }).where(eq21(schedules.id, currentSchedule.id)).run();
15684
+ }).where(eq24(schedules.id, currentSchedule.id)).run();
14580
15685
  return;
14581
15686
  }
14582
15687
  const runId = queueResult.runId;
@@ -14584,21 +15689,21 @@ var Scheduler = class {
14584
15689
  lastRunAt: now,
14585
15690
  nextRunAt,
14586
15691
  updatedAt: now
14587
- }).where(eq21(schedules.id, currentSchedule.id)).run();
15692
+ }).where(eq24(schedules.id, currentSchedule.id)).run();
14588
15693
  const scheduleProviders = parseJsonColumn(currentSchedule.providers, []);
14589
15694
  const providers = scheduleProviders.length > 0 ? scheduleProviders : void 0;
14590
- log4.info("run.triggered", { runId, projectName: project.name, providers: providers ?? "all" });
15695
+ log6.info("run.triggered", { runId, projectName: project.name, providers: providers ?? "all" });
14591
15696
  this.callbacks.onRunCreated(runId, projectId, providers, resolvedLocation);
14592
15697
  } catch (err) {
14593
- log4.error("trigger.error", { scheduleId, projectId, error: err instanceof Error ? err.message : String(err) });
15698
+ log6.error("trigger.error", { scheduleId, projectId, error: err instanceof Error ? err.message : String(err) });
14594
15699
  }
14595
15700
  }
14596
15701
  };
14597
15702
 
14598
15703
  // src/notifier.ts
14599
- import { eq as eq22, desc as desc8, and as and10, or as or2 } from "drizzle-orm";
14600
- import crypto21 from "crypto";
14601
- var log5 = createLogger("Notifier");
15704
+ import { eq as eq25, desc as desc10, and as and13, or as or2 } from "drizzle-orm";
15705
+ import crypto24 from "crypto";
15706
+ var log7 = createLogger("Notifier");
14602
15707
  var Notifier = class {
14603
15708
  db;
14604
15709
  serverUrl;
@@ -14608,26 +15713,26 @@ var Notifier = class {
14608
15713
  }
14609
15714
  /** Called after a run completes (success, partial, or failed). */
14610
15715
  async onRunCompleted(runId, projectId) {
14611
- log5.info("run.completed", { runId, projectId });
14612
- const notifs = this.db.select().from(notifications).where(eq22(notifications.projectId, projectId)).all().filter((n) => n.enabled === 1);
15716
+ log7.info("run.completed", { runId, projectId });
15717
+ const notifs = this.db.select().from(notifications).where(eq25(notifications.projectId, projectId)).all().filter((n) => n.enabled === 1);
14613
15718
  if (notifs.length === 0) {
14614
- log5.info("notifications.none-enabled", { projectId });
15719
+ log7.info("notifications.none-enabled", { projectId });
14615
15720
  return;
14616
15721
  }
14617
- log5.info("notifications.found", { projectId, count: notifs.length });
14618
- const run = this.db.select().from(runs).where(eq22(runs.id, runId)).get();
15722
+ log7.info("notifications.found", { projectId, count: notifs.length });
15723
+ const run = this.db.select().from(runs).where(eq25(runs.id, runId)).get();
14619
15724
  if (!run) {
14620
- log5.error("run.not-found", { runId, msg: "skipping notification dispatch" });
15725
+ log7.error("run.not-found", { runId, msg: "skipping notification dispatch" });
14621
15726
  return;
14622
15727
  }
14623
- const project = this.db.select().from(projects).where(eq22(projects.id, projectId)).get();
15728
+ const project = this.db.select().from(projects).where(eq25(projects.id, projectId)).get();
14624
15729
  if (!project) {
14625
- log5.error("project.not-found", { projectId, msg: "skipping notification dispatch" });
15730
+ log7.error("project.not-found", { projectId, msg: "skipping notification dispatch" });
14626
15731
  return;
14627
15732
  }
14628
15733
  const transitions = this.computeTransitions(runId, projectId);
14629
15734
  const events = [];
14630
- log5.info("run.status", { runId: run.id, status: run.status, projectId });
15735
+ log7.info("run.status", { runId: run.id, status: run.status, projectId });
14631
15736
  if (run.status === "completed" || run.status === "partial") {
14632
15737
  events.push("run.completed");
14633
15738
  }
@@ -14643,7 +15748,7 @@ var Notifier = class {
14643
15748
  if (!config.url) continue;
14644
15749
  const subscribedEvents = config.events;
14645
15750
  const matchingEvents = events.filter((e) => subscribedEvents.includes(e));
14646
- log5.info("notification.match", { notificationId: notif.id, subscribedEvents, matchedEvents: matchingEvents });
15751
+ log7.info("notification.match", { notificationId: notif.id, subscribedEvents, matchedEvents: matchingEvents });
14647
15752
  if (matchingEvents.length === 0) continue;
14648
15753
  for (const event of matchingEvents) {
14649
15754
  const relevantTransitions = event === "citation.lost" ? lostTransitions : event === "citation.gained" ? gainedTransitions : transitions;
@@ -14667,11 +15772,11 @@ var Notifier = class {
14667
15772
  if (criticalInsights.length > 0) insightEvents.push("insight.critical");
14668
15773
  if (highInsights.length > 0) insightEvents.push("insight.high");
14669
15774
  if (insightEvents.length === 0) return;
14670
- const notifs = this.db.select().from(notifications).where(eq22(notifications.projectId, projectId)).all().filter((n) => n.enabled === 1);
15775
+ const notifs = this.db.select().from(notifications).where(eq25(notifications.projectId, projectId)).all().filter((n) => n.enabled === 1);
14671
15776
  if (notifs.length === 0) return;
14672
- const run = this.db.select().from(runs).where(eq22(runs.id, runId)).get();
15777
+ const run = this.db.select().from(runs).where(eq25(runs.id, runId)).get();
14673
15778
  if (!run) return;
14674
- const project = this.db.select().from(projects).where(eq22(projects.id, projectId)).get();
15779
+ const project = this.db.select().from(projects).where(eq25(projects.id, projectId)).get();
14675
15780
  if (!project) return;
14676
15781
  for (const notif of notifs) {
14677
15782
  const config = parseJsonColumn(notif.config, { url: "", events: [] });
@@ -14702,11 +15807,11 @@ var Notifier = class {
14702
15807
  }
14703
15808
  computeTransitions(runId, projectId) {
14704
15809
  const recentRuns = this.db.select().from(runs).where(
14705
- and10(
14706
- eq22(runs.projectId, projectId),
14707
- or2(eq22(runs.status, "completed"), eq22(runs.status, "partial"))
15810
+ and13(
15811
+ eq25(runs.projectId, projectId),
15812
+ or2(eq25(runs.status, "completed"), eq25(runs.status, "partial"))
14708
15813
  )
14709
- ).orderBy(desc8(runs.createdAt)).limit(2).all();
15814
+ ).orderBy(desc10(runs.createdAt)).limit(2).all();
14710
15815
  if (recentRuns.length < 2) return [];
14711
15816
  const currentRunId = recentRuns[0].id;
14712
15817
  const previousRunId = recentRuns[1].id;
@@ -14716,12 +15821,12 @@ var Notifier = class {
14716
15821
  keyword: keywords.keyword,
14717
15822
  provider: querySnapshots.provider,
14718
15823
  citationState: querySnapshots.citationState
14719
- }).from(querySnapshots).leftJoin(keywords, eq22(querySnapshots.keywordId, keywords.id)).where(eq22(querySnapshots.runId, currentRunId)).all();
15824
+ }).from(querySnapshots).leftJoin(keywords, eq25(querySnapshots.keywordId, keywords.id)).where(eq25(querySnapshots.runId, currentRunId)).all();
14720
15825
  const previousSnapshots = this.db.select({
14721
15826
  keywordId: querySnapshots.keywordId,
14722
15827
  provider: querySnapshots.provider,
14723
15828
  citationState: querySnapshots.citationState
14724
- }).from(querySnapshots).where(eq22(querySnapshots.runId, previousRunId)).all();
15829
+ }).from(querySnapshots).where(eq25(querySnapshots.runId, previousRunId)).all();
14725
15830
  const prevMap = /* @__PURE__ */ new Map();
14726
15831
  for (const s of previousSnapshots) {
14727
15832
  prevMap.set(`${s.keywordId}:${s.provider}`, s.citationState);
@@ -14745,23 +15850,23 @@ var Notifier = class {
14745
15850
  const targetLabel = redactNotificationUrl(url).urlDisplay;
14746
15851
  const targetCheck = await resolveWebhookTarget(url);
14747
15852
  if (!targetCheck.ok) {
14748
- log5.error("webhook.ssrf-blocked", { url: targetLabel, reason: targetCheck.message });
15853
+ log7.error("webhook.ssrf-blocked", { url: targetLabel, reason: targetCheck.message });
14749
15854
  this.logDelivery(projectId, notificationId, payload.event, "failed", `SSRF: ${targetCheck.message}`);
14750
15855
  return;
14751
15856
  }
14752
- log5.info("webhook.send", { event: payload.event, url: targetLabel });
15857
+ log7.info("webhook.send", { event: payload.event, url: targetLabel });
14753
15858
  const maxRetries = 3;
14754
15859
  const delays = [1e3, 4e3, 16e3];
14755
15860
  for (let attempt = 0; attempt < maxRetries; attempt++) {
14756
15861
  try {
14757
15862
  const response = await deliverWebhook(targetCheck.target, payload, webhookSecret);
14758
15863
  if (response.status >= 200 && response.status < 300) {
14759
- log5.info("webhook.delivered", { event: payload.event, url: targetLabel, httpStatus: response.status });
15864
+ log7.info("webhook.delivered", { event: payload.event, url: targetLabel, httpStatus: response.status });
14760
15865
  this.logDelivery(projectId, notificationId, payload.event, "sent", null);
14761
15866
  return;
14762
15867
  }
14763
15868
  const errorDetail = response.error ?? `HTTP ${response.status}`;
14764
- log5.warn("webhook.attempt-failed", { event: payload.event, url: targetLabel, attempt: attempt + 1, maxRetries, httpStatus: response.status, error: errorDetail });
15869
+ log7.warn("webhook.attempt-failed", { event: payload.event, url: targetLabel, attempt: attempt + 1, maxRetries, httpStatus: response.status, error: errorDetail });
14765
15870
  if (attempt === maxRetries - 1) {
14766
15871
  this.logDelivery(projectId, notificationId, payload.event, "failed", errorDetail);
14767
15872
  }
@@ -14769,7 +15874,7 @@ var Notifier = class {
14769
15874
  const errorDetail = err instanceof Error ? err.message : String(err);
14770
15875
  if (attempt === maxRetries - 1) {
14771
15876
  this.logDelivery(projectId, notificationId, payload.event, "failed", errorDetail);
14772
- log5.error("webhook.exhausted", { event: payload.event, url: targetLabel, maxRetries, error: errorDetail });
15877
+ log7.error("webhook.exhausted", { event: payload.event, url: targetLabel, maxRetries, error: errorDetail });
14773
15878
  }
14774
15879
  }
14775
15880
  if (attempt < maxRetries - 1) {
@@ -14779,7 +15884,7 @@ var Notifier = class {
14779
15884
  }
14780
15885
  logDelivery(projectId, notificationId, event, status, error) {
14781
15886
  this.db.insert(auditLog).values({
14782
- id: crypto21.randomUUID(),
15887
+ id: crypto24.randomUUID(),
14783
15888
  projectId,
14784
15889
  actor: "scheduler",
14785
15890
  action: `notification.${status}`,
@@ -14792,7 +15897,7 @@ var Notifier = class {
14792
15897
  };
14793
15898
 
14794
15899
  // src/run-coordinator.ts
14795
- var log6 = createLogger("RunCoordinator");
15900
+ var log8 = createLogger("RunCoordinator");
14796
15901
  var RunCoordinator = class {
14797
15902
  constructor(notifier, intelligenceService, onInsightsGenerated, onAeroEvent) {
14798
15903
  this.notifier = notifier;
@@ -14814,35 +15919,35 @@ var RunCoordinator = class {
14814
15919
  try {
14815
15920
  await this.onInsightsGenerated(runId, projectId, result);
14816
15921
  } catch (err) {
14817
- log6.error("insight-webhook.failed", { runId, error: err instanceof Error ? err.message : String(err) });
15922
+ log8.error("insight-webhook.failed", { runId, error: err instanceof Error ? err.message : String(err) });
14818
15923
  }
14819
15924
  }
14820
15925
  }
14821
15926
  } catch (err) {
14822
- log6.error("intelligence.failed", { runId, error: err instanceof Error ? err.message : String(err) });
15927
+ log8.error("intelligence.failed", { runId, error: err instanceof Error ? err.message : String(err) });
14823
15928
  }
14824
15929
  try {
14825
15930
  await this.notifier.onRunCompleted(runId, projectId);
14826
15931
  } catch (err) {
14827
- log6.error("notifier.failed", { runId, error: err instanceof Error ? err.message : String(err) });
15932
+ log8.error("notifier.failed", { runId, error: err instanceof Error ? err.message : String(err) });
14828
15933
  }
14829
15934
  if (this.onAeroEvent) {
14830
15935
  try {
14831
15936
  await this.onAeroEvent({ runId, projectId, insightCount, criticalOrHigh });
14832
15937
  } catch (err) {
14833
- log6.error("aero.failed", { runId, error: err instanceof Error ? err.message : String(err) });
15938
+ log8.error("aero.failed", { runId, error: err instanceof Error ? err.message : String(err) });
14834
15939
  }
14835
15940
  }
14836
15941
  }
14837
15942
  };
14838
15943
 
14839
15944
  // src/agent/session-registry.ts
14840
- import crypto23 from "crypto";
14841
- import { eq as eq24 } from "drizzle-orm";
15945
+ import crypto26 from "crypto";
15946
+ import { eq as eq27 } from "drizzle-orm";
14842
15947
 
14843
15948
  // src/agent/session.ts
14844
- import fs7 from "fs";
14845
- import path8 from "path";
15949
+ import fs11 from "fs";
15950
+ import path14 from "path";
14846
15951
  import { Agent } from "@mariozechner/pi-agent-core";
14847
15952
  import { registerBuiltInApiProviders } from "@mariozechner/pi-ai";
14848
15953
 
@@ -14943,26 +16048,26 @@ function buildAgentProvidersResponse(config) {
14943
16048
  }
14944
16049
 
14945
16050
  // src/agent/skill-paths.ts
14946
- import fs5 from "fs";
14947
- import path6 from "path";
16051
+ import fs9 from "fs";
16052
+ import path12 from "path";
14948
16053
  import { fileURLToPath } from "url";
14949
16054
  function resolveAeroSkillDir(pkgDir) {
14950
- const here = pkgDir ?? path6.dirname(fileURLToPath(import.meta.url));
16055
+ const here = pkgDir ?? path12.dirname(fileURLToPath(import.meta.url));
14951
16056
  const candidates = [
14952
- path6.join(here, "../assets/agent-workspace/skills/aero"),
14953
- path6.join(here, "../../assets/agent-workspace/skills/aero"),
14954
- path6.join(here, "../../../../skills/aero")
16057
+ path12.join(here, "../assets/agent-workspace/skills/aero"),
16058
+ path12.join(here, "../../assets/agent-workspace/skills/aero"),
16059
+ path12.join(here, "../../../../skills/aero")
14955
16060
  ];
14956
16061
  for (const candidate of candidates) {
14957
- if (fs5.existsSync(path6.join(candidate, "SKILL.md"))) return candidate;
16062
+ if (fs9.existsSync(path12.join(candidate, "SKILL.md"))) return candidate;
14958
16063
  }
14959
16064
  throw new Error(`Aero skill not found. Searched:
14960
16065
  ${candidates.join("\n ")}`);
14961
16066
  }
14962
16067
 
14963
16068
  // src/agent/skill-tools.ts
14964
- import fs6 from "fs";
14965
- import path7 from "path";
16069
+ import fs10 from "fs";
16070
+ import path13 from "path";
14966
16071
  import { Type } from "@sinclair/typebox";
14967
16072
  var MAX_DOC_CHARS = 2e4;
14968
16073
  function textResult(details) {
@@ -14983,13 +16088,13 @@ function parseDescription(body) {
14983
16088
  return "(no description)";
14984
16089
  }
14985
16090
  function scanSkillDocs(skillDir) {
14986
- const refsDir = path7.join(skillDir ?? resolveAeroSkillDir(), "references");
14987
- if (!fs6.existsSync(refsDir)) return [];
16091
+ const refsDir = path13.join(skillDir ?? resolveAeroSkillDir(), "references");
16092
+ if (!fs10.existsSync(refsDir)) return [];
14988
16093
  const entries = [];
14989
- for (const file of fs6.readdirSync(refsDir)) {
16094
+ for (const file of fs10.readdirSync(refsDir)) {
14990
16095
  if (!file.endsWith(".md")) continue;
14991
- const filePath = path7.join(refsDir, file);
14992
- const body = fs6.readFileSync(filePath, "utf-8");
16096
+ const filePath = path13.join(refsDir, file);
16097
+ const body = fs10.readFileSync(filePath, "utf-8");
14993
16098
  entries.push({
14994
16099
  slug: file.replace(/\.md$/, ""),
14995
16100
  description: parseDescription(body),
@@ -15032,8 +16137,8 @@ function buildReadSkillDocTool() {
15032
16137
  availableSlugs: docs.map((d) => d.slug)
15033
16138
  });
15034
16139
  }
15035
- const filePath = path7.join(skillDir, "references", `${match.slug}.md`);
15036
- const content = fs6.readFileSync(filePath, "utf-8");
16140
+ const filePath = path13.join(skillDir, "references", `${match.slug}.md`);
16141
+ const content = fs10.readFileSync(filePath, "utf-8");
15037
16142
  if (content.length > MAX_DOC_CHARS) {
15038
16143
  return textResult({
15039
16144
  slug: match.slug,
@@ -15057,8 +16162,8 @@ function buildSkillDocTools() {
15057
16162
  import { Type as Type2 } from "@sinclair/typebox";
15058
16163
 
15059
16164
  // src/agent/memory-store.ts
15060
- import crypto22 from "crypto";
15061
- import { and as and11, desc as desc9, eq as eq23, like, sql as sql7 } from "drizzle-orm";
16165
+ import crypto25 from "crypto";
16166
+ import { and as and14, desc as desc11, eq as eq26, like, sql as sql9 } from "drizzle-orm";
15062
16167
  var COMPACTION_KEY_PREFIX = "compaction:";
15063
16168
  var COMPACTION_NOTES_PER_SESSION = 3;
15064
16169
  function rowToDto(row) {
@@ -15072,7 +16177,7 @@ function rowToDto(row) {
15072
16177
  };
15073
16178
  }
15074
16179
  function listMemoryEntries(db, projectId, opts = {}) {
15075
- const query = db.select().from(agentMemory).where(eq23(agentMemory.projectId, projectId)).orderBy(desc9(agentMemory.updatedAt));
16180
+ const query = db.select().from(agentMemory).where(eq26(agentMemory.projectId, projectId)).orderBy(desc11(agentMemory.updatedAt));
15076
16181
  const rows = opts.limit === void 0 ? query.all() : query.limit(opts.limit).all();
15077
16182
  return rows.map(rowToDto);
15078
16183
  }
@@ -15086,7 +16191,7 @@ function upsertMemoryEntry(db, args) {
15086
16191
  throw new Error(`memory key prefix "${COMPACTION_KEY_PREFIX}" is reserved for compaction notes`);
15087
16192
  }
15088
16193
  const now = (/* @__PURE__ */ new Date()).toISOString();
15089
- const id = crypto22.randomUUID();
16194
+ const id = crypto25.randomUUID();
15090
16195
  db.insert(agentMemory).values({
15091
16196
  id,
15092
16197
  projectId: args.projectId,
@@ -15103,12 +16208,12 @@ function upsertMemoryEntry(db, args) {
15103
16208
  updatedAt: now
15104
16209
  }
15105
16210
  }).run();
15106
- const row = db.select().from(agentMemory).where(and11(eq23(agentMemory.projectId, args.projectId), eq23(agentMemory.key, args.key))).get();
16211
+ const row = db.select().from(agentMemory).where(and14(eq26(agentMemory.projectId, args.projectId), eq26(agentMemory.key, args.key))).get();
15107
16212
  if (!row) throw new Error("memory upsert produced no row");
15108
16213
  return rowToDto(row);
15109
16214
  }
15110
16215
  function deleteMemoryEntry(db, projectId, key) {
15111
- const result = db.delete(agentMemory).where(and11(eq23(agentMemory.projectId, projectId), eq23(agentMemory.key, key))).run();
16216
+ const result = db.delete(agentMemory).where(and14(eq26(agentMemory.projectId, projectId), eq26(agentMemory.key, key))).run();
15112
16217
  const changes = result.changes ?? 0;
15113
16218
  return changes > 0;
15114
16219
  }
@@ -15123,7 +16228,7 @@ function writeCompactionNote(db, args) {
15123
16228
  }
15124
16229
  const now = (/* @__PURE__ */ new Date()).toISOString();
15125
16230
  const key = `${COMPACTION_KEY_PREFIX}${args.sessionId}:${now}`;
15126
- const id = crypto22.randomUUID();
16231
+ const id = crypto25.randomUUID();
15127
16232
  let inserted;
15128
16233
  db.transaction((tx) => {
15129
16234
  tx.insert(agentMemory).values({
@@ -15137,16 +16242,16 @@ function writeCompactionNote(db, args) {
15137
16242
  }).run();
15138
16243
  const sessionPrefix = `${COMPACTION_KEY_PREFIX}${args.sessionId}:`;
15139
16244
  const existing = tx.select({ id: agentMemory.id, updatedAt: agentMemory.updatedAt }).from(agentMemory).where(
15140
- and11(
15141
- eq23(agentMemory.projectId, args.projectId),
16245
+ and14(
16246
+ eq26(agentMemory.projectId, args.projectId),
15142
16247
  like(agentMemory.key, `${sessionPrefix}%`)
15143
16248
  )
15144
- ).orderBy(desc9(agentMemory.updatedAt)).all();
16249
+ ).orderBy(desc11(agentMemory.updatedAt)).all();
15145
16250
  const stale = existing.slice(COMPACTION_NOTES_PER_SESSION).map((r) => r.id);
15146
16251
  if (stale.length > 0) {
15147
- tx.delete(agentMemory).where(sql7`${agentMemory.id} IN (${sql7.join(stale.map((s) => sql7`${s}`), sql7`, `)})`).run();
16252
+ tx.delete(agentMemory).where(sql9`${agentMemory.id} IN (${sql9.join(stale.map((s) => sql9`${s}`), sql9`, `)})`).run();
15148
16253
  }
15149
- const row = tx.select().from(agentMemory).where(and11(eq23(agentMemory.projectId, args.projectId), eq23(agentMemory.key, key))).get();
16254
+ const row = tx.select().from(agentMemory).where(and14(eq26(agentMemory.projectId, args.projectId), eq26(agentMemory.key, key))).get();
15150
16255
  if (row) inserted = rowToDto(row);
15151
16256
  });
15152
16257
  if (!inserted) throw new Error("compaction note write produced no row");
@@ -15293,6 +16398,35 @@ function buildGetRunTool(ctx) {
15293
16398
  }
15294
16399
  };
15295
16400
  }
16401
+ var BacklinksSchema = Type2.Object({
16402
+ limit: Type2.Optional(
16403
+ Type2.Number({
16404
+ description: "Max linking-domain rows to include. Default 50, max 200.",
16405
+ minimum: 1,
16406
+ maximum: 200
16407
+ })
16408
+ ),
16409
+ release: Type2.Optional(
16410
+ Type2.String({
16411
+ description: "Common Crawl release id (e.g., cc-main-2026-jan-feb-mar). Omit for the most recent release with data."
16412
+ })
16413
+ )
16414
+ });
16415
+ function buildListBacklinksTool(ctx) {
16416
+ return {
16417
+ name: "list_backlinks",
16418
+ label: "List backlinks",
16419
+ description: "Backlink summary and top linking domains from the most recent ready Common Crawl release. Off-site authority signal that correlates with citation likelihood. Returns null summary when no release sync has completed for this workspace.",
16420
+ parameters: BacklinksSchema,
16421
+ execute: async (_toolCallId, params) => {
16422
+ const response = await ctx.client.backlinksDomains(ctx.projectName, {
16423
+ limit: params.limit ?? 50,
16424
+ release: params.release
16425
+ });
16426
+ return textResult2(response);
16427
+ }
16428
+ };
16429
+ }
15296
16430
  var RecallSchema = Type2.Object({
15297
16431
  limit: Type2.Optional(
15298
16432
  Type2.Number({
@@ -15323,7 +16457,8 @@ function buildReadTools(ctx) {
15323
16457
  buildListKeywordsTool(ctx),
15324
16458
  buildListCompetitorsTool(ctx),
15325
16459
  buildGetRunTool(ctx),
15326
- buildRecallTool(ctx)
16460
+ buildRecallTool(ctx),
16461
+ buildListBacklinksTool(ctx)
15327
16462
  ];
15328
16463
  }
15329
16464
  var RunSweepSchema = Type2.Object({
@@ -15557,10 +16692,10 @@ function ensureBuiltinsRegistered() {
15557
16692
  }
15558
16693
  function loadAeroSystemPrompt(pkgDir) {
15559
16694
  const skillDir = resolveAeroSkillDir(pkgDir);
15560
- const skillBody = fs7.readFileSync(path8.join(skillDir, "SKILL.md"), "utf-8");
15561
- const soulPath = path8.join(skillDir, "soul.md");
15562
- if (!fs7.existsSync(soulPath)) return skillBody;
15563
- const soulBody = fs7.readFileSync(soulPath, "utf-8");
16695
+ const skillBody = fs11.readFileSync(path14.join(skillDir, "SKILL.md"), "utf-8");
16696
+ const soulPath = path14.join(skillDir, "soul.md");
16697
+ if (!fs11.existsSync(soulPath)) return skillBody;
16698
+ const soulBody = fs11.readFileSync(soulPath, "utf-8");
15564
16699
  return `${soulBody.trimEnd()}
15565
16700
 
15566
16701
  ---
@@ -15744,7 +16879,7 @@ async function compactMessages(args) {
15744
16879
  }
15745
16880
 
15746
16881
  // src/agent/session-registry.ts
15747
- var log7 = createLogger("SessionRegistry");
16882
+ var log9 = createLogger("SessionRegistry");
15748
16883
  var MAX_HYDRATE_NOTES = 20;
15749
16884
  var MAX_HYDRATE_BYTES = 32 * 1024;
15750
16885
  function escapeMemoryFragment(value) {
@@ -15793,7 +16928,7 @@ var SessionRegistry = class {
15793
16928
  modelProvider: effectiveProvider,
15794
16929
  modelId: effectiveModelId,
15795
16930
  updatedAt: (/* @__PURE__ */ new Date()).toISOString()
15796
- }).where(eq24(agentSessions.projectId, projectId)).run();
16931
+ }).where(eq27(agentSessions.projectId, projectId)).run();
15797
16932
  }
15798
16933
  const agent2 = createAeroSession({
15799
16934
  projectName,
@@ -15975,13 +17110,13 @@ ${lines.join("\n")}
15975
17110
  agent.state.messages = result.messages;
15976
17111
  agent.state.systemPrompt = this.buildHydratedSystemPrompt(projectId, row.systemPrompt);
15977
17112
  this.save(projectName);
15978
- log7.info("compaction.completed", {
17113
+ log9.info("compaction.completed", {
15979
17114
  projectName,
15980
17115
  removedCount: result.removedCount,
15981
17116
  summaryBytes: Buffer.byteLength(result.summary, "utf8")
15982
17117
  });
15983
17118
  } catch (err) {
15984
- log7.error("compaction.failed", {
17119
+ log9.error("compaction.failed", {
15985
17120
  projectName,
15986
17121
  error: err instanceof Error ? err.message : String(err)
15987
17122
  });
@@ -16011,7 +17146,7 @@ ${lines.join("\n")}
16011
17146
  modelProvider: nextProvider,
16012
17147
  modelId: nextModelId,
16013
17148
  updatedAt: (/* @__PURE__ */ new Date()).toISOString()
16014
- }).where(eq24(agentSessions.projectId, projectId)).run();
17149
+ }).where(eq27(agentSessions.projectId, projectId)).run();
16015
17150
  }
16016
17151
  /** Persist a session's transcript back to the DB. Call after any run settles. */
16017
17152
  save(projectName) {
@@ -16078,7 +17213,7 @@ ${lines.join("\n")}
16078
17213
  await agent.prompt(msgs);
16079
17214
  this.save(projectName);
16080
17215
  } catch (err) {
16081
- log7.error("drain.failed", {
17216
+ log9.error("drain.failed", {
16082
17217
  projectName,
16083
17218
  error: err instanceof Error ? err.message : String(err)
16084
17219
  });
@@ -16173,17 +17308,17 @@ ${lines.join("\n")}
16173
17308
  return id;
16174
17309
  }
16175
17310
  tryResolveProjectId(projectName) {
16176
- const row = this.opts.db.select({ id: projects.id }).from(projects).where(eq24(projects.name, projectName)).get();
17311
+ const row = this.opts.db.select({ id: projects.id }).from(projects).where(eq27(projects.name, projectName)).get();
16177
17312
  return row?.id;
16178
17313
  }
16179
17314
  loadRow(projectId) {
16180
- const row = this.opts.db.select().from(agentSessions).where(eq24(agentSessions.projectId, projectId)).get();
17315
+ const row = this.opts.db.select().from(agentSessions).where(eq27(agentSessions.projectId, projectId)).get();
16181
17316
  return row ?? null;
16182
17317
  }
16183
17318
  insertRow(params) {
16184
17319
  const now = (/* @__PURE__ */ new Date()).toISOString();
16185
17320
  this.opts.db.insert(agentSessions).values({
16186
- id: crypto23.randomUUID(),
17321
+ id: crypto26.randomUUID(),
16187
17322
  projectId: params.projectId,
16188
17323
  systemPrompt: params.systemPrompt,
16189
17324
  modelProvider: params.provider ?? params.modelProvider ?? AgentProviderIds.claude,
@@ -16196,14 +17331,14 @@ ${lines.join("\n")}
16196
17331
  }
16197
17332
  updateRow(projectId, patch) {
16198
17333
  const now = (/* @__PURE__ */ new Date()).toISOString();
16199
- this.opts.db.update(agentSessions).set({ ...patch, updatedAt: now }).where(eq24(agentSessions.projectId, projectId)).run();
17334
+ this.opts.db.update(agentSessions).set({ ...patch, updatedAt: now }).where(eq27(agentSessions.projectId, projectId)).run();
16200
17335
  }
16201
17336
  };
16202
17337
 
16203
17338
  // src/agent/agent-routes.ts
16204
- import { eq as eq25 } from "drizzle-orm";
17339
+ import { eq as eq28 } from "drizzle-orm";
16205
17340
  function resolveProject2(db, name) {
16206
- const row = db.select({ id: projects.id, name: projects.name }).from(projects).where(eq25(projects.name, name)).get();
17341
+ const row = db.select({ id: projects.id, name: projects.name }).from(projects).where(eq28(projects.name, name)).get();
16207
17342
  if (!row) throw notFound("project", name);
16208
17343
  return row;
16209
17344
  }
@@ -16212,7 +17347,7 @@ function registerAgentRoutes(app, opts) {
16212
17347
  "/projects/:name/agent/transcript",
16213
17348
  async (request) => {
16214
17349
  const project = resolveProject2(opts.db, request.params.name);
16215
- const row = opts.db.select().from(agentSessions).where(eq25(agentSessions.projectId, project.id)).get();
17350
+ const row = opts.db.select().from(agentSessions).where(eq28(agentSessions.projectId, project.id)).get();
16216
17351
  if (!row) {
16217
17352
  return { messages: [], modelProvider: null, modelId: null, updatedAt: null };
16218
17353
  }
@@ -16236,7 +17371,7 @@ function registerAgentRoutes(app, opts) {
16236
17371
  async (request) => {
16237
17372
  const project = resolveProject2(opts.db, request.params.name);
16238
17373
  opts.sessionRegistry.reset(project.name);
16239
- opts.db.update(agentSessions).set({ messages: "[]", followUpQueue: "[]", updatedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq25(agentSessions.projectId, project.id)).run();
17374
+ opts.db.update(agentSessions).set({ messages: "[]", followUpQueue: "[]", updatedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq28(agentSessions.projectId, project.id)).run();
16240
17375
  return { status: "reset" };
16241
17376
  }
16242
17377
  );
@@ -16398,9 +17533,9 @@ var ApiClient = class {
16398
17533
  }
16399
17534
  return this.probePromise;
16400
17535
  }
16401
- async request(method, path10, body) {
17536
+ async request(method, path16, body) {
16402
17537
  await this.probeBasePath();
16403
- const url = `${this.baseUrl}${path10}`;
17538
+ const url = `${this.baseUrl}${path16}`;
16404
17539
  const serializedBody = body != null ? JSON.stringify(body) : void 0;
16405
17540
  const headers = {
16406
17541
  "Authorization": `Bearer ${this.apiKey}`,
@@ -16488,9 +17623,9 @@ var ApiClient = class {
16488
17623
  * structured-error behavior of `request()`; the caller reads `res.body`
16489
17624
  * and releases the response when done.
16490
17625
  */
16491
- async streamPost(path10, body, signal) {
17626
+ async streamPost(path16, body, signal) {
16492
17627
  await this.probeBasePath();
16493
- const url = `${this.baseUrl}${path10}`;
17628
+ const url = `${this.baseUrl}${path16}`;
16494
17629
  const headers = {
16495
17630
  Authorization: `Bearer ${this.apiKey}`,
16496
17631
  "Content-Type": "application/json",
@@ -16891,6 +18026,46 @@ var ApiClient = class {
16891
18026
  const qs = limit ? `?limit=${limit}` : "";
16892
18027
  return this.request("GET", `/projects/${encodeURIComponent(project)}/health/history${qs}`);
16893
18028
  }
18029
+ // --- Backlinks ---------------------------------------------------------
18030
+ async backlinksStatus() {
18031
+ return this.request("GET", "/backlinks/status");
18032
+ }
18033
+ async backlinksInstall() {
18034
+ return this.request("POST", "/backlinks/install");
18035
+ }
18036
+ async backlinksTriggerSync(release) {
18037
+ return this.request("POST", "/backlinks/syncs", { release });
18038
+ }
18039
+ async backlinksLatestSync() {
18040
+ return this.request("GET", "/backlinks/syncs/latest");
18041
+ }
18042
+ async backlinksListSyncs() {
18043
+ return this.request("GET", "/backlinks/syncs");
18044
+ }
18045
+ async backlinksCachedReleases() {
18046
+ return this.request("GET", "/backlinks/releases");
18047
+ }
18048
+ async backlinksPruneCache(release) {
18049
+ return this.request("DELETE", `/backlinks/cache/${encodeURIComponent(release)}`);
18050
+ }
18051
+ async backlinksExtract(project, release) {
18052
+ return this.request("POST", `/projects/${encodeURIComponent(project)}/backlinks/extract`, release ? { release } : {});
18053
+ }
18054
+ async backlinksSummary(project, release) {
18055
+ const qs = release ? `?release=${encodeURIComponent(release)}` : "";
18056
+ return this.request("GET", `/projects/${encodeURIComponent(project)}/backlinks/summary${qs}`);
18057
+ }
18058
+ async backlinksDomains(project, opts = {}) {
18059
+ const qs = new URLSearchParams();
18060
+ if (opts.limit !== void 0) qs.set("limit", String(opts.limit));
18061
+ if (opts.offset !== void 0) qs.set("offset", String(opts.offset));
18062
+ if (opts.release) qs.set("release", opts.release);
18063
+ const suffix = qs.toString() ? `?${qs.toString()}` : "";
18064
+ return this.request("GET", `/projects/${encodeURIComponent(project)}/backlinks/domains${suffix}`);
18065
+ }
18066
+ async backlinksHistory(project) {
18067
+ return this.request("GET", `/projects/${encodeURIComponent(project)}/backlinks/history`);
18068
+ }
16894
18069
  };
16895
18070
 
16896
18071
  // src/snapshot-service.ts
@@ -16915,13 +18090,13 @@ function extractHostname(domain) {
16915
18090
  function fetchWithPinnedAddress(target) {
16916
18091
  return new Promise((resolve) => {
16917
18092
  const port = target.url.port ? Number(target.url.port) : 443;
16918
- const path10 = target.url.pathname + target.url.search;
18093
+ const path16 = target.url.pathname + target.url.search;
16919
18094
  const req = https2.request(
16920
18095
  {
16921
18096
  hostname: target.address,
16922
18097
  family: target.family,
16923
18098
  port,
16924
- path: path10,
18099
+ path: path16,
16925
18100
  method: "GET",
16926
18101
  timeout: FETCH_TIMEOUT_MS,
16927
18102
  servername: target.url.hostname,
@@ -17013,7 +18188,7 @@ function formatAuditFactorScore(factor) {
17013
18188
  }
17014
18189
 
17015
18190
  // src/snapshot-service.ts
17016
- var log8 = createLogger("Snapshot");
18191
+ var log10 = createLogger("Snapshot");
17017
18192
  var ANALYSIS_PROVIDER_PRIORITY = ["openai", "claude", "gemini", "perplexity", "local"];
17018
18193
  var SNAPSHOT_QUERY_COUNT = 6;
17019
18194
  var ProviderExecutionGate2 = class {
@@ -17156,7 +18331,7 @@ var SnapshotService = class {
17156
18331
  return mapAuditReport(report);
17157
18332
  } catch (err) {
17158
18333
  const message = err instanceof Error ? err.message : String(err);
17159
- log8.warn("audit.failed", { homepageUrl, error: message });
18334
+ log10.warn("audit.failed", { homepageUrl, error: message });
17160
18335
  return {
17161
18336
  url: homepageUrl,
17162
18337
  finalUrl: homepageUrl,
@@ -17186,7 +18361,7 @@ var SnapshotService = class {
17186
18361
  phrases: parsedPhrases
17187
18362
  };
17188
18363
  } catch (err) {
17189
- log8.warn("profile.generation-failed", {
18364
+ log10.warn("profile.generation-failed", {
17190
18365
  domain: ctx.domain,
17191
18366
  provider: ctx.analysisProvider.adapter.name,
17192
18367
  error: err instanceof Error ? err.message : String(err)
@@ -17328,7 +18503,7 @@ var SnapshotService = class {
17328
18503
  recommendedActions: uniqueStrings(parsed.recommendedActions ?? []).slice(0, 4)
17329
18504
  };
17330
18505
  } catch (err) {
17331
- log8.warn("response.analysis-failed", {
18506
+ log10.warn("response.analysis-failed", {
17332
18507
  provider: ctx.analysisProvider.adapter.name,
17333
18508
  error: err instanceof Error ? err.message : String(err)
17334
18509
  });
@@ -17611,9 +18786,9 @@ function clipText(value, length) {
17611
18786
  }
17612
18787
 
17613
18788
  // src/server.ts
17614
- var _require2 = createRequire2(import.meta.url);
18789
+ var _require2 = createRequire3(import.meta.url);
17615
18790
  var { version: PKG_VERSION } = _require2("../package.json");
17616
- var log9 = createLogger("Server");
18791
+ var log11 = createLogger("Server");
17617
18792
  var DEFAULT_QUOTA = {
17618
18793
  maxConcurrency: 2,
17619
18794
  maxRequestsPerMinute: 10,
@@ -17644,7 +18819,7 @@ function summarizeProviderConfig(provider, config) {
17644
18819
  };
17645
18820
  }
17646
18821
  function hashApiKey(key) {
17647
- return crypto24.createHash("sha256").update(key).digest("hex");
18822
+ return crypto27.createHash("sha256").update(key).digest("hex");
17648
18823
  }
17649
18824
  function parseCookies2(header) {
17650
18825
  if (!header) return {};
@@ -17700,7 +18875,7 @@ function applyLegacyCredentials(rows, config) {
17700
18875
  }
17701
18876
  if (migratedGoogle > 0) {
17702
18877
  saveConfigPatch({ google: config.google });
17703
- log9.info("credentials.migrated", { type: "google", count: migratedGoogle });
18878
+ log11.info("credentials.migrated", { type: "google", count: migratedGoogle });
17704
18879
  }
17705
18880
  let migratedGa4 = 0;
17706
18881
  for (const row of rows.ga4) {
@@ -17718,7 +18893,7 @@ function applyLegacyCredentials(rows, config) {
17718
18893
  }
17719
18894
  if (migratedGa4 > 0) {
17720
18895
  saveConfigPatch({ ga4: config.ga4 });
17721
- log9.info("credentials.migrated", { type: "ga4", count: migratedGa4 });
18896
+ log11.info("credentials.migrated", { type: "ga4", count: migratedGa4 });
17722
18897
  }
17723
18898
  }
17724
18899
  async function createServer(opts) {
@@ -17750,11 +18925,11 @@ async function createServer(opts) {
17750
18925
  applyLegacyCredentials(legacyRows, opts.config);
17751
18926
  dropLegacyCredentialColumns(opts.db);
17752
18927
  } catch (err) {
17753
- log9.warn("credentials.migration.failed", {
18928
+ log11.warn("credentials.migration.failed", {
17754
18929
  error: err instanceof Error ? err.message : String(err)
17755
18930
  });
17756
18931
  }
17757
- log9.info("providers.configured", { providers: Object.keys(providers).filter((k) => {
18932
+ log11.info("providers.configured", { providers: Object.keys(providers).filter((k) => {
17758
18933
  const p = providers[k];
17759
18934
  return p?.apiKey || p?.baseUrl || p?.vertexProject;
17760
18935
  }) });
@@ -17802,7 +18977,7 @@ async function createServer(opts) {
17802
18977
  intelligenceService,
17803
18978
  (runId, projectId, result) => notifier.dispatchInsightWebhooks(runId, projectId, result),
17804
18979
  async ({ runId, projectId, insightCount, criticalOrHigh }) => {
17805
- const project = opts.db.select({ name: projects.name }).from(projects).where(eq26(projects.id, projectId)).get();
18980
+ const project = opts.db.select({ name: projects.name }).from(projects).where(eq29(projects.id, projectId)).get();
17806
18981
  if (!project) return;
17807
18982
  sessionRegistry.queueFollowUp(project.name, {
17808
18983
  role: "user",
@@ -17814,8 +18989,8 @@ async function createServer(opts) {
17814
18989
  );
17815
18990
  jobRunner.onRunCompleted = (runId, projectId) => runCoordinator.onRunCompleted(runId, projectId);
17816
18991
  const snapshotService = new SnapshotService(registry);
17817
- const orphanedOpenClawDir = path9.join(os5.homedir(), ".openclaw-aero");
17818
- if (fs8.existsSync(orphanedOpenClawDir)) {
18992
+ const orphanedOpenClawDir = path15.join(os6.homedir(), ".openclaw-aero");
18993
+ if (fs12.existsSync(orphanedOpenClawDir)) {
17819
18994
  app.log.warn(
17820
18995
  { path: orphanedOpenClawDir },
17821
18996
  "OpenClaw gateway is no longer used. Remove ~/.openclaw-aero/ manually to reclaim the directory."
@@ -17896,7 +19071,7 @@ async function createServer(opts) {
17896
19071
  return removed;
17897
19072
  }
17898
19073
  };
17899
- const googleStateSecret = process.env.GOOGLE_STATE_SECRET ?? crypto24.randomBytes(32).toString("hex");
19074
+ const googleStateSecret = process.env.GOOGLE_STATE_SECRET ?? crypto27.randomBytes(32).toString("hex");
17900
19075
  const googleConnectionStore = {
17901
19076
  listConnections: (domain) => listGoogleConnections(opts.config, domain),
17902
19077
  getConnection: (domain, connectionType) => getGoogleConnection(opts.config, domain, connectionType),
@@ -17942,11 +19117,11 @@ async function createServer(opts) {
17942
19117
  const apiPrefix = basePath ? `${basePath}api/v1` : "/api/v1";
17943
19118
  if (opts.config.apiKey) {
17944
19119
  const keyHash = hashApiKey(opts.config.apiKey);
17945
- const existing = opts.db.select().from(apiKeys).where(eq26(apiKeys.keyHash, keyHash)).get();
19120
+ const existing = opts.db.select().from(apiKeys).where(eq29(apiKeys.keyHash, keyHash)).get();
17946
19121
  if (!existing) {
17947
19122
  const prefix = opts.config.apiKey.slice(0, 12);
17948
19123
  opts.db.insert(apiKeys).values({
17949
- id: `key_${crypto24.randomBytes(8).toString("hex")}`,
19124
+ id: `key_${crypto27.randomBytes(8).toString("hex")}`,
17950
19125
  name: "default",
17951
19126
  keyHash,
17952
19127
  keyPrefix: prefix,
@@ -17970,7 +19145,7 @@ async function createServer(opts) {
17970
19145
  };
17971
19146
  const createSession = (apiKeyId) => {
17972
19147
  pruneExpiredSessions();
17973
- const sessionId = crypto24.randomBytes(32).toString("hex");
19148
+ const sessionId = crypto27.randomBytes(32).toString("hex");
17974
19149
  sessions.set(sessionId, {
17975
19150
  apiKeyId,
17976
19151
  expiresAt: Date.now() + SESSION_TTL_MS
@@ -17994,7 +19169,7 @@ async function createServer(opts) {
17994
19169
  };
17995
19170
  const getDefaultApiKey = () => {
17996
19171
  if (!opts.config.apiKey) return void 0;
17997
- return opts.db.select().from(apiKeys).where(eq26(apiKeys.keyHash, hashApiKey(opts.config.apiKey))).get();
19172
+ return opts.db.select().from(apiKeys).where(eq29(apiKeys.keyHash, hashApiKey(opts.config.apiKey))).get();
17998
19173
  };
17999
19174
  const createPasswordSession = (reply) => {
18000
19175
  const key = getDefaultApiKey();
@@ -18051,12 +19226,12 @@ async function createServer(opts) {
18051
19226
  return reply.send({ authenticated: true });
18052
19227
  }
18053
19228
  if (apiKey) {
18054
- const key = opts.db.select().from(apiKeys).where(eq26(apiKeys.keyHash, hashApiKey(apiKey))).get();
19229
+ const key = opts.db.select().from(apiKeys).where(eq29(apiKeys.keyHash, hashApiKey(apiKey))).get();
18055
19230
  if (!key || key.revokedAt) {
18056
19231
  const err2 = authInvalid();
18057
19232
  return reply.status(err2.statusCode).send(err2.toJSON());
18058
19233
  }
18059
- opts.db.update(apiKeys).set({ lastUsedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq26(apiKeys.id, key.id)).run();
19234
+ opts.db.update(apiKeys).set({ lastUsedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq29(apiKeys.id, key.id)).run();
18060
19235
  const sessionId = createSession(key.id);
18061
19236
  reply.header("set-cookie", serializeSessionCookie({
18062
19237
  name: SESSION_COOKIE_NAME,
@@ -18123,6 +19298,54 @@ async function createServer(opts) {
18123
19298
  app.log.error({ runId, err }, "Inspect sitemap failed");
18124
19299
  });
18125
19300
  },
19301
+ getBacklinksStatus: () => ({
19302
+ duckdbInstalled: isDuckdbInstalled(),
19303
+ duckdbVersion: readInstalledVersion() ?? void 0,
19304
+ duckdbSpec: DUCKDB_SPEC,
19305
+ pluginDir: PLUGIN_DIR
19306
+ }),
19307
+ onInstallBacklinks: async () => {
19308
+ const result = await installDuckdb({ onLog: (line) => app.log.info({ line }, "duckdb install") });
19309
+ return {
19310
+ installed: true,
19311
+ version: result.version,
19312
+ path: result.path,
19313
+ alreadyPresent: result.alreadyPresent
19314
+ };
19315
+ },
19316
+ onReleaseSyncRequested: (syncId, release) => {
19317
+ executeReleaseSync(opts.db, syncId, { release }).catch((err) => {
19318
+ app.log.error({ syncId, err }, "Common Crawl release sync failed");
19319
+ });
19320
+ },
19321
+ onBacklinkExtractRequested: (runId, projectId, release) => {
19322
+ executeBacklinkExtract(opts.db, runId, projectId, { release }).catch((err) => {
19323
+ app.log.error({ runId, err }, "Backlink extract failed");
19324
+ });
19325
+ },
19326
+ onBacklinksPruneCache: (release) => {
19327
+ try {
19328
+ pruneCachedRelease(release);
19329
+ } catch (err) {
19330
+ app.log.error({ release, err }, "Failed to prune cached release");
19331
+ }
19332
+ },
19333
+ listCachedReleases: () => {
19334
+ const cached = listCachedReleases();
19335
+ const syncByRelease = /* @__PURE__ */ new Map();
19336
+ for (const row of opts.db.select().from(ccReleaseSyncs).all()) {
19337
+ syncByRelease.set(row.release, { status: row.status, updatedAt: row.updatedAt });
19338
+ }
19339
+ return cached.map((entry) => {
19340
+ const sync = syncByRelease.get(entry.release);
19341
+ return {
19342
+ release: entry.release,
19343
+ syncStatus: sync?.status ?? null,
19344
+ bytes: entry.bytes,
19345
+ lastUsedAt: entry.lastUsedAt
19346
+ };
19347
+ });
19348
+ },
18126
19349
  openApiInfo: {
18127
19350
  title: "Canonry API",
18128
19351
  version: PKG_VERSION,
@@ -18203,7 +19426,7 @@ async function createServer(opts) {
18203
19426
  const targetProjectIds = affectedProjectIds.length > 0 ? affectedProjectIds : [null];
18204
19427
  const createdAt = (/* @__PURE__ */ new Date()).toISOString();
18205
19428
  opts.db.insert(auditLog).values(targetProjectIds.map((projectId) => ({
18206
- id: crypto24.randomUUID(),
19429
+ id: crypto27.randomUUID(),
18207
19430
  projectId,
18208
19431
  actor: "api",
18209
19432
  action: existing ? "provider.updated" : "provider.created",
@@ -18334,10 +19557,10 @@ async function createServer(opts) {
18334
19557
  return snapshotService.createReport(input);
18335
19558
  }
18336
19559
  });
18337
- const dirname = path9.dirname(fileURLToPath2(import.meta.url));
18338
- const assetsDir = path9.join(dirname, "..", "assets");
18339
- if (fs8.existsSync(assetsDir)) {
18340
- const indexPath = path9.join(assetsDir, "index.html");
19560
+ const dirname = path15.dirname(fileURLToPath2(import.meta.url));
19561
+ const assetsDir = path15.join(dirname, "..", "assets");
19562
+ if (fs12.existsSync(assetsDir)) {
19563
+ const indexPath = path15.join(assetsDir, "index.html");
18341
19564
  const injectConfig = (html) => {
18342
19565
  const clientConfig = {};
18343
19566
  if (basePath) clientConfig.basePath = basePath;
@@ -18355,8 +19578,8 @@ async function createServer(opts) {
18355
19578
  index: false
18356
19579
  });
18357
19580
  const serveIndex = (_request, reply) => {
18358
- if (fs8.existsSync(indexPath)) {
18359
- const html = fs8.readFileSync(indexPath, "utf-8");
19581
+ if (fs12.existsSync(indexPath)) {
19582
+ const html = fs12.readFileSync(indexPath, "utf-8");
18360
19583
  return reply.type("text/html").send(injectConfig(html));
18361
19584
  }
18362
19585
  return reply.status(404).send({ error: "Dashboard not built" });
@@ -18376,8 +19599,8 @@ async function createServer(opts) {
18376
19599
  if (basePath && !url.startsWith(basePath)) {
18377
19600
  return reply.status(404).send({ error: "Not found", path: request.url });
18378
19601
  }
18379
- if (fs8.existsSync(indexPath)) {
18380
- const html = fs8.readFileSync(indexPath, "utf-8");
19602
+ if (fs12.existsSync(indexPath)) {
19603
+ const html = fs12.readFileSync(indexPath, "utf-8");
18381
19604
  return reply.type("text/html").send(injectConfig(html));
18382
19605
  }
18383
19606
  return reply.status(404).send({ error: "Not found" });