@ainyc/canonry 2.2.3 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/assets/{index-Bmnz-wvT.js → index-C_pxQt0X.js} +139 -139
- package/assets/index.html +1 -1
- package/dist/{chunk-TAII35VC.js → chunk-CW6CAPBQ.js} +114 -1
- package/dist/{chunk-MXUOJWNL.js → chunk-JXOUZ6JH.js} +1438 -215
- package/dist/cli.js +445 -146
- package/dist/index.js +2 -2
- package/dist/{intelligence-service-C5LAYDFM.js → intelligence-service-232P7625.js} +1 -1
- package/package.json +7 -6
|
@@ -4,8 +4,11 @@ import {
|
|
|
4
4
|
agentSessions,
|
|
5
5
|
apiKeys,
|
|
6
6
|
auditLog,
|
|
7
|
+
backlinkDomains,
|
|
8
|
+
backlinkSummaries,
|
|
7
9
|
bingCoverageSnapshots,
|
|
8
10
|
bingUrlInspections,
|
|
11
|
+
ccReleaseSyncs,
|
|
9
12
|
competitors,
|
|
10
13
|
createLogger,
|
|
11
14
|
dropLegacyCredentialColumns,
|
|
@@ -27,7 +30,7 @@ import {
|
|
|
27
30
|
runs,
|
|
28
31
|
schedules,
|
|
29
32
|
usageCounters
|
|
30
|
-
} from "./chunk-
|
|
33
|
+
} from "./chunk-CW6CAPBQ.js";
|
|
31
34
|
|
|
32
35
|
// src/config.ts
|
|
33
36
|
import fs from "fs";
|
|
@@ -342,12 +345,12 @@ function printCliError(err, format) {
|
|
|
342
345
|
}
|
|
343
346
|
|
|
344
347
|
// src/server.ts
|
|
345
|
-
import { createRequire as
|
|
346
|
-
import
|
|
347
|
-
import
|
|
348
|
-
import
|
|
348
|
+
import { createRequire as createRequire3 } from "module";
|
|
349
|
+
import crypto27 from "crypto";
|
|
350
|
+
import fs12 from "fs";
|
|
351
|
+
import path15 from "path";
|
|
349
352
|
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
350
|
-
import { eq as
|
|
353
|
+
import { eq as eq29 } from "drizzle-orm";
|
|
351
354
|
import Fastify from "fastify";
|
|
352
355
|
|
|
353
356
|
// ../contracts/src/config-schema.ts
|
|
@@ -616,6 +619,9 @@ function agentBusy(projectName) {
|
|
|
616
619
|
409
|
|
617
620
|
);
|
|
618
621
|
}
|
|
622
|
+
/**
 * Creates the error used when a required runtime dependency is unavailable.
 *
 * @param {string} message - Human-readable explanation.
 * @param {object} [details] - Extra context forwarded to the error unchanged.
 * @returns {AppError} An AppError with code "MISSING_DEPENDENCY" and status 422.
 */
function missingDependency(message, details) {
  const code = "MISSING_DEPENDENCY";
  const statusCode = 422;
  return new AppError(code, message, statusCode, details);
}
|
|
619
625
|
|
|
620
626
|
// ../contracts/src/google.ts
|
|
621
627
|
import { z as z5 } from "zod";
|
|
@@ -944,7 +950,8 @@ var runKindSchema = z8.enum([
|
|
|
944
950
|
"gsc-sync",
|
|
945
951
|
"inspect-sitemap",
|
|
946
952
|
"ga-sync",
|
|
947
|
-
"bing-inspect"
|
|
953
|
+
"bing-inspect",
|
|
954
|
+
"backlink-extract"
|
|
948
955
|
]);
|
|
949
956
|
var RunKinds = runKindSchema.enum;
|
|
950
957
|
var runTriggerSchema = z8.enum(["manual", "scheduled", "config-apply"]);
|
|
@@ -1431,6 +1438,83 @@ var agentMemoryDeleteRequestSchema = z13.object({
|
|
|
1431
1438
|
key: z13.string().min(1).max(AGENT_MEMORY_KEY_MAX_LENGTH)
|
|
1432
1439
|
});
|
|
1433
1440
|
|
|
1441
|
+
// ../contracts/src/backlinks.ts
|
|
1442
|
+
import { z as z14 } from "zod";
|
|
1443
|
+
/** Status values a Common Crawl release sync can report. */
var ccReleaseSyncStatusSchema = z14.enum([
  "queued",
  "downloading",
  "querying",
  "ready",
  "failed"
]);

/** Enum-style lookup object derived from the status schema. */
var CcReleaseSyncStatuses = ccReleaseSyncStatusSchema.enum;

/** DTO for one release sync; every progress field may be null or omitted. */
var ccReleaseSyncDtoSchema = z14.object({
  id: z14.string(),
  release: z14.string(),
  status: ccReleaseSyncStatusSchema,
  phaseDetail: z14.string().nullable().optional(),
  // Downloaded artifact locations, checksums and sizes.
  vertexPath: z14.string().nullable().optional(),
  edgesPath: z14.string().nullable().optional(),
  vertexSha256: z14.string().nullable().optional(),
  edgesSha256: z14.string().nullable().optional(),
  vertexBytes: z14.number().int().nullable().optional(),
  edgesBytes: z14.number().int().nullable().optional(),
  // Aggregate counters.
  projectsProcessed: z14.number().int().nullable().optional(),
  domainsDiscovered: z14.number().int().nullable().optional(),
  // Phase timestamps, carried as strings.
  downloadStartedAt: z14.string().nullable().optional(),
  downloadFinishedAt: z14.string().nullable().optional(),
  queryStartedAt: z14.string().nullable().optional(),
  queryFinishedAt: z14.string().nullable().optional(),
  error: z14.string().nullable().optional(),
  createdAt: z14.string(),
  updatedAt: z14.string()
});

/** One linking domain together with its host count. */
var backlinkDomainDtoSchema = z14.object({
  linkingDomain: z14.string(),
  numHosts: z14.number().int()
});

/** Per-project, per-release backlink summary. */
var backlinkSummaryDtoSchema = z14.object({
  projectId: z14.string(),
  release: z14.string(),
  targetDomain: z14.string(),
  totalLinkingDomains: z14.number().int(),
  totalHosts: z14.number().int(),
  top10HostsShare: z14.string(),
  queriedAt: z14.string()
});

/** Response body of the domain listing: nullable summary, total count, rows. */
var backlinkListResponseSchema = z14.object({
  summary: backlinkSummaryDtoSchema.nullable(),
  total: z14.number().int(),
  rows: z14.array(backlinkDomainDtoSchema)
});

/** One entry of a project's per-release backlink history. */
var backlinkHistoryEntrySchema = z14.object({
  release: z14.string(),
  totalLinkingDomains: z14.number().int(),
  totalHosts: z14.number().int(),
  top10HostsShare: z14.string(),
  queriedAt: z14.string()
});

/** Install status of the DuckDB plugin. */
var backlinksInstallStatusDtoSchema = z14.object({
  duckdbInstalled: z14.boolean(),
  duckdbVersion: z14.string().nullable().optional(),
  duckdbSpec: z14.string(),
  pluginDir: z14.string()
});

/** Result of an install request. */
var backlinksInstallResultDtoSchema = z14.object({
  installed: z14.boolean(),
  version: z14.string(),
  path: z14.string(),
  alreadyPresent: z14.boolean()
});

/** A release available for download, with its file URLs and optional sizes. */
var ccAvailableReleaseSchema = z14.object({
  release: z14.string(),
  vertexUrl: z14.string(),
  edgesUrl: z14.string(),
  vertexBytes: z14.number().int().nullable(),
  edgesBytes: z14.number().int().nullable(),
  lastModified: z14.string().nullable()
});

/** A cached release with its sync status, size and last-used time. */
var ccCachedReleaseSchema = z14.object({
  release: z14.string(),
  syncStatus: ccReleaseSyncStatusSchema.nullable(),
  bytes: z14.number().int(),
  lastUsedAt: z14.string().nullable()
});
|
|
1517
|
+
|
|
1434
1518
|
// ../api-routes/src/auth.ts
|
|
1435
1519
|
import crypto2 from "crypto";
|
|
1436
1520
|
import { eq } from "drizzle-orm";
|
|
@@ -2463,7 +2547,7 @@ async function deliverWebhook(target, payload, webhookSecret) {
|
|
|
2463
2547
|
const body = JSON.stringify(payload);
|
|
2464
2548
|
const isHttps = target.url.protocol === "https:";
|
|
2465
2549
|
const port = target.url.port ? Number(target.url.port) : isHttps ? 443 : 80;
|
|
2466
|
-
const
|
|
2550
|
+
const path16 = `${target.url.pathname}${target.url.search}`;
|
|
2467
2551
|
const headers = {
|
|
2468
2552
|
"Content-Length": String(Buffer.byteLength(body)),
|
|
2469
2553
|
"Content-Type": "application/json",
|
|
@@ -2479,7 +2563,7 @@ async function deliverWebhook(target, payload, webhookSecret) {
|
|
|
2479
2563
|
headers,
|
|
2480
2564
|
hostname: target.address,
|
|
2481
2565
|
method: "POST",
|
|
2482
|
-
path:
|
|
2566
|
+
path: path16,
|
|
2483
2567
|
port,
|
|
2484
2568
|
timeout: REQUEST_TIMEOUT_MS
|
|
2485
2569
|
};
|
|
@@ -5657,6 +5741,171 @@ var routeCatalog = [
|
|
|
5657
5741
|
200: { description: "Health history returned." },
|
|
5658
5742
|
404: { description: "Project not found." }
|
|
5659
5743
|
}
|
|
5744
|
+
},
|
|
5745
|
+
{
|
|
5746
|
+
method: "get",
|
|
5747
|
+
path: "/api/v1/backlinks/status",
|
|
5748
|
+
summary: "Get the Common Crawl DuckDB plugin install status",
|
|
5749
|
+
description: "Reports whether @duckdb/node-api is installed in the local plugin dir. Returns MISSING_DEPENDENCY (422) on deployments that cannot host the plugin (e.g. the cloud API).",
|
|
5750
|
+
tags: ["backlinks"],
|
|
5751
|
+
responses: {
|
|
5752
|
+
200: { description: "Install status returned." },
|
|
5753
|
+
422: { description: "Backlinks feature is not available on this deployment." }
|
|
5754
|
+
}
|
|
5755
|
+
},
|
|
5756
|
+
{
|
|
5757
|
+
method: "post",
|
|
5758
|
+
path: "/api/v1/backlinks/install",
|
|
5759
|
+
summary: "Install the @duckdb/node-api plugin",
|
|
5760
|
+
description: "Idempotently installs DuckDB into the canonry plugin dir. Returns MISSING_DEPENDENCY (422) when the host cannot perform the install.",
|
|
5761
|
+
tags: ["backlinks"],
|
|
5762
|
+
responses: {
|
|
5763
|
+
200: { description: "Installed (or already present)." },
|
|
5764
|
+
422: { description: "Backlinks feature is not available on this deployment." }
|
|
5765
|
+
}
|
|
5766
|
+
},
|
|
5767
|
+
{
|
|
5768
|
+
method: "post",
|
|
5769
|
+
path: "/api/v1/backlinks/syncs",
|
|
5770
|
+
summary: "Queue a workspace-wide Common Crawl release sync",
|
|
5771
|
+
description: "Creates a `cc_release_syncs` row and fires the sync callback. Idempotent: an existing in-flight row for the same release is returned.",
|
|
5772
|
+
tags: ["backlinks"],
|
|
5773
|
+
requestBody: {
|
|
5774
|
+
required: true,
|
|
5775
|
+
content: {
|
|
5776
|
+
"application/json": {
|
|
5777
|
+
schema: {
|
|
5778
|
+
type: "object",
|
|
5779
|
+
required: ["release"],
|
|
5780
|
+
properties: {
|
|
5781
|
+
release: stringSchema
|
|
5782
|
+
}
|
|
5783
|
+
}
|
|
5784
|
+
}
|
|
5785
|
+
}
|
|
5786
|
+
},
|
|
5787
|
+
responses: {
|
|
5788
|
+
200: { description: "Existing in-flight sync returned." },
|
|
5789
|
+
201: { description: "Sync queued." },
|
|
5790
|
+
400: { description: "Invalid release id." },
|
|
5791
|
+
422: { description: "Backlinks feature is not available on this deployment." }
|
|
5792
|
+
}
|
|
5793
|
+
},
|
|
5794
|
+
{
|
|
5795
|
+
method: "get",
|
|
5796
|
+
path: "/api/v1/backlinks/syncs",
|
|
5797
|
+
summary: "List Common Crawl release syncs",
|
|
5798
|
+
description: "Returns syncs ordered by updatedAt DESC \u2014 re-queued rows surface ahead of untouched newer rows.",
|
|
5799
|
+
tags: ["backlinks"],
|
|
5800
|
+
responses: {
|
|
5801
|
+
200: { description: "Sync history returned." }
|
|
5802
|
+
}
|
|
5803
|
+
},
|
|
5804
|
+
{
|
|
5805
|
+
method: "get",
|
|
5806
|
+
path: "/api/v1/backlinks/syncs/latest",
|
|
5807
|
+
summary: "Get the most recently-updated Common Crawl release sync",
|
|
5808
|
+
tags: ["backlinks"],
|
|
5809
|
+
responses: {
|
|
5810
|
+
200: { description: "Latest sync returned, or null when no sync exists." }
|
|
5811
|
+
}
|
|
5812
|
+
},
|
|
5813
|
+
{
|
|
5814
|
+
method: "get",
|
|
5815
|
+
path: "/api/v1/backlinks/releases",
|
|
5816
|
+
summary: "List cached Common Crawl releases on the local filesystem",
|
|
5817
|
+
tags: ["backlinks"],
|
|
5818
|
+
responses: {
|
|
5819
|
+
200: { description: "Cached release metadata returned." }
|
|
5820
|
+
}
|
|
5821
|
+
},
|
|
5822
|
+
{
|
|
5823
|
+
method: "delete",
|
|
5824
|
+
path: "/api/v1/backlinks/cache/{release}",
|
|
5825
|
+
summary: "Prune a cached Common Crawl release",
|
|
5826
|
+
tags: ["backlinks"],
|
|
5827
|
+
parameters: [
|
|
5828
|
+
{
|
|
5829
|
+
name: "release",
|
|
5830
|
+
in: "path",
|
|
5831
|
+
required: true,
|
|
5832
|
+
description: "Release id (e.g. cc-main-2026-jan-feb-mar).",
|
|
5833
|
+
schema: stringSchema
|
|
5834
|
+
}
|
|
5835
|
+
],
|
|
5836
|
+
responses: {
|
|
5837
|
+
200: { description: "Cache pruned." },
|
|
5838
|
+
400: { description: "Invalid release id." },
|
|
5839
|
+
422: { description: "Backlinks feature is not available on this deployment." }
|
|
5840
|
+
}
|
|
5841
|
+
},
|
|
5842
|
+
{
|
|
5843
|
+
method: "post",
|
|
5844
|
+
path: "/api/v1/projects/{name}/backlinks/extract",
|
|
5845
|
+
summary: "Extract backlinks for a single project from a cached release",
|
|
5846
|
+
description: 'Creates a `runs` row with kind="backlink-extract" and fires the extract callback. Defaults to the most recent ready release when `release` is omitted.',
|
|
5847
|
+
tags: ["backlinks"],
|
|
5848
|
+
parameters: [nameParameter],
|
|
5849
|
+
requestBody: {
|
|
5850
|
+
required: false,
|
|
5851
|
+
content: {
|
|
5852
|
+
"application/json": {
|
|
5853
|
+
schema: {
|
|
5854
|
+
type: "object",
|
|
5855
|
+
properties: {
|
|
5856
|
+
release: stringSchema
|
|
5857
|
+
}
|
|
5858
|
+
}
|
|
5859
|
+
}
|
|
5860
|
+
}
|
|
5861
|
+
},
|
|
5862
|
+
responses: {
|
|
5863
|
+
201: { description: "Extract run queued." },
|
|
5864
|
+
400: { description: "Invalid release id." },
|
|
5865
|
+
404: { description: "Project not found." },
|
|
5866
|
+
422: { description: "Backlinks feature is not available on this deployment." }
|
|
5867
|
+
}
|
|
5868
|
+
},
|
|
5869
|
+
{
|
|
5870
|
+
method: "get",
|
|
5871
|
+
path: "/api/v1/projects/{name}/backlinks/summary",
|
|
5872
|
+
summary: "Get the latest backlink summary for a project",
|
|
5873
|
+
tags: ["backlinks"],
|
|
5874
|
+
parameters: [
|
|
5875
|
+
nameParameter,
|
|
5876
|
+
{ name: "release", in: "query", description: "Release id filter.", schema: stringSchema }
|
|
5877
|
+
],
|
|
5878
|
+
responses: {
|
|
5879
|
+
200: { description: "Summary returned, or null when no backlinks exist." },
|
|
5880
|
+
404: { description: "Project not found." }
|
|
5881
|
+
}
|
|
5882
|
+
},
|
|
5883
|
+
{
|
|
5884
|
+
method: "get",
|
|
5885
|
+
path: "/api/v1/projects/{name}/backlinks/domains",
|
|
5886
|
+
summary: "Paginate backlink domains for a project",
|
|
5887
|
+
tags: ["backlinks"],
|
|
5888
|
+
parameters: [
|
|
5889
|
+
nameParameter,
|
|
5890
|
+
{ name: "release", in: "query", description: "Release id filter.", schema: stringSchema },
|
|
5891
|
+
{ name: "limit", in: "query", description: "Max results (1-500).", schema: stringSchema },
|
|
5892
|
+
{ name: "offset", in: "query", description: "Pagination offset.", schema: stringSchema }
|
|
5893
|
+
],
|
|
5894
|
+
responses: {
|
|
5895
|
+
200: { description: "Domain list returned." },
|
|
5896
|
+
404: { description: "Project not found." }
|
|
5897
|
+
}
|
|
5898
|
+
},
|
|
5899
|
+
{
|
|
5900
|
+
method: "get",
|
|
5901
|
+
path: "/api/v1/projects/{name}/backlinks/history",
|
|
5902
|
+
summary: "Get per-release backlink summaries for a project",
|
|
5903
|
+
tags: ["backlinks"],
|
|
5904
|
+
parameters: [nameParameter],
|
|
5905
|
+
responses: {
|
|
5906
|
+
200: { description: "History returned oldest-first by queriedAt." },
|
|
5907
|
+
404: { description: "Project not found." }
|
|
5908
|
+
}
|
|
5660
5909
|
}
|
|
5661
5910
|
];
|
|
5662
5911
|
var canonryLocalRouteCatalog = [
|
|
@@ -5791,8 +6040,8 @@ async function openApiRoutes(app, opts = {}) {
|
|
|
5791
6040
|
return reply.type("application/json").send(buildOpenApiDocument(opts));
|
|
5792
6041
|
});
|
|
5793
6042
|
}
|
|
5794
|
-
function buildOperationId(method,
|
|
5795
|
-
const parts =
|
|
6043
|
+
function buildOperationId(method, path16) {
|
|
6044
|
+
const parts = path16.split("/").filter(Boolean).map((part) => {
|
|
5796
6045
|
if (part.startsWith("{") && part.endsWith("}")) {
|
|
5797
6046
|
return `by-${part.slice(1, -1)}`;
|
|
5798
6047
|
}
|
|
@@ -9435,10 +9684,10 @@ function buildAuthErrorMessage(res, responseText) {
|
|
|
9435
9684
|
}
|
|
9436
9685
|
return "WordPress credentials are invalid or lack permission for this action";
|
|
9437
9686
|
}
|
|
9438
|
-
async function fetchJson(connection, siteUrl,
|
|
9687
|
+
async function fetchJson(connection, siteUrl, path16, init) {
|
|
9439
9688
|
if (siteUrl.startsWith("http:")) {
|
|
9440
9689
|
}
|
|
9441
|
-
const res = await fetch(`${normalizeSiteUrl(siteUrl)}${
|
|
9690
|
+
const res = await fetch(`${normalizeSiteUrl(siteUrl)}${path16}`, {
|
|
9442
9691
|
...init,
|
|
9443
9692
|
headers: {
|
|
9444
9693
|
"Authorization": `Basic ${encodeBasicAuth(connection.username, connection.appPassword)}`,
|
|
@@ -10920,6 +11169,566 @@ async function wordpressRoutes(app, opts) {
|
|
|
10920
11169
|
});
|
|
10921
11170
|
}
|
|
10922
11171
|
|
|
11172
|
+
// ../api-routes/src/backlinks.ts
|
|
11173
|
+
import crypto18 from "crypto";
|
|
11174
|
+
import { and as and7, asc as asc2, desc as desc8, eq as eq18, sql as sql5 } from "drizzle-orm";
|
|
11175
|
+
|
|
11176
|
+
// ../integration-commoncrawl/src/constants.ts
|
|
11177
|
+
import os3 from "os";
|
|
11178
|
+
import path3 from "path";
|
|
11179
|
+
// Base URL under which each release's hyperlink-graph files are published.
var CC_BASE_URL = "https://data.commoncrawl.org/projects/hyperlinkgraph";

// Plugin directory inside the user's home directory, and its package manifest.
var PLUGIN_DIR = path3.join(os3.homedir(), ".canonry", "plugins");
var PLUGIN_PKG_JSON = path3.join(PLUGIN_DIR, "package.json");

// Package spec handed to the package manager; CANONRY_DUCKDB_SPEC overrides it.
var DUCKDB_SPEC =
  process.env.CANONRY_DUCKDB_SPEC ?? "@duckdb/node-api@1.4.4-r.3";

// Cache directory for downloaded releases; CANONRY_CC_CACHE_DIR overrides it.
var CC_CACHE_DIR =
  process.env.CANONRY_CC_CACHE_DIR ??
  path3.join(os3.homedir(), ".canonry", "cache", "commoncrawl");

// Release ids: "cc-main-", a four-digit year, then one of four quarter labels.
var RELEASE_ID_REGEX = /^cc-main-(\d{4})-(jan-feb-mar|apr-may-jun|jul-aug-sep|oct-nov-dec)$/;
|
|
11185
|
+
/**
 * Derives the file names and download URLs of a release's domain graph.
 *
 * @param {string} release - Release id; not validated here.
 * @returns {{vertexUrl: string, edgesUrl: string, vertexFilename: string, edgesFilename: string}}
 */
function ccReleasePaths(release) {
  const filenameFor = (kind) => `${release}-domain-${kind}.txt.gz`;
  const urlFor = (filename) => `${CC_BASE_URL}/${release}/domain/${filename}`;
  const vertexFilename = filenameFor("vertices");
  const edgesFilename = filenameFor("edges");
  return {
    vertexUrl: urlFor(vertexFilename),
    edgesUrl: urlFor(edgesFilename),
    vertexFilename,
    edgesFilename
  };
}
|
|
11196
|
+
|
|
11197
|
+
// ../integration-commoncrawl/src/reverse-domain.ts
|
|
11198
|
+
/**
 * Reverses the dot-separated labels of a domain ("www.example.com" becomes
 * "com.example.www").
 *
 * @param {string} domain
 * @returns {string}
 */
function reverseDomain(domain) {
  const labels = domain.split(".");
  labels.reverse();
  return labels.join(".");
}
|
|
11201
|
+
/**
 * Turns a reversed domain ("com.example.www") back into forward label order
 * ("www.example.com").
 *
 * @param {string} revDomain
 * @returns {string}
 */
function forwardDomain(revDomain) {
  const labels = [];
  for (const label of revDomain.split(".")) {
    // Prepending each label reverses the overall order.
    labels.unshift(label);
  }
  return labels.join(".");
}
|
|
11204
|
+
|
|
11205
|
+
// ../integration-commoncrawl/src/release-id.ts
|
|
11206
|
+
/**
 * Reports whether `id` is a well-formed release id.
 *
 * The type guard matters: `RegExp.prototype.test` converts its argument to a
 * string, so without it a non-string such as `["cc-main-2026-jan-feb-mar"]`
 * would be accepted.
 *
 * @param {unknown} id - Candidate release id.
 * @returns {boolean} True only for strings matching RELEASE_ID_REGEX.
 */
function isValidReleaseId(id) {
  return typeof id === "string" && RELEASE_ID_REGEX.test(id);
}
|
|
11209
|
+
|
|
11210
|
+
// ../integration-commoncrawl/src/downloader.ts
|
|
11211
|
+
import { createHash } from "crypto";
|
|
11212
|
+
import { createWriteStream } from "fs";
|
|
11213
|
+
import fs3 from "fs/promises";
|
|
11214
|
+
import path4 from "path";
|
|
11215
|
+
import { pipeline } from "stream/promises";
|
|
11216
|
+
import { Readable, Transform } from "stream";
|
|
11217
|
+
/**
 * Downloads `opts.url` to `opts.destPath`, hashing the bytes as they stream.
 *
 * If the destination already exists it is reused: the digest comes from the
 * `<destPath>.sha256` sidecar, or is recomputed (and the sidecar rewritten)
 * when the sidecar is missing or invalid. Otherwise the body is streamed to
 * `<destPath>.partial`, renamed into place, and a sidecar is written.
 *
 * @param {object} opts
 * @param {string} opts.url - Source URL.
 * @param {string} opts.destPath - Final file location.
 * @param {Function} [opts.fetchImpl] - fetch-compatible function; defaults to global fetch.
 * @param {(bytes: number, total: number|null) => void} [opts.onProgress] - Called per chunk.
 * @returns {Promise<{bytes: number, sha256: string, cached: boolean, elapsedMs: number}>}
 * @throws {Error} On a non-OK response, a missing body, or a stream/filesystem failure.
 */
async function downloadFile(opts) {
  const start = Date.now();
  const fetchImpl = opts.fetchImpl ?? fetch;
  const sidecarPath = `${opts.destPath}.sha256`;
  try {
    const stat = await fs3.stat(opts.destPath);
    const sidecar = await readSidecar(sidecarPath);
    const sha2562 = sidecar ?? await hashFile(opts.destPath);
    if (!sidecar) await writeSidecar(sidecarPath, sha2562);
    return { bytes: stat.size, sha256: sha2562, cached: true, elapsedMs: Date.now() - start };
  } catch {
    // Best effort: any failure on the cached path (typically a missing file)
    // falls through to a fresh download.
  }
  const partialPath = `${opts.destPath}.partial`;
  await fs3.mkdir(path4.dirname(opts.destPath), { recursive: true });
  await unlinkIfExists(partialPath);
  const res = await fetchImpl(opts.url);
  if (!res.ok || !res.body) {
    // Discard an unread error body so the response stream is not left open.
    try {
      await res.body?.cancel?.();
    } catch {
      // Nothing useful to do if cancelling fails; the HTTP error is what matters.
    }
    throw new Error(`HTTP ${res.status} ${res.statusText} for ${opts.url}`);
  }
  const total = parseContentLength(res.headers.get("content-length"));
  const hasher = createHash("sha256");
  let bytes = 0;
  const hashAndCount = new Transform({
    transform(chunk, _enc, cb) {
      hasher.update(chunk);
      bytes += chunk.length;
      opts.onProgress?.(bytes, total);
      cb(null, chunk);
    }
  });
  let sha256;
  try {
    await pipeline(
      Readable.fromWeb(res.body),
      hashAndCount,
      createWriteStream(partialPath)
    );
    sha256 = hasher.digest("hex");
    await fs3.rename(partialPath, opts.destPath);
  } catch (err) {
    // Do not leave a truncated .partial file behind after a failed transfer.
    await unlinkIfExists(partialPath);
    throw err;
  }
  await writeSidecar(sidecarPath, sha256);
  return { bytes, sha256, cached: false, elapsedMs: Date.now() - start };
}
|
|
11257
|
+
/**
 * Computes the SHA-256 digest of a file by streaming its contents.
 *
 * @param {string} filePath - File to hash.
 * @returns {Promise<string>} Hex-encoded digest.
 */
async function hashFile(filePath) {
  const digest = createHash("sha256");
  const fileHandle = await fs3.open(filePath, "r");
  try {
    const contents = fileHandle.createReadStream();
    for await (const piece of contents) {
      digest.update(piece);
    }
  } finally {
    // Close the handle whether or not reading succeeded.
    await fileHandle.close();
  }
  return digest.digest("hex");
}
|
|
11268
|
+
/**
 * Reads a checksum sidecar file.
 *
 * @param {string} sidecarPath - Path of the sidecar file.
 * @returns {Promise<string|null>} The lower-cased 64-character hex digest, or
 *   null when the file cannot be read or does not hold exactly one digest.
 */
async function readSidecar(sidecarPath) {
  let contents;
  try {
    contents = await fs3.readFile(sidecarPath, "utf8");
  } catch {
    return null;
  }
  const candidate = contents.trim();
  if (!/^[0-9a-f]{64}$/i.test(candidate)) {
    return null;
  }
  return candidate.toLowerCase();
}
|
|
11277
|
+
/**
 * Writes a digest to its sidecar file, followed by a single newline.
 *
 * @param {string} sidecarPath - Destination path.
 * @param {string} sha256 - Digest text to store.
 * @returns {Promise<void>}
 */
async function writeSidecar(sidecarPath, sha256) {
  const line = `${sha256}\n`;
  await fs3.writeFile(sidecarPath, line);
}
|
|
11281
|
+
/**
 * Deletes a file, ignoring any failure (including the file not existing).
 *
 * @param {string} p - Path to remove.
 * @returns {Promise<void>}
 */
async function unlinkIfExists(p) {
  await fs3.unlink(p).catch(() => {
    // Deliberately ignored: removal is best effort.
  });
}
|
|
11287
|
+
/**
 * Parses a Content-Length header value.
 *
 * Only a pure run of decimal digits is accepted (surrounding whitespace is
 * tolerated). Values such as "12abc", "-5" or "1e3" return null instead of
 * being partially parsed.
 *
 * @param {string|null|undefined} value - Raw header value.
 * @returns {number|null} The length as a safe integer, or null when absent or malformed.
 */
function parseContentLength(value) {
  if (value == null) return null;
  const text = String(value).trim();
  if (!/^\d+$/.test(text)) return null;
  const n = Number(text);
  return Number.isSafeInteger(n) ? n : null;
}
|
|
11292
|
+
|
|
11293
|
+
// ../integration-commoncrawl/src/plugin-resolver.ts
|
|
11294
|
+
import fs4 from "fs";
|
|
11295
|
+
import { createRequire as createRequire2 } from "module";
|
|
11296
|
+
import path5 from "path";
|
|
11297
|
+
/**
 * Returns the directory that contains the given plugin package.json path.
 *
 * @param {string} pkgJson - Path to the plugin package.json.
 * @returns {string}
 */
function pluginDirFor(pkgJson) {
  const containingDir = path5.dirname(pkgJson);
  return containingDir;
}
|
|
11300
|
+
/**
 * Builds the path of the package.json of @duckdb/node-api as installed under
 * a plugin directory's node_modules.
 *
 * @param {string} pluginDir - Plugin directory.
 * @returns {string}
 */
function duckdbPkgJsonFor(pluginDir) {
  const segments = ["node_modules", "@duckdb", "node-api", "package.json"];
  return path5.join(pluginDir, ...segments);
}
|
|
11303
|
+
/**
 * Loads @duckdb/node-api from the plugin directory.
 *
 * @param {object} [opts]
 * @param {string} [opts.pluginPkgJson] - Plugin package.json path; defaults to PLUGIN_PKG_JSON.
 * @returns {*} The module returned by requiring "@duckdb/node-api".
 * @throws The error built by `missingDependency` when the package is absent
 *   or fails to load. In the load-failure case the underlying error is
 *   attached as `cause` so the real reason is not lost.
 */
function loadDuckdb(opts = {}) {
  const pkgJson = opts.pluginPkgJson ?? PLUGIN_PKG_JSON;
  const pluginDir = pluginDirFor(pkgJson);
  const duckdbPkg = duckdbPkgJsonFor(pluginDir);
  if (!fs4.existsSync(duckdbPkg)) {
    throw missingDependency(
      "@duckdb/node-api is not installed. Run `canonry backlinks install` to enable the backlinks feature.",
      { pluginDir }
    );
  }
  try {
    // Resolve relative to the installed package so the plugin dir's
    // node_modules is searched.
    const pluginRequire = createRequire2(duckdbPkg);
    return pluginRequire("@duckdb/node-api");
  } catch (err) {
    const failure = missingDependency(
      "@duckdb/node-api is installed but failed to load. Re-run `canonry backlinks install`.",
      { pluginDir }
    );
    // Keep the original load error for debugging without changing the
    // message or details that callers already see.
    if (failure !== null && typeof failure === "object" && Object.isExtensible(failure)) {
      failure.cause = err;
    }
    throw failure;
  }
}
|
|
11323
|
+
/**
 * Checks whether the package.json of @duckdb/node-api exists under the
 * plugin directory.
 *
 * @param {object} [opts]
 * @param {string} [opts.pluginPkgJson] - Plugin package.json path; defaults to PLUGIN_PKG_JSON.
 * @returns {boolean}
 */
function isDuckdbInstalled(opts = {}) {
  const manifestPath = opts.pluginPkgJson ?? PLUGIN_PKG_JSON;
  const pluginDir = pluginDirFor(manifestPath);
  const duckdbManifest = duckdbPkgJsonFor(pluginDir);
  return fs4.existsSync(duckdbManifest);
}
|
|
11327
|
+
/**
 * Reads the `version` field from the installed @duckdb/node-api package.json.
 *
 * @param {object} [opts]
 * @param {string} [opts.pluginPkgJson] - Plugin package.json path; when not
 *   given, PLUGIN_DIR is used as the plugin directory.
 * @returns {string|null} The version, or null when the manifest cannot be
 *   read or parsed, or has no version.
 */
function readInstalledVersion(opts = {}) {
  let pluginDir;
  if (opts.pluginPkgJson) {
    pluginDir = pluginDirFor(opts.pluginPkgJson);
  } else {
    pluginDir = PLUGIN_DIR;
  }
  try {
    const manifestText = fs4.readFileSync(duckdbPkgJsonFor(pluginDir), "utf8");
    const { version } = JSON.parse(manifestText);
    return version ?? null;
  } catch {
    return null;
  }
}
|
|
11337
|
+
|
|
11338
|
+
// ../integration-commoncrawl/src/plugin-installer.ts
|
|
11339
|
+
import { spawn } from "child_process";
|
|
11340
|
+
import fs5 from "fs/promises";
|
|
11341
|
+
import path6 from "path";
|
|
11342
|
+
/**
 * Installs @duckdb/node-api into the plugin directory unless it is already there.
 *
 * @param {object} [opts]
 * @param {string} [opts.pluginDir] - Target directory; defaults to PLUGIN_DIR.
 * @param {string} [opts.spec] - Package spec; defaults to DUCKDB_SPEC.
 * @param {string} [opts.packageManager] - Package manager command; defaults to "npm".
 * @param {(line: string) => void} [opts.onLog] - Receives installer output lines.
 * @returns {Promise<{alreadyPresent: boolean, version: string, path: string}>}
 * @throws {Error} When the install command fails, or the package is still
 *   not present afterwards.
 */
async function installDuckdb(opts = {}) {
  const targetDir = opts.pluginDir ?? PLUGIN_DIR;
  const manifestPath = path6.join(targetDir, "package.json");
  const packageSpec = opts.spec ?? DUCKDB_SPEC;
  const manager = opts.packageManager ?? "npm";
  const installedVersion = () =>
    readInstalledVersion({ pluginPkgJson: manifestPath }) ?? "unknown";

  await ensurePluginDir(targetDir, manifestPath);

  // Nothing to do when the package is already present.
  if (isDuckdbInstalled({ pluginPkgJson: manifestPath })) {
    return { alreadyPresent: true, version: installedVersion(), path: targetDir };
  }

  await runInstall(manager, packageSpec, targetDir, opts.onLog);

  const nowInstalled = isDuckdbInstalled({ pluginPkgJson: manifestPath });
  if (!nowInstalled) {
    throw new Error(`${manager} install completed but @duckdb/node-api still cannot be resolved from ${targetDir}`);
  }
  return { alreadyPresent: false, version: installedVersion(), path: targetDir };
}
|
|
11359
|
+
/**
 * Ensures the plugin directory exists and contains a package.json.
 *
 * The manifest path defaults to `<pluginDir>/package.json`, so passing only
 * a custom `pluginDir` no longer targets the global manifest location.
 *
 * @param {string} [pluginDir=PLUGIN_DIR] - Directory to create if missing.
 * @param {string} [pluginPkgJson] - Manifest path; defaults to package.json inside `pluginDir`.
 * @returns {Promise<void>}
 */
async function ensurePluginDir(pluginDir = PLUGIN_DIR, pluginPkgJson = path6.join(pluginDir, "package.json")) {
  await fs5.mkdir(pluginDir, { recursive: true });
  try {
    await fs5.access(pluginPkgJson);
  } catch {
    // No accessible manifest yet: write a minimal private package.
    const contents = JSON.stringify({ name: "canonry-plugins", private: true, dependencies: {} }, null, 2);
    await fs5.writeFile(pluginPkgJson, `${contents}\n`);
  }
}
|
|
11369
|
+
/**
 * Runs the package manager to install `spec` into `pluginDir`.
 *
 * pnpm is invoked as `pnpm add <spec> --dir <pluginDir>`; any other manager
 * as `<manager> install <spec> --prefix <pluginDir>`. When `onLog` is given,
 * stdout and stderr are piped and forwarded one complete line at a time;
 * otherwise the child inherits the parent's stdio.
 *
 * @param {string} pkgManager - Command to spawn.
 * @param {string} spec - Package spec to install.
 * @param {string} pluginDir - Install target directory.
 * @param {(line: string) => void} [onLog] - Receives each non-empty output line.
 * @returns {Promise<void>} Resolves on exit code 0.
 * @throws {Error} When the process cannot be spawned, exits non-zero, or is
 *   terminated by a signal.
 */
async function runInstall(pkgManager, spec, pluginDir, onLog) {
  const args = pkgManager === "pnpm" ? ["add", spec, "--dir", pluginDir] : ["install", spec, "--prefix", pluginDir];
  await new Promise((resolve, reject) => {
    const child = spawn(pkgManager, args, {
      stdio: onLog ? ["ignore", "pipe", "pipe"] : "inherit"
    });
    const flushers = [];
    if (onLog) {
      for (const stream of [child.stdout, child.stderr]) {
        if (!stream) continue;
        stream.setEncoding("utf8");
        // A chunk can end mid-line; keep the unfinished tail until the rest arrives.
        let pending = "";
        stream.on("data", (chunk) => {
          const lines = (pending + chunk).split(/\r?\n/);
          pending = lines.pop() ?? "";
          for (const line of lines) {
            if (line.length > 0) onLog(line);
          }
        });
        flushers.push(() => {
          if (pending.length > 0) onLog(pending);
          pending = "";
        });
      }
    }
    child.on("error", reject);
    // "close" fires after the stdio streams have ended, so no output is lost;
    // "exit" can fire while data is still buffered.
    child.on("close", (code, signal) => {
      for (const flush of flushers) flush();
      if (code === 0) {
        resolve();
      } else if (code === null) {
        reject(new Error(`${pkgManager} install was terminated by signal ${signal}`));
      } else {
        reject(new Error(`${pkgManager} install exited with code ${code}`));
      }
    });
  });
}
|
|
11396
|
+
|
|
11397
|
+
// ../integration-commoncrawl/src/duckdb-query.ts
|
|
11398
|
+
/**
 * Finds the domains that link to each target domain, using the release's
 * vertex and edge files through an in-memory DuckDB instance.
 *
 * @param {object} opts
 * @param {object} opts.duckdb - Loaded DuckDB module exposing `DuckDBInstance.create`.
 * @param {string[]} opts.targets - Target domains in forward order.
 * @param {string} opts.vertexPath - Path of the vertices file.
 * @param {string} opts.edgesPath - Path of the edges file.
 * @param {number} [opts.limitPerTarget] - When truthy, keeps only the top N
 *   linking domains per target, ranked by num_hosts.
 * @returns {Promise<Array<{targetDomain: string, linkingDomain: string, numHosts: number}>>}
 */
async function queryBacklinks(opts) {
  if (opts.targets.length === 0) return [];
  const duckdb = opts.duckdb;
  const reversed = opts.targets.map(reverseDomain);
  // Values are embedded as quoted SQL literals; `quote` doubles single quotes.
  const targetList = reversed.map(quote).join(", ");
  const limitClause = opts.limitPerTarget ? `QUALIFY row_number() OVER (PARTITION BY t.target_rev_domain ORDER BY v.num_hosts DESC) <= ${Math.floor(opts.limitPerTarget)}` : "";
  // The graph files are tab-separated, so the delimiter is written as an
  // explicit \t (a real tab once the template literal is evaluated).
  const sql10 = `
    WITH vertices AS (
      SELECT * FROM read_csv(
        ${quote(opts.vertexPath)},
        delim='\t', header=false,
        columns={'id':'BIGINT','rev_domain':'VARCHAR','num_hosts':'BIGINT'}
      )
    ),
    targets AS (
      SELECT v.id AS target_id, v.rev_domain AS target_rev_domain
      FROM vertices v
      WHERE v.rev_domain IN (${targetList})
    ),
    inbound AS (
      SELECT e.from_id, e.to_id
      FROM read_csv(
        ${quote(opts.edgesPath)},
        delim='\t', header=false,
        columns={'from_id':'BIGINT','to_id':'BIGINT'}
      ) e
      WHERE e.to_id IN (SELECT target_id FROM targets)
    )
    SELECT
      t.target_rev_domain,
      v.rev_domain AS linking_rev_domain,
      v.num_hosts
    FROM inbound i
    JOIN targets t ON t.target_id = i.to_id
    JOIN vertices v ON v.id = i.from_id
    ${limitClause}
    ORDER BY t.target_rev_domain, v.num_hosts DESC
  `;
  const instance = await duckdb.DuckDBInstance.create(":memory:");
  let rows;
  try {
    // Connecting inside the outer try means the instance is closed even when
    // connect() itself fails.
    const conn = await instance.connect();
    try {
      const reader = await conn.runAndReadAll(sql10);
      rows = reader.getRowObjects();
    } finally {
      conn.disconnectSync?.();
      conn.closeSync?.();
    }
  } finally {
    instance.closeSync?.();
  }
  return rows.map((r) => ({
    targetDomain: forwardDomain(String(r["target_rev_domain"])),
    linkingDomain: forwardDomain(String(r["linking_rev_domain"])),
    numHosts: Number(r["num_hosts"])
  }));
}
|
|
11453
|
+
/**
 * Wraps a string in single quotes for use as a SQL string literal, doubling
 * any single quotes it contains.
 *
 * @param {string} s
 * @returns {string}
 */
function quote(s) {
  const escaped = s.split("'").join("''");
  return "'" + escaped + "'";
}
|
|
11456
|
+
|
|
11457
|
+
// ../integration-commoncrawl/src/cache.ts
|
|
11458
|
+
import fs6 from "fs";
|
|
11459
|
+
import path7 from "path";
|
|
11460
|
+
/**
 * Resolves the cache directory: `opts.cacheDir` when provided (not null or
 * undefined), otherwise CC_CACHE_DIR.
 *
 * @param {object} [opts]
 * @param {string} [opts.cacheDir]
 * @returns {string}
 */
function cacheRoot(opts = {}) {
  const override = opts.cacheDir;
  return override ?? CC_CACHE_DIR;
}
|
|
11463
|
+
/**
 * Walks a directory tree and reports its total file size and most recent use.
 *
 * Entries that cannot be stat-ed or listed are skipped. "Last used" is the
 * greatest modification or access time among regular files.
 *
 * @param {string} dir - Root path to measure.
 * @returns {{bytes: number, lastUsedAt: string|null}} `lastUsedAt` is an ISO
 *   timestamp, or null when no file contributed a positive time.
 */
function directoryBytesAndLastUsed(dir) {
  let totalBytes = 0;
  let newestMs = 0;
  // Explicit work list in place of recursion; visiting order does not affect
  // a sum or a maximum.
  const pending = [dir];
  while (pending.length > 0) {
    const current = pending.pop();
    let info;
    try {
      info = fs6.statSync(current);
    } catch {
      continue;
    }
    if (info.isDirectory()) {
      let children;
      try {
        children = fs6.readdirSync(current);
      } catch {
        continue;
      }
      for (const child of children) {
        pending.push(path7.join(current, child));
      }
      continue;
    }
    if (!info.isFile()) {
      continue;
    }
    totalBytes += info.size;
    const touchedMs = Math.max(info.mtimeMs, info.atimeMs);
    if (touchedMs > newestMs) {
      newestMs = touchedMs;
    }
  }
  const lastUsedAt = newestMs > 0 ? new Date(newestMs).toISOString() : null;
  return { bytes: totalBytes, lastUsedAt };
}
|
|
11493
|
+
/**
 * Lists the release directories in the cache root.
 *
 * Only sub-directories whose name matches RELEASE_ID_REGEX are reported.
 * The result is ordered by `lastUsedAt` descending; a null `lastUsedAt`
 * compares as an empty string.
 *
 * @param {object} [opts] - Passed to `cacheRoot`.
 * @returns {Array<{release: string, bytes: number, lastUsedAt: string|null}>}
 *   Empty when the cache root does not exist.
 */
function listCachedReleases(opts = {}) {
  const root = cacheRoot(opts);
  if (!fs6.existsSync(root)) {
    return [];
  }
  const releases = fs6
    .readdirSync(root, { withFileTypes: true })
    .filter((entry) => entry.isDirectory() && RELEASE_ID_REGEX.test(entry.name))
    .map((entry) => {
      const { bytes, lastUsedAt } = directoryBytesAndLastUsed(path7.join(root, entry.name));
      return { release: entry.name, bytes, lastUsedAt };
    });
  releases.sort((a, b) => (b.lastUsedAt ?? "").localeCompare(a.lastUsedAt ?? ""));
  return releases;
}
|
|
11508
|
+
/**
 * Deletes the cached directory of one release.
 *
 * The id is checked against RELEASE_ID_REGEX before it is joined onto the
 * cache root. Removal is recursive and forced, so a missing directory is not
 * an error.
 *
 * @param {string} release - Release id.
 * @param {object} [opts] - Passed to `cacheRoot`.
 * @throws {Error} When `release` is not a valid release id.
 */
function pruneCachedRelease(release, opts = {}) {
  const wellFormed = RELEASE_ID_REGEX.test(release);
  if (!wellFormed) {
    throw new Error(`Invalid release id: ${release}`);
  }
  const releaseDir = path7.join(cacheRoot(opts), release);
  fs6.rmSync(releaseDir, { recursive: true, force: true });
}
|
|
11515
|
+
|
|
11516
|
+
// ../api-routes/src/backlinks.ts
|
|
11517
|
+
// Message used when backlinks sync/install is requested somewhere it cannot run.
var BACKLINKS_UNSUPPORTED_MESSAGE = "Backlinks sync and install are only available from a local canonry install. Run `canonry backlinks install` locally to use this feature.";

// Sync statuses that are not terminal: queued, downloading and querying.
var NON_TERMINAL_SYNC_STATUSES = /* @__PURE__ */ new Set(
  ["queued", "downloading", "querying"].map((key) => CcReleaseSyncStatuses[key])
);
|
|
11523
|
+
/**
 * Converts a release-sync row into the object sent in API responses.
 *
 * `id`, `release`, `status`, `createdAt` and `updatedAt` are copied as-is.
 * Every other listed field is copied with null/undefined normalised to
 * `null` (falsy values such as 0 or "" are preserved). Properties of `row`
 * that are not listed are dropped.
 *
 * @param {object} row - sync row to convert.
 * @returns {object} response object with a fixed key order.
 */
function mapSyncRow(row) {
  const dto = { id: row.id, release: row.release, status: row.status };
  // Order matters: it determines the key order of the serialized response.
  const optionalFields = [
    "phaseDetail",
    "vertexPath",
    "edgesPath",
    "vertexSha256",
    "edgesSha256",
    "vertexBytes",
    "edgesBytes",
    "projectsProcessed",
    "domainsDiscovered",
    "downloadStartedAt",
    "downloadFinishedAt",
    "queryStartedAt",
    "queryFinishedAt",
    "error"
  ];
  for (const field of optionalFields) {
    dto[field] = row[field] ?? null;
  }
  dto.createdAt = row.createdAt;
  dto.updatedAt = row.updatedAt;
  return dto;
}
|
|
11546
|
+
/**
 * Converts a backlink-summary row into the object sent in API responses.
 *
 * Copies exactly seven properties, unchanged, and drops everything else.
 *
 * @param {object} row - summary row to convert.
 * @returns {object} object with projectId, release, targetDomain,
 *   totalLinkingDomains, totalHosts, top10HostsShare and queriedAt.
 */
function mapSummaryRow(row) {
  const {
    projectId,
    release,
    targetDomain,
    totalLinkingDomains,
    totalHosts,
    top10HostsShare,
    queriedAt
  } = row;
  return {
    projectId,
    release,
    targetDomain,
    totalLinkingDomains,
    totalHosts,
    top10HostsShare,
    queriedAt
  };
}
|
|
11557
|
+
/**
 * Converts a run row into the object sent in API responses.
 *
 * `id`, `projectId`, `kind`, `status`, `trigger` and `createdAt` are copied
 * unchanged. `location`, `startedAt`, `finishedAt` and `error` are copied
 * with null/undefined normalised to `null`.
 *
 * @param {object} row - run row to convert.
 * @returns {object} response object with a fixed key order.
 */
function mapRunRow(row) {
  const { id, projectId, kind, status, trigger, createdAt } = row;
  const orNull = (value) => value ?? null;
  return {
    id,
    projectId,
    kind,
    status,
    trigger,
    location: orNull(row.location),
    startedAt: orNull(row.startedAt),
    finishedAt: orNull(row.finishedAt),
    error: orNull(row.error),
    createdAt
  };
}
|
|
11571
|
+
/**
 * Fetches the most recently queried backlink summary for a project.
 *
 * @param {object} db - query builder exposing select().from().where()
 *   .orderBy().limit().get().
 * @param {*} projectId - value matched against backlinkSummaries.projectId.
 * @param {string} [release] - when truthy, additionally restricts the lookup
 *   to this release; any falsy value (including "") means "any release".
 * @returns {*} whatever `.get()` yields for the first row ordered by
 *   `queriedAt` descending.
 */
function latestSummaryForProject(db, projectId, release) {
  let filter = eq18(backlinkSummaries.projectId, projectId);
  if (release) {
    filter = and7(filter, eq18(backlinkSummaries.release, release));
  }
  const query = db.select().from(backlinkSummaries).where(filter);
  return query.orderBy(desc8(backlinkSummaries.queriedAt)).limit(1).get();
}
|
|
11575
|
+
/**
 * Registers the backlinks HTTP routes on `app`.
 *
 * Routes registered:
 *   GET    /backlinks/status
 *   POST   /backlinks/install
 *   POST   /backlinks/syncs
 *   GET    /backlinks/syncs/latest
 *   GET    /backlinks/syncs
 *   GET    /backlinks/releases
 *   DELETE /backlinks/cache/:release
 *   POST   /projects/:name/backlinks/extract
 *   GET    /projects/:name/backlinks/summary
 *   GET    /projects/:name/backlinks/domains
 *   GET    /projects/:name/backlinks/history
 *
 * @param {object} app - router exposing get/post/delete and a `db` query
 *   builder (decorated elsewhere).
 * @param {object} opts - optional host hooks. Each one may be absent, in
 *   which case the routes that need it throw
 *   missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE):
 *   - getBacklinksStatus(): status object; `duckdbInstalled` is read here.
 *   - onInstallBacklinks(): awaited; its result is sent back.
 *   - onReleaseSyncRequested(syncId, release)
 *   - onBacklinkExtractRequested(runId, projectId, release)
 *   - onBacklinksPruneCache(release)
 *   - listCachedReleases(): optional; defaults to [] when absent.
 */
async function backlinksRoutes(app, opts) {
  const DUCKDB_MISSING_MESSAGE = "@duckdb/node-api is not installed. Run `canonry backlinks install` to enable the backlinks feature.";
  // Shared guard for the routes that need DuckDB. Callers must already have
  // verified that opts.getBacklinksStatus exists.
  const requireDuckdb = () => {
    if (!opts.getBacklinksStatus().duckdbInstalled) {
      throw missingDependency(DUCKDB_MISSING_MESSAGE);
    }
  };

  app.get("/backlinks/status", async (_request, reply) => {
    if (!opts.getBacklinksStatus) {
      throw missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE);
    }
    return reply.send(opts.getBacklinksStatus());
  });

  app.post("/backlinks/install", async (_request, reply) => {
    if (!opts.onInstallBacklinks) {
      throw missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE);
    }
    const result = await opts.onInstallBacklinks();
    return reply.status(200).send(result);
  });

  // Queue (or re-queue) a sync for one release. Responds 201 for a newly
  // created sync row and 200 when a row for that release already existed.
  app.post("/backlinks/syncs", async (request, reply) => {
    const release = request.body?.release;
    if (!release || !isValidReleaseId(release)) {
      throw validationError("Invalid release id. Expected form: cc-main-YYYY-{jan-feb-mar,apr-may-jun,jul-aug-sep,oct-nov-dec}");
    }
    if (!opts.getBacklinksStatus || !opts.onReleaseSyncRequested) {
      throw missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE);
    }
    requireDuckdb();
    const existing = app.db.select().from(ccReleaseSyncs).where(eq18(ccReleaseSyncs.release, release)).get();
    const now = new Date().toISOString();
    if (existing) {
      // A sync that is still in flight is returned untouched, not restarted.
      if (NON_TERMINAL_SYNC_STATUSES.has(existing.status)) {
        return reply.status(200).send(mapSyncRow(existing));
      }
      app.db.update(ccReleaseSyncs).set({
        status: CcReleaseSyncStatuses.queued,
        phaseDetail: null,
        error: null,
        updatedAt: now
      }).where(eq18(ccReleaseSyncs.id, existing.id)).run();
      opts.onReleaseSyncRequested(existing.id, release);
      // Re-read so the response reflects whatever the hook may have written.
      const refreshed = app.db.select().from(ccReleaseSyncs).where(eq18(ccReleaseSyncs.id, existing.id)).get();
      return reply.status(200).send(mapSyncRow(refreshed));
    }
    const id = crypto18.randomUUID();
    app.db.insert(ccReleaseSyncs).values({
      id,
      release,
      status: CcReleaseSyncStatuses.queued,
      createdAt: now,
      updatedAt: now
    }).run();
    opts.onReleaseSyncRequested(id, release);
    const inserted = app.db.select().from(ccReleaseSyncs).where(eq18(ccReleaseSyncs.id, id)).get();
    return reply.status(201).send(mapSyncRow(inserted));
  });

  app.get("/backlinks/syncs/latest", async (_request, reply) => {
    const row = app.db.select().from(ccReleaseSyncs).orderBy(desc8(ccReleaseSyncs.updatedAt)).limit(1).get();
    return reply.send(row ? mapSyncRow(row) : null);
  });

  app.get("/backlinks/syncs", async (_request, reply) => {
    const rows = app.db.select().from(ccReleaseSyncs).orderBy(desc8(ccReleaseSyncs.updatedAt)).all();
    return reply.send(rows.map(mapSyncRow));
  });

  app.get("/backlinks/releases", async (_request, reply) => {
    const releases = opts.listCachedReleases?.() ?? [];
    return reply.send(releases);
  });

  app.delete("/backlinks/cache/:release", async (request, reply) => {
    const release = request.params.release;
    if (!isValidReleaseId(release)) {
      throw validationError("Invalid release id");
    }
    if (!opts.onBacklinksPruneCache) {
      throw missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE);
    }
    opts.onBacklinksPruneCache(release);
    return reply.send({ ok: true });
  });

  // Create a queued backlink-extract run for a project and hand it to the host.
  app.post("/projects/:name/backlinks/extract", async (request, reply) => {
    const project = resolveProject(app.db, request.params.name);
    if (!opts.getBacklinksStatus || !opts.onBacklinkExtractRequested) {
      throw missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE);
    }
    requireDuckdb();
    // `release` is optional here; only a provided value is validated.
    const release = request.body?.release;
    if (release !== void 0 && !isValidReleaseId(release)) {
      throw validationError("Invalid release id");
    }
    const now = new Date().toISOString();
    const runId = crypto18.randomUUID();
    app.db.insert(runs).values({
      id: runId,
      projectId: project.id,
      kind: RunKinds["backlink-extract"],
      status: RunStatuses.queued,
      trigger: RunTriggers.manual,
      createdAt: now
    }).run();
    opts.onBacklinkExtractRequested(runId, project.id, release);
    const run = app.db.select().from(runs).where(eq18(runs.id, runId)).get();
    return reply.status(201).send(mapRunRow(run));
  });

  app.get(
    "/projects/:name/backlinks/summary",
    async (request, reply) => {
      const project = resolveProject(app.db, request.params.name);
      const row = latestSummaryForProject(app.db, project.id, request.query.release);
      return reply.send(row ? mapSummaryRow(row) : null);
    }
  );

  // Paged list of linking domains for one release, highest numHosts first.
  app.get("/projects/:name/backlinks/domains", async (request, reply) => {
    const project = resolveProject(app.db, request.params.name);
    // An empty `?release=` means "no release requested". Normalise it once so
    // the summary lookup and the domain query agree; previously the summary
    // ignored "" while the domain query treated it as an explicit release and
    // returned an empty page.
    const requestedRelease = request.query.release || void 0;
    const summaryRow = latestSummaryForProject(app.db, project.id, requestedRelease);
    const targetRelease = requestedRelease ?? summaryRow?.release;
    if (!targetRelease) {
      const response2 = { summary: null, total: 0, rows: [] };
      return reply.send(response2);
    }
    // limit: default 50, clamped to 1..500. offset: default 0, never negative.
    const limit = Math.min(Math.max(parseInt(request.query.limit ?? "50", 10) || 50, 1), 500);
    const offset = Math.max(parseInt(request.query.offset ?? "0", 10) || 0, 0);
    const domainCondition = and7(
      eq18(backlinkDomains.projectId, project.id),
      eq18(backlinkDomains.release, targetRelease)
    );
    const totalRow = app.db.select({ count: sql5`count(*)` }).from(backlinkDomains).where(domainCondition).get();
    const rows = app.db.select({
      linkingDomain: backlinkDomains.linkingDomain,
      numHosts: backlinkDomains.numHosts
    }).from(backlinkDomains).where(domainCondition).orderBy(desc8(backlinkDomains.numHosts)).limit(limit).offset(offset).all();
    const response = {
      summary: summaryRow ? mapSummaryRow(summaryRow) : null,
      total: Number(totalRow?.count ?? 0),
      rows
    };
    return reply.send(response);
  });

  // Every stored summary for the project, oldest queriedAt first.
  app.get(
    "/projects/:name/backlinks/history",
    async (request, reply) => {
      const project = resolveProject(app.db, request.params.name);
      const rows = app.db.select().from(backlinkSummaries).where(eq18(backlinkSummaries.projectId, project.id)).orderBy(asc2(backlinkSummaries.queriedAt)).all();
      const response = rows.map((r) => ({
        release: r.release,
        totalLinkingDomains: r.totalLinkingDomains,
        totalHosts: r.totalHosts,
        top10HostsShare: r.top10HostsShare,
        queriedAt: r.queriedAt
      }));
      return reply.send(response);
    }
  );
}
|
|
11731
|
+
|
|
10923
11732
|
// ../api-routes/src/index.ts
|
|
10924
11733
|
async function apiRoutes(app, opts) {
|
|
10925
11734
|
app.decorate("db", opts.db);
|
|
@@ -11028,6 +11837,14 @@ async function apiRoutes(app, opts) {
|
|
|
11028
11837
|
googleConnectionStore: opts.googleConnectionStore,
|
|
11029
11838
|
getGoogleAuthConfig: opts.getGoogleAuthConfig
|
|
11030
11839
|
});
|
|
11840
|
+
await api.register(backlinksRoutes, {
|
|
11841
|
+
getBacklinksStatus: opts.getBacklinksStatus,
|
|
11842
|
+
onInstallBacklinks: opts.onInstallBacklinks,
|
|
11843
|
+
onReleaseSyncRequested: opts.onReleaseSyncRequested,
|
|
11844
|
+
onBacklinkExtractRequested: opts.onBacklinkExtractRequested,
|
|
11845
|
+
onBacklinksPruneCache: opts.onBacklinksPruneCache,
|
|
11846
|
+
listCachedReleases: opts.listCachedReleases
|
|
11847
|
+
});
|
|
11031
11848
|
if (opts.registerAuthenticatedRoutes) {
|
|
11032
11849
|
await opts.registerAuthenticatedRoutes(api);
|
|
11033
11850
|
}
|
|
@@ -11035,7 +11852,7 @@ async function apiRoutes(app, opts) {
|
|
|
11035
11852
|
}
|
|
11036
11853
|
|
|
11037
11854
|
// src/server.ts
|
|
11038
|
-
import
|
|
11855
|
+
import os6 from "os";
|
|
11039
11856
|
|
|
11040
11857
|
// ../provider-gemini/src/normalize.ts
|
|
11041
11858
|
import { GoogleGenAI } from "@google/genai";
|
|
@@ -12423,8 +13240,8 @@ var localAdapter = {
|
|
|
12423
13240
|
};
|
|
12424
13241
|
|
|
12425
13242
|
// ../provider-cdp/src/adapter.ts
|
|
12426
|
-
import
|
|
12427
|
-
import
|
|
13243
|
+
import path9 from "path";
|
|
13244
|
+
import os4 from "os";
|
|
12428
13245
|
|
|
12429
13246
|
// ../provider-cdp/src/connection.ts
|
|
12430
13247
|
import CDP from "chrome-remote-interface";
|
|
@@ -12788,12 +13605,12 @@ function sleep2(ms) {
|
|
|
12788
13605
|
}
|
|
12789
13606
|
|
|
12790
13607
|
// ../provider-cdp/src/screenshot.ts
|
|
12791
|
-
import
|
|
12792
|
-
import
|
|
13608
|
+
import fs7 from "fs";
|
|
13609
|
+
import path8 from "path";
|
|
12793
13610
|
async function captureElementScreenshot(client, selector, outputPath) {
|
|
12794
|
-
const dir =
|
|
12795
|
-
if (!
|
|
12796
|
-
|
|
13611
|
+
const dir = path8.dirname(outputPath);
|
|
13612
|
+
if (!fs7.existsSync(dir)) {
|
|
13613
|
+
fs7.mkdirSync(dir, { recursive: true });
|
|
12797
13614
|
}
|
|
12798
13615
|
let clip;
|
|
12799
13616
|
try {
|
|
@@ -12827,7 +13644,7 @@ async function captureElementScreenshot(client, selector, outputPath) {
|
|
|
12827
13644
|
}
|
|
12828
13645
|
const { data } = await client.Page.captureScreenshot(screenshotParams);
|
|
12829
13646
|
const buffer = Buffer.from(data, "base64");
|
|
12830
|
-
|
|
13647
|
+
fs7.writeFileSync(outputPath, buffer);
|
|
12831
13648
|
return outputPath;
|
|
12832
13649
|
}
|
|
12833
13650
|
|
|
@@ -12888,7 +13705,7 @@ function getConnection(config) {
|
|
|
12888
13705
|
return conn;
|
|
12889
13706
|
}
|
|
12890
13707
|
function getScreenshotDir2() {
|
|
12891
|
-
return
|
|
13708
|
+
return path9.join(os4.homedir(), ".canonry", "screenshots");
|
|
12892
13709
|
}
|
|
12893
13710
|
var cdpChatgptAdapter = {
|
|
12894
13711
|
name: "cdp:chatgpt",
|
|
@@ -12952,7 +13769,7 @@ var cdpChatgptAdapter = {
|
|
|
12952
13769
|
const answerText = await target.extractAnswer(client);
|
|
12953
13770
|
const groundingSources = await target.extractCitations(client);
|
|
12954
13771
|
const screenshotId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
12955
|
-
const screenshotPath =
|
|
13772
|
+
const screenshotPath = path9.join(getScreenshotDir2(), `${screenshotId}.png`);
|
|
12956
13773
|
let capturedScreenshotPath;
|
|
12957
13774
|
try {
|
|
12958
13775
|
capturedScreenshotPath = await captureElementScreenshot(
|
|
@@ -13488,11 +14305,11 @@ function removeWordpressConnection(config, projectName) {
|
|
|
13488
14305
|
}
|
|
13489
14306
|
|
|
13490
14307
|
// src/job-runner.ts
|
|
13491
|
-
import
|
|
13492
|
-
import
|
|
13493
|
-
import
|
|
13494
|
-
import
|
|
13495
|
-
import { and as
|
|
14308
|
+
import crypto19 from "crypto";
|
|
14309
|
+
import fs8 from "fs";
|
|
14310
|
+
import path10 from "path";
|
|
14311
|
+
import os5 from "os";
|
|
14312
|
+
import { and as and8, eq as eq19, inArray as inArray3, sql as sql6 } from "drizzle-orm";
|
|
13496
14313
|
|
|
13497
14314
|
// src/citation-utils.ts
|
|
13498
14315
|
function domainMatches(domain, canonicalDomain) {
|
|
@@ -13728,7 +14545,7 @@ var JobRunner = class {
|
|
|
13728
14545
|
if (stale.length === 0) return;
|
|
13729
14546
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
13730
14547
|
for (const run of stale) {
|
|
13731
|
-
this.db.update(runs).set({ status: "failed", finishedAt: now, error: "Server restarted while run was in progress" }).where(
|
|
14548
|
+
this.db.update(runs).set({ status: "failed", finishedAt: now, error: "Server restarted while run was in progress" }).where(eq19(runs.id, run.id)).run();
|
|
13732
14549
|
log.warn("run.recovered-stale", { runId: run.id, previousStatus: run.status });
|
|
13733
14550
|
}
|
|
13734
14551
|
}
|
|
@@ -13756,10 +14573,10 @@ var JobRunner = class {
|
|
|
13756
14573
|
throw new Error(`Run ${runId} is not executable from status '${existingRun.status}'`);
|
|
13757
14574
|
}
|
|
13758
14575
|
if (existingRun.status === "queued") {
|
|
13759
|
-
this.db.update(runs).set({ status: "running", startedAt: now }).where(
|
|
14576
|
+
this.db.update(runs).set({ status: "running", startedAt: now }).where(and8(eq19(runs.id, runId), eq19(runs.status, "queued"))).run();
|
|
13760
14577
|
}
|
|
13761
14578
|
this.throwIfRunCancelled(runId);
|
|
13762
|
-
const project = this.db.select().from(projects).where(
|
|
14579
|
+
const project = this.db.select().from(projects).where(eq19(projects.id, projectId)).get();
|
|
13763
14580
|
if (!project) {
|
|
13764
14581
|
throw new Error(`Project ${projectId} not found`);
|
|
13765
14582
|
}
|
|
@@ -13779,8 +14596,8 @@ var JobRunner = class {
|
|
|
13779
14596
|
throw new Error("No providers configured. Add at least one provider API key.");
|
|
13780
14597
|
}
|
|
13781
14598
|
log.info("run.dispatch", { runId, providerCount: activeProviders.length, providers: activeProviders.map((p) => p.adapter.name) });
|
|
13782
|
-
projectKeywords = this.db.select().from(keywords).where(
|
|
13783
|
-
const projectCompetitors = this.db.select().from(competitors).where(
|
|
14599
|
+
projectKeywords = this.db.select().from(keywords).where(eq19(keywords.projectId, projectId)).all();
|
|
14600
|
+
const projectCompetitors = this.db.select().from(competitors).where(eq19(competitors.projectId, projectId)).all();
|
|
13784
14601
|
const competitorDomains = projectCompetitors.map((c) => c.domain);
|
|
13785
14602
|
const allDomains = effectiveDomains({
|
|
13786
14603
|
canonicalDomain: project.canonicalDomain,
|
|
@@ -13796,7 +14613,7 @@ var JobRunner = class {
|
|
|
13796
14613
|
const todayPeriod = getCurrentUsageDay();
|
|
13797
14614
|
for (const p of activeProviders) {
|
|
13798
14615
|
const providerScope = `${projectId}:${p.adapter.name}`;
|
|
13799
|
-
const providerUsage = this.db.select().from(usageCounters).where(
|
|
14616
|
+
const providerUsage = this.db.select().from(usageCounters).where(eq19(usageCounters.scope, providerScope)).all().filter((r) => r.period === todayPeriod && r.metric === "queries").reduce((sum, r) => sum + r.count, 0);
|
|
13800
14617
|
const limit = p.config.quotaPolicy.maxRequestsPerDay;
|
|
13801
14618
|
if (providerUsage + queriesPerProvider > limit) {
|
|
13802
14619
|
throw new Error(
|
|
@@ -13855,12 +14672,12 @@ var JobRunner = class {
|
|
|
13855
14672
|
competitorDomains
|
|
13856
14673
|
);
|
|
13857
14674
|
let screenshotRelPath = null;
|
|
13858
|
-
if (raw.screenshotPath &&
|
|
13859
|
-
const snapshotId =
|
|
13860
|
-
const screenshotDir =
|
|
13861
|
-
if (!
|
|
13862
|
-
const destPath =
|
|
13863
|
-
|
|
14675
|
+
if (raw.screenshotPath && fs8.existsSync(raw.screenshotPath)) {
|
|
14676
|
+
const snapshotId = crypto19.randomUUID();
|
|
14677
|
+
const screenshotDir = path10.join(os5.homedir(), ".canonry", "screenshots", runId);
|
|
14678
|
+
if (!fs8.existsSync(screenshotDir)) fs8.mkdirSync(screenshotDir, { recursive: true });
|
|
14679
|
+
const destPath = path10.join(screenshotDir, `${snapshotId}.png`);
|
|
14680
|
+
fs8.renameSync(raw.screenshotPath, destPath);
|
|
13864
14681
|
screenshotRelPath = `${runId}/${snapshotId}.png`;
|
|
13865
14682
|
this.db.insert(querySnapshots).values({
|
|
13866
14683
|
id: snapshotId,
|
|
@@ -13886,7 +14703,7 @@ var JobRunner = class {
|
|
|
13886
14703
|
}).run();
|
|
13887
14704
|
} else {
|
|
13888
14705
|
this.db.insert(querySnapshots).values({
|
|
13889
|
-
id:
|
|
14706
|
+
id: crypto19.randomUUID(),
|
|
13890
14707
|
runId,
|
|
13891
14708
|
keywordId: kw.id,
|
|
13892
14709
|
provider: providerName,
|
|
@@ -13937,12 +14754,12 @@ var JobRunner = class {
|
|
|
13937
14754
|
const someFailed = providerErrors.size > 0;
|
|
13938
14755
|
if (allFailed) {
|
|
13939
14756
|
const errorDetail = JSON.stringify(Object.fromEntries(providerErrors));
|
|
13940
|
-
this.db.update(runs).set({ status: "failed", finishedAt: (/* @__PURE__ */ new Date()).toISOString(), error: errorDetail }).where(
|
|
14757
|
+
this.db.update(runs).set({ status: "failed", finishedAt: (/* @__PURE__ */ new Date()).toISOString(), error: errorDetail }).where(eq19(runs.id, runId)).run();
|
|
13941
14758
|
} else if (someFailed) {
|
|
13942
14759
|
const errorDetail = JSON.stringify(Object.fromEntries(providerErrors));
|
|
13943
|
-
this.db.update(runs).set({ status: "partial", finishedAt: (/* @__PURE__ */ new Date()).toISOString(), error: errorDetail }).where(
|
|
14760
|
+
this.db.update(runs).set({ status: "partial", finishedAt: (/* @__PURE__ */ new Date()).toISOString(), error: errorDetail }).where(eq19(runs.id, runId)).run();
|
|
13944
14761
|
} else {
|
|
13945
|
-
this.db.update(runs).set({ status: "completed", finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(
|
|
14762
|
+
this.db.update(runs).set({ status: "completed", finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq19(runs.id, runId)).run();
|
|
13946
14763
|
}
|
|
13947
14764
|
this.flushProviderUsage(projectId, providerDispatchCounts);
|
|
13948
14765
|
const finalStatus = allFailed ? "failed" : someFailed ? "partial" : "completed";
|
|
@@ -13977,7 +14794,7 @@ var JobRunner = class {
|
|
|
13977
14794
|
status: "failed",
|
|
13978
14795
|
finishedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
13979
14796
|
error: errorMessage
|
|
13980
|
-
}).where(
|
|
14797
|
+
}).where(eq19(runs.id, runId)).run();
|
|
13981
14798
|
this.flushProviderUsage(projectId, providerDispatchCounts);
|
|
13982
14799
|
trackEvent("run.completed", {
|
|
13983
14800
|
status: "failed",
|
|
@@ -13998,7 +14815,7 @@ var JobRunner = class {
|
|
|
13998
14815
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
13999
14816
|
const period = now.slice(0, 10);
|
|
14000
14817
|
this.db.insert(usageCounters).values({
|
|
14001
|
-
id:
|
|
14818
|
+
id: crypto19.randomUUID(),
|
|
14002
14819
|
scope,
|
|
14003
14820
|
period,
|
|
14004
14821
|
metric,
|
|
@@ -14006,7 +14823,7 @@ var JobRunner = class {
|
|
|
14006
14823
|
updatedAt: now
|
|
14007
14824
|
}).onConflictDoUpdate({
|
|
14008
14825
|
target: [usageCounters.scope, usageCounters.period, usageCounters.metric],
|
|
14009
|
-
set: { count:
|
|
14826
|
+
set: { count: sql6`${usageCounters.count} + ${count}`, updatedAt: now }
|
|
14010
14827
|
}).run();
|
|
14011
14828
|
}
|
|
14012
14829
|
flushProviderUsage(projectId, providerDispatchCounts) {
|
|
@@ -14020,7 +14837,7 @@ var JobRunner = class {
|
|
|
14020
14837
|
status: runs.status,
|
|
14021
14838
|
finishedAt: runs.finishedAt,
|
|
14022
14839
|
error: runs.error
|
|
14023
|
-
}).from(runs).where(
|
|
14840
|
+
}).from(runs).where(eq19(runs.id, runId)).get();
|
|
14024
14841
|
}
|
|
14025
14842
|
isRunCancelled(runId) {
|
|
14026
14843
|
return this.getRunState(runId)?.status === "cancelled";
|
|
@@ -14036,7 +14853,7 @@ var JobRunner = class {
|
|
|
14036
14853
|
this.db.update(runs).set({
|
|
14037
14854
|
finishedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
14038
14855
|
error: currentRun.error ?? "Cancelled by user"
|
|
14039
|
-
}).where(
|
|
14856
|
+
}).where(eq19(runs.id, runId)).run();
|
|
14040
14857
|
}
|
|
14041
14858
|
trackEvent("run.completed", {
|
|
14042
14859
|
status: "cancelled",
|
|
@@ -14058,8 +14875,8 @@ function getCurrentUsageDay() {
|
|
|
14058
14875
|
}
|
|
14059
14876
|
|
|
14060
14877
|
// src/gsc-sync.ts
|
|
14061
|
-
import
|
|
14062
|
-
import { eq as
|
|
14878
|
+
import crypto20 from "crypto";
|
|
14879
|
+
import { eq as eq20, and as and9, sql as sql7 } from "drizzle-orm";
|
|
14063
14880
|
var log2 = createLogger("GscSync");
|
|
14064
14881
|
function formatDate2(d) {
|
|
14065
14882
|
return d.toISOString().split("T")[0];
|
|
@@ -14071,13 +14888,13 @@ function daysAgo(n) {
|
|
|
14071
14888
|
}
|
|
14072
14889
|
async function executeGscSync(db, runId, projectId, opts) {
|
|
14073
14890
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
14074
|
-
db.update(runs).set({ status: "running", startedAt: now }).where(
|
|
14891
|
+
db.update(runs).set({ status: "running", startedAt: now }).where(eq20(runs.id, runId)).run();
|
|
14075
14892
|
try {
|
|
14076
14893
|
const { clientId: googleClientId, clientSecret: googleClientSecret } = getGoogleAuthConfig(opts.config);
|
|
14077
14894
|
if (!googleClientId || !googleClientSecret) {
|
|
14078
14895
|
throw new Error("Google OAuth is not configured in the local Canonry config");
|
|
14079
14896
|
}
|
|
14080
|
-
const project = db.select().from(projects).where(
|
|
14897
|
+
const project = db.select().from(projects).where(eq20(projects.id, projectId)).get();
|
|
14081
14898
|
if (!project) {
|
|
14082
14899
|
throw new Error(`Project not found: ${projectId}`);
|
|
14083
14900
|
}
|
|
@@ -14111,10 +14928,10 @@ async function executeGscSync(db, runId, projectId, opts) {
|
|
|
14111
14928
|
});
|
|
14112
14929
|
log2.info("fetch.complete", { runId, projectId, rowCount: rows.length });
|
|
14113
14930
|
db.delete(gscSearchData).where(
|
|
14114
|
-
|
|
14115
|
-
|
|
14116
|
-
|
|
14117
|
-
|
|
14931
|
+
and9(
|
|
14932
|
+
eq20(gscSearchData.projectId, projectId),
|
|
14933
|
+
sql7`${gscSearchData.date} >= ${startDate}`,
|
|
14934
|
+
sql7`${gscSearchData.date} <= ${endDate}`
|
|
14118
14935
|
)
|
|
14119
14936
|
).run();
|
|
14120
14937
|
const batchSize = 500;
|
|
@@ -14124,7 +14941,7 @@ async function executeGscSync(db, runId, projectId, opts) {
|
|
|
14124
14941
|
for (const row of batch) {
|
|
14125
14942
|
const [query, page, country, device, date] = row.keys;
|
|
14126
14943
|
db.insert(gscSearchData).values({
|
|
14127
|
-
id:
|
|
14944
|
+
id: crypto20.randomUUID(),
|
|
14128
14945
|
projectId,
|
|
14129
14946
|
syncRunId: runId,
|
|
14130
14947
|
date: date ?? "",
|
|
@@ -14158,7 +14975,7 @@ async function executeGscSync(db, runId, projectId, opts) {
|
|
|
14158
14975
|
const rich = ir.richResultsResult;
|
|
14159
14976
|
const inspectedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
14160
14977
|
db.insert(gscUrlInspections).values({
|
|
14161
|
-
id:
|
|
14978
|
+
id: crypto20.randomUUID(),
|
|
14162
14979
|
projectId,
|
|
14163
14980
|
syncRunId: runId,
|
|
14164
14981
|
url: pageUrl,
|
|
@@ -14179,7 +14996,7 @@ async function executeGscSync(db, runId, projectId, opts) {
|
|
|
14179
14996
|
log2.error("inspect.url-failed", { runId, projectId, url: pageUrl, error: err instanceof Error ? err.message : String(err) });
|
|
14180
14997
|
}
|
|
14181
14998
|
}
|
|
14182
|
-
const allInspections = db.select().from(gscUrlInspections).where(
|
|
14999
|
+
const allInspections = db.select().from(gscUrlInspections).where(eq20(gscUrlInspections.projectId, projectId)).all();
|
|
14183
15000
|
const latestByUrl = /* @__PURE__ */ new Map();
|
|
14184
15001
|
for (const row of allInspections) {
|
|
14185
15002
|
const existing = latestByUrl.get(row.url);
|
|
@@ -14200,9 +15017,9 @@ async function executeGscSync(db, runId, projectId, opts) {
|
|
|
14200
15017
|
}
|
|
14201
15018
|
}
|
|
14202
15019
|
const snapshotDate = formatDate2(/* @__PURE__ */ new Date());
|
|
14203
|
-
db.delete(gscCoverageSnapshots).where(
|
|
15020
|
+
db.delete(gscCoverageSnapshots).where(and9(eq20(gscCoverageSnapshots.projectId, projectId), eq20(gscCoverageSnapshots.date, snapshotDate))).run();
|
|
14204
15021
|
db.insert(gscCoverageSnapshots).values({
|
|
14205
|
-
id:
|
|
15022
|
+
id: crypto20.randomUUID(),
|
|
14206
15023
|
projectId,
|
|
14207
15024
|
syncRunId: runId,
|
|
14208
15025
|
date: snapshotDate,
|
|
@@ -14211,19 +15028,19 @@ async function executeGscSync(db, runId, projectId, opts) {
|
|
|
14211
15028
|
reasonBreakdown: JSON.stringify(reasonCounts),
|
|
14212
15029
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
14213
15030
|
}).run();
|
|
14214
|
-
db.update(runs).set({ status: "completed", finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(
|
|
15031
|
+
db.update(runs).set({ status: "completed", finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq20(runs.id, runId)).run();
|
|
14215
15032
|
log2.info("sync.completed", { runId, projectId, searchDataRows: rows.length, urlInspections: topPages.length, indexed: snapIndexed, notIndexed: snapNotIndexed });
|
|
14216
15033
|
} catch (err) {
|
|
14217
15034
|
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
14218
|
-
db.update(runs).set({ status: "failed", error: errorMsg, finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(
|
|
15035
|
+
db.update(runs).set({ status: "failed", error: errorMsg, finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq20(runs.id, runId)).run();
|
|
14219
15036
|
log2.error("sync.failed", { runId, projectId, error: errorMsg });
|
|
14220
15037
|
throw err;
|
|
14221
15038
|
}
|
|
14222
15039
|
}
|
|
14223
15040
|
|
|
14224
15041
|
// src/gsc-inspect-sitemap.ts
|
|
14225
|
-
import
|
|
14226
|
-
import { eq as
|
|
15042
|
+
import crypto21 from "crypto";
|
|
15043
|
+
import { eq as eq21, and as and10 } from "drizzle-orm";
|
|
14227
15044
|
|
|
14228
15045
|
// src/sitemap-parser.ts
|
|
14229
15046
|
var LOC_REGEX = /<loc>\s*([^<]+?)\s*<\/loc>/gi;
|
|
@@ -14292,13 +15109,13 @@ async function parseSitemapRecursive(url, urls, depth) {
|
|
|
14292
15109
|
var log3 = createLogger("InspectSitemap");
|
|
14293
15110
|
async function executeInspectSitemap(db, runId, projectId, opts) {
|
|
14294
15111
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
14295
|
-
db.update(runs).set({ status: "running", startedAt: now }).where(
|
|
15112
|
+
db.update(runs).set({ status: "running", startedAt: now }).where(eq21(runs.id, runId)).run();
|
|
14296
15113
|
try {
|
|
14297
15114
|
const { clientId: googleClientId, clientSecret: googleClientSecret } = getGoogleAuthConfig(opts.config);
|
|
14298
15115
|
if (!googleClientId || !googleClientSecret) {
|
|
14299
15116
|
throw new Error("Google OAuth is not configured in the local Canonry config");
|
|
14300
15117
|
}
|
|
14301
|
-
const project = db.select().from(projects).where(
|
|
15118
|
+
const project = db.select().from(projects).where(eq21(projects.id, projectId)).get();
|
|
14302
15119
|
if (!project) {
|
|
14303
15120
|
throw new Error(`Project not found: ${projectId}`);
|
|
14304
15121
|
}
|
|
@@ -14339,7 +15156,7 @@ async function executeInspectSitemap(db, runId, projectId, opts) {
|
|
|
14339
15156
|
const rich = ir.richResultsResult;
|
|
14340
15157
|
const inspectedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
14341
15158
|
db.insert(gscUrlInspections).values({
|
|
14342
|
-
id:
|
|
15159
|
+
id: crypto21.randomUUID(),
|
|
14343
15160
|
projectId,
|
|
14344
15161
|
syncRunId: runId,
|
|
14345
15162
|
url: pageUrl,
|
|
@@ -14366,7 +15183,7 @@ async function executeInspectSitemap(db, runId, projectId, opts) {
|
|
|
14366
15183
|
await new Promise((r) => setTimeout(r, 1e3));
|
|
14367
15184
|
}
|
|
14368
15185
|
}
|
|
14369
|
-
const allInspections = db.select().from(gscUrlInspections).where(
|
|
15186
|
+
const allInspections = db.select().from(gscUrlInspections).where(eq21(gscUrlInspections.projectId, projectId)).all();
|
|
14370
15187
|
const latestByUrl = /* @__PURE__ */ new Map();
|
|
14371
15188
|
for (const row of allInspections) {
|
|
14372
15189
|
const existing = latestByUrl.get(row.url);
|
|
@@ -14387,9 +15204,9 @@ async function executeInspectSitemap(db, runId, projectId, opts) {
|
|
|
14387
15204
|
}
|
|
14388
15205
|
}
|
|
14389
15206
|
const snapshotDate = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
|
|
14390
|
-
db.delete(gscCoverageSnapshots).where(
|
|
15207
|
+
db.delete(gscCoverageSnapshots).where(and10(eq21(gscCoverageSnapshots.projectId, projectId), eq21(gscCoverageSnapshots.date, snapshotDate))).run();
|
|
14391
15208
|
db.insert(gscCoverageSnapshots).values({
|
|
14392
|
-
id:
|
|
15209
|
+
id: crypto21.randomUUID(),
|
|
14393
15210
|
projectId,
|
|
14394
15211
|
syncRunId: runId,
|
|
14395
15212
|
date: snapshotDate,
|
|
@@ -14399,16 +15216,304 @@ async function executeInspectSitemap(db, runId, projectId, opts) {
|
|
|
14399
15216
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
14400
15217
|
}).run();
|
|
14401
15218
|
const status = errors > 0 && inspected > 0 ? "partial" : errors === urls.length ? "failed" : "completed";
|
|
14402
|
-
db.update(runs).set({ status, finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(
|
|
15219
|
+
db.update(runs).set({ status, finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq21(runs.id, runId)).run();
|
|
14403
15220
|
log3.info("inspect.completed", { runId, projectId, inspected, errors, total: urls.length, indexed: snapIndexed, notIndexed: snapNotIndexed });
|
|
14404
15221
|
} catch (err) {
|
|
14405
15222
|
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
14406
|
-
db.update(runs).set({ status: "failed", error: errorMsg, finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(
|
|
15223
|
+
db.update(runs).set({ status: "failed", error: errorMsg, finishedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq21(runs.id, runId)).run();
|
|
14407
15224
|
log3.error("inspect.failed", { runId, projectId, error: errorMsg });
|
|
14408
15225
|
throw err;
|
|
14409
15226
|
}
|
|
14410
15227
|
}
|
|
14411
15228
|
|
|
15229
|
+
// src/commoncrawl-sync.ts
|
|
15230
|
+
import crypto22 from "crypto";
|
|
15231
|
+
import path11 from "path";
|
|
15232
|
+
import { and as and11, eq as eq22, sql as sql8 } from "drizzle-orm";
|
|
15233
|
+
var log4 = createLogger("CommonCrawlSync");
|
|
15234
|
+
var INSERT_CHUNK_SIZE = 1e4;
|
|
15235
|
+
/**
 * Build the default collaborators used by `executeReleaseSync`.
 *
 * Callers may override any of these through `opts.deps`; the overrides are
 * spread on top of this object.
 *
 * @returns {{downloadFile: Function, queryBacklinks: Function, loadDuckdb: Function, now: () => Date, cacheDir: string}}
 */
function defaultDeps() {
  // Clock is injectable so timestamps written by the sync can be controlled.
  const now = () => /* @__PURE__ */ new Date();
  const deps = {
    downloadFile,
    queryBacklinks,
    loadDuckdb,
    now,
    cacheDir: CC_CACHE_DIR
  };
  return deps;
}
|
|
15244
|
+
/**
 * Run one Common Crawl release sync end to end.
 *
 * Steps, each recorded on the `ccReleaseSyncs` row identified by `syncId`:
 *   1. validate the release id and mark the row `downloading`;
 *   2. download the vertex and edges files in parallel into the release cache dir;
 *   3. mark the row `querying` and query backlinks for every project's canonical domain;
 *   4. inside one transaction, replace this sync's `backlinkDomains` rows and
 *      upsert one `backlinkSummaries` row per project;
 *   5. mark the row `ready`.
 * On any failure the row is marked `failed` with the error message and the
 * original error is rethrown.
 *
 * @param db - Drizzle database handle.
 * @param syncId - Id of the `ccReleaseSyncs` row tracking this sync.
 * @param opts - `release` (release id to sync) and optional `deps` overrides
 *   that are spread over `defaultDeps()`.
 * @returns {Promise<void>}
 * @throws Rethrows whatever error interrupted the sync.
 */
async function executeReleaseSync(db, syncId, opts) {
  // Every backlinkDomains row below binds 8 values. SQLite rejects a statement
  // with more bound parameters than SQLITE_MAX_VARIABLE_NUMBER (32766 by
  // default since 3.32.0), so a 10,000-row chunk (80,000 parameters) can fail.
  // NOTE(review): assumes the driver uses SQLite's default limit — confirm.
  const BACKLINK_DOMAIN_COLUMNS = 8;
  const MAX_BOUND_PARAMETERS = 32766;
  const rowsPerInsert = Math.max(
    1,
    Math.min(INSERT_CHUNK_SIZE, Math.floor(MAX_BOUND_PARAMETERS / BACKLINK_DOMAIN_COLUMNS))
  );
  const deps = { ...defaultDeps(), ...opts.deps };
  const release = opts.release;
  try {
    if (!isValidReleaseId(release)) {
      throw new Error(`Invalid release id: ${release}`);
    }
    // Phase 1: downloading.
    const downloadStartedAt = deps.now().toISOString();
    db.update(ccReleaseSyncs).set({
      status: CcReleaseSyncStatuses.downloading,
      downloadStartedAt,
      phaseDetail: "downloading vertices + edges",
      updatedAt: downloadStartedAt,
      error: null
    }).where(eq22(ccReleaseSyncs.id, syncId)).run();
    const paths = ccReleasePaths(release);
    const releaseCacheDir = path11.join(deps.cacheDir, release);
    const vertexPath = path11.join(releaseCacheDir, paths.vertexFilename);
    const edgesPath = path11.join(releaseCacheDir, paths.edgesFilename);
    // The two files are independent, so fetch them concurrently.
    const [vertex, edges] = await Promise.all([
      deps.downloadFile({ url: paths.vertexUrl, destPath: vertexPath }),
      deps.downloadFile({ url: paths.edgesUrl, destPath: edgesPath })
    ]);
    // Phase 2: querying. The query phase starts the moment downloads finish.
    const downloadFinishedAt = deps.now().toISOString();
    const queryStartedAt = downloadFinishedAt;
    db.update(ccReleaseSyncs).set({
      status: CcReleaseSyncStatuses.querying,
      downloadFinishedAt,
      queryStartedAt,
      phaseDetail: "querying backlinks",
      vertexPath,
      edgesPath,
      vertexBytes: vertex.bytes,
      edgesBytes: edges.bytes,
      vertexSha256: vertex.sha256,
      edgesSha256: edges.sha256,
      updatedAt: downloadFinishedAt
    }).where(eq22(ccReleaseSyncs.id, syncId)).run();
    const allProjects = db.select().from(projects).all();
    // Several projects may share a canonical domain; query each domain once.
    const targets = Array.from(new Set(allProjects.map((p) => p.canonicalDomain)));
    let rows = [];
    if (targets.length > 0) {
      const duckdb = deps.loadDuckdb();
      rows = await deps.queryBacklinks({ vertexPath, edgesPath, targets, duckdb });
    }
    // canonicalDomain -> ids of every project that uses it.
    const projectsByDomain = /* @__PURE__ */ new Map();
    for (const p of allProjects) {
      const ids = projectsByDomain.get(p.canonicalDomain) ?? [];
      ids.push(p.id);
      projectsByDomain.set(p.canonicalDomain, ids);
    }
    const queriedAt = deps.now().toISOString();
    db.transaction((tx) => {
      // Clear anything previously written for this sync before re-inserting.
      tx.delete(backlinkDomains).where(eq22(backlinkDomains.releaseSyncId, syncId)).run();
      tx.delete(backlinkSummaries).where(eq22(backlinkSummaries.releaseSyncId, syncId)).run();
      // Fan each query row out to one stored row per owning project.
      const expanded = [];
      for (const r of rows) {
        const projectIds = projectsByDomain.get(r.targetDomain);
        if (!projectIds) continue;
        for (const projectId of projectIds) {
          expanded.push({
            id: crypto22.randomUUID(),
            projectId,
            releaseSyncId: syncId,
            release,
            targetDomain: r.targetDomain,
            linkingDomain: r.linkingDomain,
            numHosts: r.numHosts,
            createdAt: queriedAt
          });
        }
      }
      for (let i = 0; i < expanded.length; i += rowsPerInsert) {
        tx.insert(backlinkDomains).values(expanded.slice(i, i + rowsPerInsert)).run();
      }
      const rowsByProject = groupByProject(rows, projectsByDomain);
      // Every project gets a summary row, including those with no backlinks.
      for (const p of allProjects) {
        const projectRows = rowsByProject.get(p.id) ?? [];
        const summary = computeSummary(projectRows);
        tx.insert(backlinkSummaries).values({
          id: crypto22.randomUUID(),
          projectId: p.id,
          releaseSyncId: syncId,
          release,
          targetDomain: p.canonicalDomain,
          totalLinkingDomains: summary.totalLinkingDomains,
          totalHosts: summary.totalHosts,
          top10HostsShare: summary.top10HostsShare,
          queriedAt,
          createdAt: queriedAt
        }).onConflictDoUpdate({
          target: [backlinkSummaries.projectId, backlinkSummaries.release],
          set: {
            releaseSyncId: syncId,
            targetDomain: p.canonicalDomain,
            totalLinkingDomains: summary.totalLinkingDomains,
            totalHosts: summary.totalHosts,
            top10HostsShare: summary.top10HostsShare,
            queriedAt
          }
        }).run();
      }
    });
    // Phase 3: ready.
    const finishedAt = deps.now().toISOString();
    db.update(ccReleaseSyncs).set({
      status: CcReleaseSyncStatuses.ready,
      queryFinishedAt: finishedAt,
      phaseDetail: null,
      projectsProcessed: allProjects.length,
      domainsDiscovered: rows.length,
      updatedAt: finishedAt,
      error: null
    }).where(eq22(ccReleaseSyncs.id, syncId)).run();
    log4.info("sync.completed", {
      syncId,
      release,
      projectsProcessed: allProjects.length,
      domainsDiscovered: rows.length
    });
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    try {
      const finishedAt = deps.now().toISOString();
      db.update(ccReleaseSyncs).set({
        status: CcReleaseSyncStatuses.failed,
        error: errorMsg,
        phaseDetail: null,
        updatedAt: finishedAt
      }).where(eq22(ccReleaseSyncs.id, syncId)).run();
    } catch (statusErr) {
      // Recording the failure is best-effort: it must never replace the
      // original error that the caller needs to see.
      log4.error("sync.status-update-failed", {
        syncId,
        release,
        error: statusErr instanceof Error ? statusErr.message : String(statusErr)
      });
    }
    log4.error("sync.failed", { syncId, release, error: errorMsg });
    throw err;
  }
}
|
|
15377
|
+
/**
 * Bucket backlink rows by the projects that own each row's target domain.
 *
 * A row whose `targetDomain` has no entry in `projectsByDomain` is dropped.
 * A row whose domain maps to several projects appears in each project's bucket.
 *
 * @param rows - Backlink rows; each is read for its `targetDomain`.
 * @param {Map<string, string[]>} projectsByDomain - target domain -> project ids.
 * @returns {Map<string, object[]>} project id -> rows, in encounter order.
 */
function groupByProject(rows, projectsByDomain) {
  const grouped = /* @__PURE__ */ new Map();
  for (const backlink of rows) {
    const owners = projectsByDomain.get(backlink.targetDomain);
    if (owners) {
      for (const ownerId of owners) {
        if (grouped.has(ownerId)) {
          grouped.get(ownerId).push(backlink);
        } else {
          grouped.set(ownerId, [backlink]);
        }
      }
    }
  }
  return grouped;
}
|
|
15390
|
+
/**
 * Summarise a project's backlink rows.
 *
 * @param rows - Backlink rows; each is read for its `numHosts`.
 * @returns {{totalLinkingDomains: number, totalHosts: number, top10HostsShare: string}}
 *   `totalLinkingDomains` is the row count, `totalHosts` the sum of `numHosts`,
 *   and `top10HostsShare` the fraction of hosts contributed by the ten largest
 *   rows, formatted with six decimals ("0" when there are no rows).
 */
function computeSummary(rows) {
  if (rows.length === 0) {
    return { totalLinkingDomains: 0, totalHosts: 0, top10HostsShare: "0" };
  }
  // Largest host counts first so the leading ten entries are the top ten.
  const ranked = rows.map((row) => row.numHosts).sort((left, right) => right - left);
  let totalHosts = 0;
  let top10Hosts = 0;
  ranked.forEach((hosts, rank) => {
    totalHosts = totalHosts + hosts;
    if (rank < 10) {
      top10Hosts = top10Hosts + hosts;
    }
  });
  let share = 0;
  if (totalHosts > 0) {
    share = top10Hosts / totalHosts;
  }
  return {
    totalLinkingDomains: rows.length,
    totalHosts,
    top10HostsShare: share.toFixed(6)
  };
}
|
|
15404
|
+
|
|
15405
|
+
// src/backlink-extract.ts
|
|
15406
|
+
import crypto23 from "crypto";
|
|
15407
|
+
import { and as and12, desc as desc9, eq as eq23 } from "drizzle-orm";
|
|
15408
|
+
var log5 = createLogger("BacklinkExtract");
|
|
15409
|
+
/**
 * Build the default collaborators used by `executeBacklinkExtract`.
 *
 * Any of them can be replaced through `opts.deps`, which is spread over this
 * object by the caller.
 *
 * @returns {{queryBacklinks: Function, loadDuckdb: Function, now: () => Date}}
 */
function defaultDeps2() {
  const deps = { queryBacklinks, loadDuckdb };
  // Injectable clock; used for every timestamp the extract writes.
  deps.now = () => /* @__PURE__ */ new Date();
  return deps;
}
|
|
15416
|
+
/**
 * Extract backlinks for a single project from an already-synced release.
 *
 * The run row `runId` is marked running up front. The release sync is the one
 * matching `opts.release` when given, otherwise the most recently created
 * sync whose status is `ready`. The sync must be `ready` and have cached
 * vertex/edges paths. Inside one transaction the project's `backlinkDomains`
 * rows for that release are replaced and its `backlinkSummaries` row is
 * upserted. The run is then marked completed; on any failure it is marked
 * failed with the error message and the original error is rethrown.
 *
 * @param db - Drizzle database handle.
 * @param runId - Id of the `runs` row tracking this extract.
 * @param projectId - Id of the project to extract backlinks for.
 * @param opts - Optional `release` to pin the release, and optional `deps`
 *   overrides spread over `defaultDeps2()`.
 * @returns {Promise<void>}
 * @throws Rethrows whatever error interrupted the extract.
 */
async function executeBacklinkExtract(db, runId, projectId, opts = {}) {
  // Each backlinkDomains row binds 8 values. SQLite rejects a statement with
  // more bound parameters than SQLITE_MAX_VARIABLE_NUMBER (32766 by default
  // since 3.32.0), so rows are inserted in bounded batches rather than in one
  // statement of unbounded size.
  // NOTE(review): assumes the driver uses SQLite's default limit — confirm.
  const BACKLINK_DOMAIN_COLUMNS = 8;
  const MAX_BOUND_PARAMETERS = 32766;
  const rowsPerInsert = Math.floor(MAX_BOUND_PARAMETERS / BACKLINK_DOMAIN_COLUMNS);
  const deps = { ...defaultDeps2(), ...opts.deps };
  const startedAt = deps.now().toISOString();
  db.update(runs).set({ status: RunStatuses.running, startedAt }).where(eq23(runs.id, runId)).run();
  try {
    const project = db.select().from(projects).where(eq23(projects.id, projectId)).get();
    if (!project) {
      throw new Error(`Project not found: ${projectId}`);
    }
    // Explicit release wins; otherwise fall back to the newest ready sync.
    const sync = opts.release ? db.select().from(ccReleaseSyncs).where(eq23(ccReleaseSyncs.release, opts.release)).get() : db.select().from(ccReleaseSyncs).where(eq23(ccReleaseSyncs.status, CcReleaseSyncStatuses.ready)).orderBy(desc9(ccReleaseSyncs.createdAt)).limit(1).get();
    if (!sync) {
      throw new Error("No ready release sync available \u2014 run `canonry backlinks sync` first");
    }
    // An explicitly requested release may exist but still be in progress or failed.
    if (sync.status !== CcReleaseSyncStatuses.ready) {
      throw new Error(`Release ${sync.release} is not ready (status=${sync.status})`);
    }
    if (!sync.vertexPath || !sync.edgesPath) {
      throw new Error(`Release ${sync.release} is missing cached file paths`);
    }
    const duckdb = deps.loadDuckdb();
    const rows = await deps.queryBacklinks({
      vertexPath: sync.vertexPath,
      edgesPath: sync.edgesPath,
      targets: [project.canonicalDomain],
      duckdb
    });
    const queriedAt = deps.now().toISOString();
    const syncId = sync.id;
    const release = sync.release;
    const targetDomain = project.canonicalDomain;
    db.transaction((tx) => {
      // Replace, rather than append to, this project's rows for the release.
      tx.delete(backlinkDomains).where(
        and12(eq23(backlinkDomains.projectId, projectId), eq23(backlinkDomains.release, release))
      ).run();
      const values = rows.map((r) => ({
        id: crypto23.randomUUID(),
        projectId,
        releaseSyncId: syncId,
        release,
        targetDomain,
        linkingDomain: r.linkingDomain,
        numHosts: r.numHosts,
        createdAt: queriedAt
      }));
      // The loop body never runs for an empty result, so no empty INSERT is issued.
      for (let i = 0; i < values.length; i += rowsPerInsert) {
        tx.insert(backlinkDomains).values(values.slice(i, i + rowsPerInsert)).run();
      }
      const summary = computeSummary2(rows);
      tx.insert(backlinkSummaries).values({
        id: crypto23.randomUUID(),
        projectId,
        releaseSyncId: syncId,
        release,
        targetDomain,
        totalLinkingDomains: summary.totalLinkingDomains,
        totalHosts: summary.totalHosts,
        top10HostsShare: summary.top10HostsShare,
        queriedAt,
        createdAt: queriedAt
      }).onConflictDoUpdate({
        target: [backlinkSummaries.projectId, backlinkSummaries.release],
        set: {
          releaseSyncId: syncId,
          targetDomain,
          totalLinkingDomains: summary.totalLinkingDomains,
          totalHosts: summary.totalHosts,
          top10HostsShare: summary.top10HostsShare,
          queriedAt
        }
      }).run();
    });
    const finishedAt = deps.now().toISOString();
    db.update(runs).set({ status: RunStatuses.completed, finishedAt }).where(eq23(runs.id, runId)).run();
    log5.info("extract.completed", { runId, projectId, release, rows: rows.length });
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    try {
      const finishedAt = deps.now().toISOString();
      db.update(runs).set({
        status: RunStatuses.failed,
        error: errorMsg,
        finishedAt
      }).where(eq23(runs.id, runId)).run();
    } catch (statusErr) {
      // Recording the failure is best-effort: it must never replace the
      // original error that the caller needs to see.
      log5.error("extract.status-update-failed", {
        runId,
        projectId,
        error: statusErr instanceof Error ? statusErr.message : String(statusErr)
      });
    }
    log5.error("extract.failed", { runId, projectId, error: errorMsg });
    throw err;
  }
}
|
|
15502
|
+
/**
 * Summarise the backlink rows found for one project.
 *
 * @param rows - Backlink rows; each is read for its `numHosts`.
 * @returns {{totalLinkingDomains: number, totalHosts: number, top10HostsShare: string}}
 *   Row count, summed `numHosts`, and the share of hosts held by the ten
 *   largest rows as a six-decimal string ("0" when `rows` is empty).
 */
function computeSummary2(rows) {
  const linkingDomainCount = rows.length;
  if (linkingDomainCount === 0) {
    return { totalLinkingDomains: 0, totalHosts: 0, top10HostsShare: "0" };
  }
  const addHosts = (running, row) => running + row.numHosts;
  // Sort a copy so the caller's array keeps its order.
  const descending = rows.slice().sort((x, y) => y.numHosts - x.numHosts);
  const totalHosts = descending.reduce(addHosts, 0);
  const top10Hosts = descending.slice(0, 10).reduce(addHosts, 0);
  // Guard the division: rows may exist while every host count is zero.
  const share = totalHosts > 0 ? top10Hosts / totalHosts : 0;
  const top10HostsShare = share.toFixed(6);
  return { totalLinkingDomains: linkingDomainCount, totalHosts, top10HostsShare };
}
|
|
15516
|
+
|
|
14412
15517
|
// src/provider-registry.ts
|
|
14413
15518
|
var ProviderRegistry = class {
|
|
14414
15519
|
providers = /* @__PURE__ */ new Map();
|
|
@@ -14462,8 +15567,8 @@ var ProviderRegistry = class {
|
|
|
14462
15567
|
|
|
14463
15568
|
// src/scheduler.ts
|
|
14464
15569
|
import cron from "node-cron";
|
|
14465
|
-
import { eq as
|
|
14466
|
-
var
|
|
15570
|
+
import { eq as eq24 } from "drizzle-orm";
|
|
15571
|
+
var log6 = createLogger("Scheduler");
|
|
14467
15572
|
var Scheduler = class {
|
|
14468
15573
|
db;
|
|
14469
15574
|
callbacks;
|
|
@@ -14474,16 +15579,16 @@ var Scheduler = class {
|
|
|
14474
15579
|
}
|
|
14475
15580
|
/** Load all enabled schedules from DB and register cron jobs. */
|
|
14476
15581
|
start() {
|
|
14477
|
-
const allSchedules = this.db.select().from(schedules).where(
|
|
15582
|
+
const allSchedules = this.db.select().from(schedules).where(eq24(schedules.enabled, 1)).all();
|
|
14478
15583
|
for (const schedule of allSchedules) {
|
|
14479
15584
|
const missedRunAt = schedule.nextRunAt;
|
|
14480
15585
|
this.registerCronTask(schedule);
|
|
14481
15586
|
if (missedRunAt && new Date(missedRunAt) < /* @__PURE__ */ new Date()) {
|
|
14482
|
-
|
|
15587
|
+
log6.info("run.catch-up", { projectId: schedule.projectId, missedRunAt });
|
|
14483
15588
|
this.triggerRun(schedule.id, schedule.projectId);
|
|
14484
15589
|
}
|
|
14485
15590
|
}
|
|
14486
|
-
|
|
15591
|
+
log6.info("started", { scheduleCount: allSchedules.length });
|
|
14487
15592
|
}
|
|
14488
15593
|
/** Stop all cron tasks for graceful shutdown. */
|
|
14489
15594
|
stop() {
|
|
@@ -14499,7 +15604,7 @@ var Scheduler = class {
|
|
|
14499
15604
|
this.stopTask(projectId, existing, "Stopped");
|
|
14500
15605
|
this.tasks.delete(projectId);
|
|
14501
15606
|
}
|
|
14502
|
-
const schedule = this.db.select().from(schedules).where(
|
|
15607
|
+
const schedule = this.db.select().from(schedules).where(eq24(schedules.projectId, projectId)).get();
|
|
14503
15608
|
if (schedule && schedule.enabled === 1) {
|
|
14504
15609
|
this.registerCronTask(schedule);
|
|
14505
15610
|
}
|
|
@@ -14515,12 +15620,12 @@ var Scheduler = class {
|
|
|
14515
15620
|
stopTask(projectId, task, verb) {
|
|
14516
15621
|
task.stop();
|
|
14517
15622
|
task.destroy();
|
|
14518
|
-
|
|
15623
|
+
log6.info(`task.${verb.toLowerCase()}`, { projectId });
|
|
14519
15624
|
}
|
|
14520
15625
|
registerCronTask(schedule) {
|
|
14521
15626
|
const { id: scheduleId, projectId, cronExpr, timezone } = schedule;
|
|
14522
15627
|
if (!cron.validate(cronExpr)) {
|
|
14523
|
-
|
|
15628
|
+
log6.error("cron.invalid", { projectId, cronExpr });
|
|
14524
15629
|
return;
|
|
14525
15630
|
}
|
|
14526
15631
|
const task = cron.schedule(cronExpr, () => {
|
|
@@ -14532,24 +15637,24 @@ var Scheduler = class {
|
|
|
14532
15637
|
this.db.update(schedules).set({
|
|
14533
15638
|
nextRunAt: task.getNextRun()?.toISOString() ?? null,
|
|
14534
15639
|
updatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
14535
|
-
}).where(
|
|
15640
|
+
}).where(eq24(schedules.id, scheduleId)).run();
|
|
14536
15641
|
const label = schedule.preset ?? cronExpr;
|
|
14537
|
-
|
|
15642
|
+
log6.info("cron.registered", { projectId, schedule: label, timezone });
|
|
14538
15643
|
}
|
|
14539
15644
|
triggerRun(scheduleId, projectId) {
|
|
14540
15645
|
try {
|
|
14541
15646
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
14542
|
-
const currentSchedule = this.db.select().from(schedules).where(
|
|
15647
|
+
const currentSchedule = this.db.select().from(schedules).where(eq24(schedules.id, scheduleId)).get();
|
|
14543
15648
|
if (!currentSchedule || currentSchedule.enabled !== 1) {
|
|
14544
|
-
|
|
15649
|
+
log6.warn("schedule.stale", { scheduleId, projectId, msg: "schedule no longer exists or is disabled" });
|
|
14545
15650
|
this.remove(projectId);
|
|
14546
15651
|
return;
|
|
14547
15652
|
}
|
|
14548
15653
|
const task = this.tasks.get(projectId);
|
|
14549
15654
|
const nextRunAt = task?.getNextRun()?.toISOString() ?? null;
|
|
14550
|
-
const project = this.db.select().from(projects).where(
|
|
15655
|
+
const project = this.db.select().from(projects).where(eq24(projects.id, projectId)).get();
|
|
14551
15656
|
if (!project) {
|
|
14552
|
-
|
|
15657
|
+
log6.error("project.not-found", { projectId, msg: "skipping scheduled run" });
|
|
14553
15658
|
this.remove(projectId);
|
|
14554
15659
|
return;
|
|
14555
15660
|
}
|
|
@@ -14558,7 +15663,7 @@ var Scheduler = class {
|
|
|
14558
15663
|
if (project.defaultLocation) {
|
|
14559
15664
|
const loc = projectLocations.find((l) => l.label === project.defaultLocation);
|
|
14560
15665
|
if (!loc) {
|
|
14561
|
-
|
|
15666
|
+
log6.warn("default-location.stale", { scheduleId, projectId, label: project.defaultLocation });
|
|
14562
15667
|
return;
|
|
14563
15668
|
}
|
|
14564
15669
|
resolvedLocation = loc;
|
|
@@ -14572,11 +15677,11 @@ var Scheduler = class {
|
|
|
14572
15677
|
location: locationLabel
|
|
14573
15678
|
});
|
|
14574
15679
|
if (queueResult.conflict) {
|
|
14575
|
-
|
|
15680
|
+
log6.info("run.skipped-active", { projectName: project.name, activeRunId: queueResult.activeRunId });
|
|
14576
15681
|
this.db.update(schedules).set({
|
|
14577
15682
|
nextRunAt,
|
|
14578
15683
|
updatedAt: now
|
|
14579
|
-
}).where(
|
|
15684
|
+
}).where(eq24(schedules.id, currentSchedule.id)).run();
|
|
14580
15685
|
return;
|
|
14581
15686
|
}
|
|
14582
15687
|
const runId = queueResult.runId;
|
|
@@ -14584,21 +15689,21 @@ var Scheduler = class {
|
|
|
14584
15689
|
lastRunAt: now,
|
|
14585
15690
|
nextRunAt,
|
|
14586
15691
|
updatedAt: now
|
|
14587
|
-
}).where(
|
|
15692
|
+
}).where(eq24(schedules.id, currentSchedule.id)).run();
|
|
14588
15693
|
const scheduleProviders = parseJsonColumn(currentSchedule.providers, []);
|
|
14589
15694
|
const providers = scheduleProviders.length > 0 ? scheduleProviders : void 0;
|
|
14590
|
-
|
|
15695
|
+
log6.info("run.triggered", { runId, projectName: project.name, providers: providers ?? "all" });
|
|
14591
15696
|
this.callbacks.onRunCreated(runId, projectId, providers, resolvedLocation);
|
|
14592
15697
|
} catch (err) {
|
|
14593
|
-
|
|
15698
|
+
log6.error("trigger.error", { scheduleId, projectId, error: err instanceof Error ? err.message : String(err) });
|
|
14594
15699
|
}
|
|
14595
15700
|
}
|
|
14596
15701
|
};
|
|
14597
15702
|
|
|
14598
15703
|
// src/notifier.ts
|
|
14599
|
-
import { eq as
|
|
14600
|
-
import
|
|
14601
|
-
var
|
|
15704
|
+
import { eq as eq25, desc as desc10, and as and13, or as or2 } from "drizzle-orm";
|
|
15705
|
+
import crypto24 from "crypto";
|
|
15706
|
+
var log7 = createLogger("Notifier");
|
|
14602
15707
|
var Notifier = class {
|
|
14603
15708
|
db;
|
|
14604
15709
|
serverUrl;
|
|
@@ -14608,26 +15713,26 @@ var Notifier = class {
|
|
|
14608
15713
|
}
|
|
14609
15714
|
/** Called after a run completes (success, partial, or failed). */
|
|
14610
15715
|
async onRunCompleted(runId, projectId) {
|
|
14611
|
-
|
|
14612
|
-
const notifs = this.db.select().from(notifications).where(
|
|
15716
|
+
log7.info("run.completed", { runId, projectId });
|
|
15717
|
+
const notifs = this.db.select().from(notifications).where(eq25(notifications.projectId, projectId)).all().filter((n) => n.enabled === 1);
|
|
14613
15718
|
if (notifs.length === 0) {
|
|
14614
|
-
|
|
15719
|
+
log7.info("notifications.none-enabled", { projectId });
|
|
14615
15720
|
return;
|
|
14616
15721
|
}
|
|
14617
|
-
|
|
14618
|
-
const run = this.db.select().from(runs).where(
|
|
15722
|
+
log7.info("notifications.found", { projectId, count: notifs.length });
|
|
15723
|
+
const run = this.db.select().from(runs).where(eq25(runs.id, runId)).get();
|
|
14619
15724
|
if (!run) {
|
|
14620
|
-
|
|
15725
|
+
log7.error("run.not-found", { runId, msg: "skipping notification dispatch" });
|
|
14621
15726
|
return;
|
|
14622
15727
|
}
|
|
14623
|
-
const project = this.db.select().from(projects).where(
|
|
15728
|
+
const project = this.db.select().from(projects).where(eq25(projects.id, projectId)).get();
|
|
14624
15729
|
if (!project) {
|
|
14625
|
-
|
|
15730
|
+
log7.error("project.not-found", { projectId, msg: "skipping notification dispatch" });
|
|
14626
15731
|
return;
|
|
14627
15732
|
}
|
|
14628
15733
|
const transitions = this.computeTransitions(runId, projectId);
|
|
14629
15734
|
const events = [];
|
|
14630
|
-
|
|
15735
|
+
log7.info("run.status", { runId: run.id, status: run.status, projectId });
|
|
14631
15736
|
if (run.status === "completed" || run.status === "partial") {
|
|
14632
15737
|
events.push("run.completed");
|
|
14633
15738
|
}
|
|
@@ -14643,7 +15748,7 @@ var Notifier = class {
|
|
|
14643
15748
|
if (!config.url) continue;
|
|
14644
15749
|
const subscribedEvents = config.events;
|
|
14645
15750
|
const matchingEvents = events.filter((e) => subscribedEvents.includes(e));
|
|
14646
|
-
|
|
15751
|
+
log7.info("notification.match", { notificationId: notif.id, subscribedEvents, matchedEvents: matchingEvents });
|
|
14647
15752
|
if (matchingEvents.length === 0) continue;
|
|
14648
15753
|
for (const event of matchingEvents) {
|
|
14649
15754
|
const relevantTransitions = event === "citation.lost" ? lostTransitions : event === "citation.gained" ? gainedTransitions : transitions;
|
|
@@ -14667,11 +15772,11 @@ var Notifier = class {
|
|
|
14667
15772
|
if (criticalInsights.length > 0) insightEvents.push("insight.critical");
|
|
14668
15773
|
if (highInsights.length > 0) insightEvents.push("insight.high");
|
|
14669
15774
|
if (insightEvents.length === 0) return;
|
|
14670
|
-
const notifs = this.db.select().from(notifications).where(
|
|
15775
|
+
const notifs = this.db.select().from(notifications).where(eq25(notifications.projectId, projectId)).all().filter((n) => n.enabled === 1);
|
|
14671
15776
|
if (notifs.length === 0) return;
|
|
14672
|
-
const run = this.db.select().from(runs).where(
|
|
15777
|
+
const run = this.db.select().from(runs).where(eq25(runs.id, runId)).get();
|
|
14673
15778
|
if (!run) return;
|
|
14674
|
-
const project = this.db.select().from(projects).where(
|
|
15779
|
+
const project = this.db.select().from(projects).where(eq25(projects.id, projectId)).get();
|
|
14675
15780
|
if (!project) return;
|
|
14676
15781
|
for (const notif of notifs) {
|
|
14677
15782
|
const config = parseJsonColumn(notif.config, { url: "", events: [] });
|
|
@@ -14702,11 +15807,11 @@ var Notifier = class {
|
|
|
14702
15807
|
}
|
|
14703
15808
|
computeTransitions(runId, projectId) {
|
|
14704
15809
|
const recentRuns = this.db.select().from(runs).where(
|
|
14705
|
-
|
|
14706
|
-
|
|
14707
|
-
or2(
|
|
15810
|
+
and13(
|
|
15811
|
+
eq25(runs.projectId, projectId),
|
|
15812
|
+
or2(eq25(runs.status, "completed"), eq25(runs.status, "partial"))
|
|
14708
15813
|
)
|
|
14709
|
-
).orderBy(
|
|
15814
|
+
).orderBy(desc10(runs.createdAt)).limit(2).all();
|
|
14710
15815
|
if (recentRuns.length < 2) return [];
|
|
14711
15816
|
const currentRunId = recentRuns[0].id;
|
|
14712
15817
|
const previousRunId = recentRuns[1].id;
|
|
@@ -14716,12 +15821,12 @@ var Notifier = class {
|
|
|
14716
15821
|
keyword: keywords.keyword,
|
|
14717
15822
|
provider: querySnapshots.provider,
|
|
14718
15823
|
citationState: querySnapshots.citationState
|
|
14719
|
-
}).from(querySnapshots).leftJoin(keywords,
|
|
15824
|
+
}).from(querySnapshots).leftJoin(keywords, eq25(querySnapshots.keywordId, keywords.id)).where(eq25(querySnapshots.runId, currentRunId)).all();
|
|
14720
15825
|
const previousSnapshots = this.db.select({
|
|
14721
15826
|
keywordId: querySnapshots.keywordId,
|
|
14722
15827
|
provider: querySnapshots.provider,
|
|
14723
15828
|
citationState: querySnapshots.citationState
|
|
14724
|
-
}).from(querySnapshots).where(
|
|
15829
|
+
}).from(querySnapshots).where(eq25(querySnapshots.runId, previousRunId)).all();
|
|
14725
15830
|
const prevMap = /* @__PURE__ */ new Map();
|
|
14726
15831
|
for (const s of previousSnapshots) {
|
|
14727
15832
|
prevMap.set(`${s.keywordId}:${s.provider}`, s.citationState);
|
|
@@ -14745,23 +15850,23 @@ var Notifier = class {
|
|
|
14745
15850
|
const targetLabel = redactNotificationUrl(url).urlDisplay;
|
|
14746
15851
|
const targetCheck = await resolveWebhookTarget(url);
|
|
14747
15852
|
if (!targetCheck.ok) {
|
|
14748
|
-
|
|
15853
|
+
log7.error("webhook.ssrf-blocked", { url: targetLabel, reason: targetCheck.message });
|
|
14749
15854
|
this.logDelivery(projectId, notificationId, payload.event, "failed", `SSRF: ${targetCheck.message}`);
|
|
14750
15855
|
return;
|
|
14751
15856
|
}
|
|
14752
|
-
|
|
15857
|
+
log7.info("webhook.send", { event: payload.event, url: targetLabel });
|
|
14753
15858
|
const maxRetries = 3;
|
|
14754
15859
|
const delays = [1e3, 4e3, 16e3];
|
|
14755
15860
|
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
14756
15861
|
try {
|
|
14757
15862
|
const response = await deliverWebhook(targetCheck.target, payload, webhookSecret);
|
|
14758
15863
|
if (response.status >= 200 && response.status < 300) {
|
|
14759
|
-
|
|
15864
|
+
log7.info("webhook.delivered", { event: payload.event, url: targetLabel, httpStatus: response.status });
|
|
14760
15865
|
this.logDelivery(projectId, notificationId, payload.event, "sent", null);
|
|
14761
15866
|
return;
|
|
14762
15867
|
}
|
|
14763
15868
|
const errorDetail = response.error ?? `HTTP ${response.status}`;
|
|
14764
|
-
|
|
15869
|
+
log7.warn("webhook.attempt-failed", { event: payload.event, url: targetLabel, attempt: attempt + 1, maxRetries, httpStatus: response.status, error: errorDetail });
|
|
14765
15870
|
if (attempt === maxRetries - 1) {
|
|
14766
15871
|
this.logDelivery(projectId, notificationId, payload.event, "failed", errorDetail);
|
|
14767
15872
|
}
|
|
@@ -14769,7 +15874,7 @@ var Notifier = class {
|
|
|
14769
15874
|
const errorDetail = err instanceof Error ? err.message : String(err);
|
|
14770
15875
|
if (attempt === maxRetries - 1) {
|
|
14771
15876
|
this.logDelivery(projectId, notificationId, payload.event, "failed", errorDetail);
|
|
14772
|
-
|
|
15877
|
+
log7.error("webhook.exhausted", { event: payload.event, url: targetLabel, maxRetries, error: errorDetail });
|
|
14773
15878
|
}
|
|
14774
15879
|
}
|
|
14775
15880
|
if (attempt < maxRetries - 1) {
|
|
@@ -14779,7 +15884,7 @@ var Notifier = class {
|
|
|
14779
15884
|
}
|
|
14780
15885
|
logDelivery(projectId, notificationId, event, status, error) {
|
|
14781
15886
|
this.db.insert(auditLog).values({
|
|
14782
|
-
id:
|
|
15887
|
+
id: crypto24.randomUUID(),
|
|
14783
15888
|
projectId,
|
|
14784
15889
|
actor: "scheduler",
|
|
14785
15890
|
action: `notification.${status}`,
|
|
@@ -14792,7 +15897,7 @@ var Notifier = class {
|
|
|
14792
15897
|
};
|
|
14793
15898
|
|
|
14794
15899
|
// src/run-coordinator.ts
|
|
14795
|
-
var
|
|
15900
|
+
var log8 = createLogger("RunCoordinator");
|
|
14796
15901
|
var RunCoordinator = class {
|
|
14797
15902
|
constructor(notifier, intelligenceService, onInsightsGenerated, onAeroEvent) {
|
|
14798
15903
|
this.notifier = notifier;
|
|
@@ -14814,35 +15919,35 @@ var RunCoordinator = class {
|
|
|
14814
15919
|
try {
|
|
14815
15920
|
await this.onInsightsGenerated(runId, projectId, result);
|
|
14816
15921
|
} catch (err) {
|
|
14817
|
-
|
|
15922
|
+
log8.error("insight-webhook.failed", { runId, error: err instanceof Error ? err.message : String(err) });
|
|
14818
15923
|
}
|
|
14819
15924
|
}
|
|
14820
15925
|
}
|
|
14821
15926
|
} catch (err) {
|
|
14822
|
-
|
|
15927
|
+
log8.error("intelligence.failed", { runId, error: err instanceof Error ? err.message : String(err) });
|
|
14823
15928
|
}
|
|
14824
15929
|
try {
|
|
14825
15930
|
await this.notifier.onRunCompleted(runId, projectId);
|
|
14826
15931
|
} catch (err) {
|
|
14827
|
-
|
|
15932
|
+
log8.error("notifier.failed", { runId, error: err instanceof Error ? err.message : String(err) });
|
|
14828
15933
|
}
|
|
14829
15934
|
if (this.onAeroEvent) {
|
|
14830
15935
|
try {
|
|
14831
15936
|
await this.onAeroEvent({ runId, projectId, insightCount, criticalOrHigh });
|
|
14832
15937
|
} catch (err) {
|
|
14833
|
-
|
|
15938
|
+
log8.error("aero.failed", { runId, error: err instanceof Error ? err.message : String(err) });
|
|
14834
15939
|
}
|
|
14835
15940
|
}
|
|
14836
15941
|
}
|
|
14837
15942
|
};
|
|
14838
15943
|
|
|
14839
15944
|
// src/agent/session-registry.ts
|
|
14840
|
-
import
|
|
14841
|
-
import { eq as
|
|
15945
|
+
import crypto26 from "crypto";
|
|
15946
|
+
import { eq as eq27 } from "drizzle-orm";
|
|
14842
15947
|
|
|
14843
15948
|
// src/agent/session.ts
|
|
14844
|
-
import
|
|
14845
|
-
import
|
|
15949
|
+
import fs11 from "fs";
|
|
15950
|
+
import path14 from "path";
|
|
14846
15951
|
import { Agent } from "@mariozechner/pi-agent-core";
|
|
14847
15952
|
import { registerBuiltInApiProviders } from "@mariozechner/pi-ai";
|
|
14848
15953
|
|
|
@@ -14943,26 +16048,26 @@ function buildAgentProvidersResponse(config) {
|
|
|
14943
16048
|
}
|
|
14944
16049
|
|
|
14945
16050
|
// src/agent/skill-paths.ts
|
|
14946
|
-
import
|
|
14947
|
-
import
|
|
16051
|
+
import fs9 from "fs";
|
|
16052
|
+
import path12 from "path";
|
|
14948
16053
|
import { fileURLToPath } from "url";
|
|
14949
16054
|
function resolveAeroSkillDir(pkgDir) {
|
|
14950
|
-
const here = pkgDir ??
|
|
16055
|
+
const here = pkgDir ?? path12.dirname(fileURLToPath(import.meta.url));
|
|
14951
16056
|
const candidates = [
|
|
14952
|
-
|
|
14953
|
-
|
|
14954
|
-
|
|
16057
|
+
path12.join(here, "../assets/agent-workspace/skills/aero"),
|
|
16058
|
+
path12.join(here, "../../assets/agent-workspace/skills/aero"),
|
|
16059
|
+
path12.join(here, "../../../../skills/aero")
|
|
14955
16060
|
];
|
|
14956
16061
|
for (const candidate of candidates) {
|
|
14957
|
-
if (
|
|
16062
|
+
if (fs9.existsSync(path12.join(candidate, "SKILL.md"))) return candidate;
|
|
14958
16063
|
}
|
|
14959
16064
|
throw new Error(`Aero skill not found. Searched:
|
|
14960
16065
|
${candidates.join("\n ")}`);
|
|
14961
16066
|
}
|
|
14962
16067
|
|
|
14963
16068
|
// src/agent/skill-tools.ts
|
|
14964
|
-
import
|
|
14965
|
-
import
|
|
16069
|
+
import fs10 from "fs";
|
|
16070
|
+
import path13 from "path";
|
|
14966
16071
|
import { Type } from "@sinclair/typebox";
|
|
14967
16072
|
var MAX_DOC_CHARS = 2e4;
|
|
14968
16073
|
function textResult(details) {
|
|
@@ -14983,13 +16088,13 @@ function parseDescription(body) {
|
|
|
14983
16088
|
return "(no description)";
|
|
14984
16089
|
}
|
|
14985
16090
|
function scanSkillDocs(skillDir) {
|
|
14986
|
-
const refsDir =
|
|
14987
|
-
if (!
|
|
16091
|
+
const refsDir = path13.join(skillDir ?? resolveAeroSkillDir(), "references");
|
|
16092
|
+
if (!fs10.existsSync(refsDir)) return [];
|
|
14988
16093
|
const entries = [];
|
|
14989
|
-
for (const file of
|
|
16094
|
+
for (const file of fs10.readdirSync(refsDir)) {
|
|
14990
16095
|
if (!file.endsWith(".md")) continue;
|
|
14991
|
-
const filePath =
|
|
14992
|
-
const body =
|
|
16096
|
+
const filePath = path13.join(refsDir, file);
|
|
16097
|
+
const body = fs10.readFileSync(filePath, "utf-8");
|
|
14993
16098
|
entries.push({
|
|
14994
16099
|
slug: file.replace(/\.md$/, ""),
|
|
14995
16100
|
description: parseDescription(body),
|
|
@@ -15032,8 +16137,8 @@ function buildReadSkillDocTool() {
|
|
|
15032
16137
|
availableSlugs: docs.map((d) => d.slug)
|
|
15033
16138
|
});
|
|
15034
16139
|
}
|
|
15035
|
-
const filePath =
|
|
15036
|
-
const content =
|
|
16140
|
+
const filePath = path13.join(skillDir, "references", `${match.slug}.md`);
|
|
16141
|
+
const content = fs10.readFileSync(filePath, "utf-8");
|
|
15037
16142
|
if (content.length > MAX_DOC_CHARS) {
|
|
15038
16143
|
return textResult({
|
|
15039
16144
|
slug: match.slug,
|
|
@@ -15057,8 +16162,8 @@ function buildSkillDocTools() {
|
|
|
15057
16162
|
import { Type as Type2 } from "@sinclair/typebox";
|
|
15058
16163
|
|
|
15059
16164
|
// src/agent/memory-store.ts
|
|
15060
|
-
import
|
|
15061
|
-
import { and as
|
|
16165
|
+
import crypto25 from "crypto";
|
|
16166
|
+
import { and as and14, desc as desc11, eq as eq26, like, sql as sql9 } from "drizzle-orm";
|
|
15062
16167
|
var COMPACTION_KEY_PREFIX = "compaction:";
|
|
15063
16168
|
var COMPACTION_NOTES_PER_SESSION = 3;
|
|
15064
16169
|
function rowToDto(row) {
|
|
@@ -15072,7 +16177,7 @@ function rowToDto(row) {
|
|
|
15072
16177
|
};
|
|
15073
16178
|
}
|
|
15074
16179
|
function listMemoryEntries(db, projectId, opts = {}) {
|
|
15075
|
-
const query = db.select().from(agentMemory).where(
|
|
16180
|
+
const query = db.select().from(agentMemory).where(eq26(agentMemory.projectId, projectId)).orderBy(desc11(agentMemory.updatedAt));
|
|
15076
16181
|
const rows = opts.limit === void 0 ? query.all() : query.limit(opts.limit).all();
|
|
15077
16182
|
return rows.map(rowToDto);
|
|
15078
16183
|
}
|
|
@@ -15086,7 +16191,7 @@ function upsertMemoryEntry(db, args) {
|
|
|
15086
16191
|
throw new Error(`memory key prefix "${COMPACTION_KEY_PREFIX}" is reserved for compaction notes`);
|
|
15087
16192
|
}
|
|
15088
16193
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
15089
|
-
const id =
|
|
16194
|
+
const id = crypto25.randomUUID();
|
|
15090
16195
|
db.insert(agentMemory).values({
|
|
15091
16196
|
id,
|
|
15092
16197
|
projectId: args.projectId,
|
|
@@ -15103,12 +16208,12 @@ function upsertMemoryEntry(db, args) {
|
|
|
15103
16208
|
updatedAt: now
|
|
15104
16209
|
}
|
|
15105
16210
|
}).run();
|
|
15106
|
-
const row = db.select().from(agentMemory).where(
|
|
16211
|
+
const row = db.select().from(agentMemory).where(and14(eq26(agentMemory.projectId, args.projectId), eq26(agentMemory.key, args.key))).get();
|
|
15107
16212
|
if (!row) throw new Error("memory upsert produced no row");
|
|
15108
16213
|
return rowToDto(row);
|
|
15109
16214
|
}
|
|
15110
16215
|
function deleteMemoryEntry(db, projectId, key) {
|
|
15111
|
-
const result = db.delete(agentMemory).where(
|
|
16216
|
+
const result = db.delete(agentMemory).where(and14(eq26(agentMemory.projectId, projectId), eq26(agentMemory.key, key))).run();
|
|
15112
16217
|
const changes = result.changes ?? 0;
|
|
15113
16218
|
return changes > 0;
|
|
15114
16219
|
}
|
|
@@ -15123,7 +16228,7 @@ function writeCompactionNote(db, args) {
|
|
|
15123
16228
|
}
|
|
15124
16229
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
15125
16230
|
const key = `${COMPACTION_KEY_PREFIX}${args.sessionId}:${now}`;
|
|
15126
|
-
const id =
|
|
16231
|
+
const id = crypto25.randomUUID();
|
|
15127
16232
|
let inserted;
|
|
15128
16233
|
db.transaction((tx) => {
|
|
15129
16234
|
tx.insert(agentMemory).values({
|
|
@@ -15137,16 +16242,16 @@ function writeCompactionNote(db, args) {
|
|
|
15137
16242
|
}).run();
|
|
15138
16243
|
const sessionPrefix = `${COMPACTION_KEY_PREFIX}${args.sessionId}:`;
|
|
15139
16244
|
const existing = tx.select({ id: agentMemory.id, updatedAt: agentMemory.updatedAt }).from(agentMemory).where(
|
|
15140
|
-
|
|
15141
|
-
|
|
16245
|
+
and14(
|
|
16246
|
+
eq26(agentMemory.projectId, args.projectId),
|
|
15142
16247
|
like(agentMemory.key, `${sessionPrefix}%`)
|
|
15143
16248
|
)
|
|
15144
|
-
).orderBy(
|
|
16249
|
+
).orderBy(desc11(agentMemory.updatedAt)).all();
|
|
15145
16250
|
const stale = existing.slice(COMPACTION_NOTES_PER_SESSION).map((r) => r.id);
|
|
15146
16251
|
if (stale.length > 0) {
|
|
15147
|
-
tx.delete(agentMemory).where(
|
|
16252
|
+
tx.delete(agentMemory).where(sql9`${agentMemory.id} IN (${sql9.join(stale.map((s) => sql9`${s}`), sql9`, `)})`).run();
|
|
15148
16253
|
}
|
|
15149
|
-
const row = tx.select().from(agentMemory).where(
|
|
16254
|
+
const row = tx.select().from(agentMemory).where(and14(eq26(agentMemory.projectId, args.projectId), eq26(agentMemory.key, key))).get();
|
|
15150
16255
|
if (row) inserted = rowToDto(row);
|
|
15151
16256
|
});
|
|
15152
16257
|
if (!inserted) throw new Error("compaction note write produced no row");
|
|
@@ -15293,6 +16398,35 @@ function buildGetRunTool(ctx) {
|
|
|
15293
16398
|
}
|
|
15294
16399
|
};
|
|
15295
16400
|
}
|
|
16401
|
+
var BacklinksSchema = Type2.Object({
|
|
16402
|
+
limit: Type2.Optional(
|
|
16403
|
+
Type2.Number({
|
|
16404
|
+
description: "Max linking-domain rows to include. Default 50, max 200.",
|
|
16405
|
+
minimum: 1,
|
|
16406
|
+
maximum: 200
|
|
16407
|
+
})
|
|
16408
|
+
),
|
|
16409
|
+
release: Type2.Optional(
|
|
16410
|
+
Type2.String({
|
|
16411
|
+
description: "Common Crawl release id (e.g., cc-main-2026-jan-feb-mar). Omit for the most recent release with data."
|
|
16412
|
+
})
|
|
16413
|
+
)
|
|
16414
|
+
});
|
|
16415
|
+
function buildListBacklinksTool(ctx) {
|
|
16416
|
+
return {
|
|
16417
|
+
name: "list_backlinks",
|
|
16418
|
+
label: "List backlinks",
|
|
16419
|
+
description: "Backlink summary and top linking domains from the most recent ready Common Crawl release. Off-site authority signal that correlates with citation likelihood. Returns null summary when no release sync has completed for this workspace.",
|
|
16420
|
+
parameters: BacklinksSchema,
|
|
16421
|
+
execute: async (_toolCallId, params) => {
|
|
16422
|
+
const response = await ctx.client.backlinksDomains(ctx.projectName, {
|
|
16423
|
+
limit: params.limit ?? 50,
|
|
16424
|
+
release: params.release
|
|
16425
|
+
});
|
|
16426
|
+
return textResult2(response);
|
|
16427
|
+
}
|
|
16428
|
+
};
|
|
16429
|
+
}
|
|
15296
16430
|
var RecallSchema = Type2.Object({
|
|
15297
16431
|
limit: Type2.Optional(
|
|
15298
16432
|
Type2.Number({
|
|
@@ -15323,7 +16457,8 @@ function buildReadTools(ctx) {
|
|
|
15323
16457
|
buildListKeywordsTool(ctx),
|
|
15324
16458
|
buildListCompetitorsTool(ctx),
|
|
15325
16459
|
buildGetRunTool(ctx),
|
|
15326
|
-
buildRecallTool(ctx)
|
|
16460
|
+
buildRecallTool(ctx),
|
|
16461
|
+
buildListBacklinksTool(ctx)
|
|
15327
16462
|
];
|
|
15328
16463
|
}
|
|
15329
16464
|
var RunSweepSchema = Type2.Object({
|
|
@@ -15557,10 +16692,10 @@ function ensureBuiltinsRegistered() {
|
|
|
15557
16692
|
}
|
|
15558
16693
|
function loadAeroSystemPrompt(pkgDir) {
|
|
15559
16694
|
const skillDir = resolveAeroSkillDir(pkgDir);
|
|
15560
|
-
const skillBody =
|
|
15561
|
-
const soulPath =
|
|
15562
|
-
if (!
|
|
15563
|
-
const soulBody =
|
|
16695
|
+
const skillBody = fs11.readFileSync(path14.join(skillDir, "SKILL.md"), "utf-8");
|
|
16696
|
+
const soulPath = path14.join(skillDir, "soul.md");
|
|
16697
|
+
if (!fs11.existsSync(soulPath)) return skillBody;
|
|
16698
|
+
const soulBody = fs11.readFileSync(soulPath, "utf-8");
|
|
15564
16699
|
return `${soulBody.trimEnd()}
|
|
15565
16700
|
|
|
15566
16701
|
---
|
|
@@ -15744,7 +16879,7 @@ async function compactMessages(args) {
|
|
|
15744
16879
|
}
|
|
15745
16880
|
|
|
15746
16881
|
// src/agent/session-registry.ts
|
|
15747
|
-
var
|
|
16882
|
+
var log9 = createLogger("SessionRegistry");
|
|
15748
16883
|
var MAX_HYDRATE_NOTES = 20;
|
|
15749
16884
|
var MAX_HYDRATE_BYTES = 32 * 1024;
|
|
15750
16885
|
function escapeMemoryFragment(value) {
|
|
@@ -15793,7 +16928,7 @@ var SessionRegistry = class {
|
|
|
15793
16928
|
modelProvider: effectiveProvider,
|
|
15794
16929
|
modelId: effectiveModelId,
|
|
15795
16930
|
updatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
15796
|
-
}).where(
|
|
16931
|
+
}).where(eq27(agentSessions.projectId, projectId)).run();
|
|
15797
16932
|
}
|
|
15798
16933
|
const agent2 = createAeroSession({
|
|
15799
16934
|
projectName,
|
|
@@ -15975,13 +17110,13 @@ ${lines.join("\n")}
|
|
|
15975
17110
|
agent.state.messages = result.messages;
|
|
15976
17111
|
agent.state.systemPrompt = this.buildHydratedSystemPrompt(projectId, row.systemPrompt);
|
|
15977
17112
|
this.save(projectName);
|
|
15978
|
-
|
|
17113
|
+
log9.info("compaction.completed", {
|
|
15979
17114
|
projectName,
|
|
15980
17115
|
removedCount: result.removedCount,
|
|
15981
17116
|
summaryBytes: Buffer.byteLength(result.summary, "utf8")
|
|
15982
17117
|
});
|
|
15983
17118
|
} catch (err) {
|
|
15984
|
-
|
|
17119
|
+
log9.error("compaction.failed", {
|
|
15985
17120
|
projectName,
|
|
15986
17121
|
error: err instanceof Error ? err.message : String(err)
|
|
15987
17122
|
});
|
|
@@ -16011,7 +17146,7 @@ ${lines.join("\n")}
|
|
|
16011
17146
|
modelProvider: nextProvider,
|
|
16012
17147
|
modelId: nextModelId,
|
|
16013
17148
|
updatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
16014
|
-
}).where(
|
|
17149
|
+
}).where(eq27(agentSessions.projectId, projectId)).run();
|
|
16015
17150
|
}
|
|
16016
17151
|
/** Persist a session's transcript back to the DB. Call after any run settles. */
|
|
16017
17152
|
save(projectName) {
|
|
@@ -16078,7 +17213,7 @@ ${lines.join("\n")}
|
|
|
16078
17213
|
await agent.prompt(msgs);
|
|
16079
17214
|
this.save(projectName);
|
|
16080
17215
|
} catch (err) {
|
|
16081
|
-
|
|
17216
|
+
log9.error("drain.failed", {
|
|
16082
17217
|
projectName,
|
|
16083
17218
|
error: err instanceof Error ? err.message : String(err)
|
|
16084
17219
|
});
|
|
@@ -16173,17 +17308,17 @@ ${lines.join("\n")}
|
|
|
16173
17308
|
return id;
|
|
16174
17309
|
}
|
|
16175
17310
|
tryResolveProjectId(projectName) {
|
|
16176
|
-
const row = this.opts.db.select({ id: projects.id }).from(projects).where(
|
|
17311
|
+
const row = this.opts.db.select({ id: projects.id }).from(projects).where(eq27(projects.name, projectName)).get();
|
|
16177
17312
|
return row?.id;
|
|
16178
17313
|
}
|
|
16179
17314
|
loadRow(projectId) {
|
|
16180
|
-
const row = this.opts.db.select().from(agentSessions).where(
|
|
17315
|
+
const row = this.opts.db.select().from(agentSessions).where(eq27(agentSessions.projectId, projectId)).get();
|
|
16181
17316
|
return row ?? null;
|
|
16182
17317
|
}
|
|
16183
17318
|
insertRow(params) {
|
|
16184
17319
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
16185
17320
|
this.opts.db.insert(agentSessions).values({
|
|
16186
|
-
id:
|
|
17321
|
+
id: crypto26.randomUUID(),
|
|
16187
17322
|
projectId: params.projectId,
|
|
16188
17323
|
systemPrompt: params.systemPrompt,
|
|
16189
17324
|
modelProvider: params.provider ?? params.modelProvider ?? AgentProviderIds.claude,
|
|
@@ -16196,14 +17331,14 @@ ${lines.join("\n")}
|
|
|
16196
17331
|
}
|
|
16197
17332
|
updateRow(projectId, patch) {
|
|
16198
17333
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
16199
|
-
this.opts.db.update(agentSessions).set({ ...patch, updatedAt: now }).where(
|
|
17334
|
+
this.opts.db.update(agentSessions).set({ ...patch, updatedAt: now }).where(eq27(agentSessions.projectId, projectId)).run();
|
|
16200
17335
|
}
|
|
16201
17336
|
};
|
|
16202
17337
|
|
|
16203
17338
|
// src/agent/agent-routes.ts
|
|
16204
|
-
import { eq as
|
|
17339
|
+
import { eq as eq28 } from "drizzle-orm";
|
|
16205
17340
|
function resolveProject2(db, name) {
|
|
16206
|
-
const row = db.select({ id: projects.id, name: projects.name }).from(projects).where(
|
|
17341
|
+
const row = db.select({ id: projects.id, name: projects.name }).from(projects).where(eq28(projects.name, name)).get();
|
|
16207
17342
|
if (!row) throw notFound("project", name);
|
|
16208
17343
|
return row;
|
|
16209
17344
|
}
|
|
@@ -16212,7 +17347,7 @@ function registerAgentRoutes(app, opts) {
|
|
|
16212
17347
|
"/projects/:name/agent/transcript",
|
|
16213
17348
|
async (request) => {
|
|
16214
17349
|
const project = resolveProject2(opts.db, request.params.name);
|
|
16215
|
-
const row = opts.db.select().from(agentSessions).where(
|
|
17350
|
+
const row = opts.db.select().from(agentSessions).where(eq28(agentSessions.projectId, project.id)).get();
|
|
16216
17351
|
if (!row) {
|
|
16217
17352
|
return { messages: [], modelProvider: null, modelId: null, updatedAt: null };
|
|
16218
17353
|
}
|
|
@@ -16236,7 +17371,7 @@ function registerAgentRoutes(app, opts) {
|
|
|
16236
17371
|
async (request) => {
|
|
16237
17372
|
const project = resolveProject2(opts.db, request.params.name);
|
|
16238
17373
|
opts.sessionRegistry.reset(project.name);
|
|
16239
|
-
opts.db.update(agentSessions).set({ messages: "[]", followUpQueue: "[]", updatedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(
|
|
17374
|
+
opts.db.update(agentSessions).set({ messages: "[]", followUpQueue: "[]", updatedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq28(agentSessions.projectId, project.id)).run();
|
|
16240
17375
|
return { status: "reset" };
|
|
16241
17376
|
}
|
|
16242
17377
|
);
|
|
@@ -16398,9 +17533,9 @@ var ApiClient = class {
|
|
|
16398
17533
|
}
|
|
16399
17534
|
return this.probePromise;
|
|
16400
17535
|
}
|
|
16401
|
-
async request(method,
|
|
17536
|
+
async request(method, path16, body) {
|
|
16402
17537
|
await this.probeBasePath();
|
|
16403
|
-
const url = `${this.baseUrl}${
|
|
17538
|
+
const url = `${this.baseUrl}${path16}`;
|
|
16404
17539
|
const serializedBody = body != null ? JSON.stringify(body) : void 0;
|
|
16405
17540
|
const headers = {
|
|
16406
17541
|
"Authorization": `Bearer ${this.apiKey}`,
|
|
@@ -16488,9 +17623,9 @@ var ApiClient = class {
|
|
|
16488
17623
|
* structured-error behavior of `request()`; the caller reads `res.body`
|
|
16489
17624
|
* and releases the response when done.
|
|
16490
17625
|
*/
|
|
16491
|
-
async streamPost(
|
|
17626
|
+
async streamPost(path16, body, signal) {
|
|
16492
17627
|
await this.probeBasePath();
|
|
16493
|
-
const url = `${this.baseUrl}${
|
|
17628
|
+
const url = `${this.baseUrl}${path16}`;
|
|
16494
17629
|
const headers = {
|
|
16495
17630
|
Authorization: `Bearer ${this.apiKey}`,
|
|
16496
17631
|
"Content-Type": "application/json",
|
|
@@ -16891,6 +18026,46 @@ var ApiClient = class {
|
|
|
16891
18026
|
const qs = limit ? `?limit=${limit}` : "";
|
|
16892
18027
|
return this.request("GET", `/projects/${encodeURIComponent(project)}/health/history${qs}`);
|
|
16893
18028
|
}
|
|
18029
|
+
// --- Backlinks ---------------------------------------------------------
|
|
18030
|
+
async backlinksStatus() {
|
|
18031
|
+
return this.request("GET", "/backlinks/status");
|
|
18032
|
+
}
|
|
18033
|
+
async backlinksInstall() {
|
|
18034
|
+
return this.request("POST", "/backlinks/install");
|
|
18035
|
+
}
|
|
18036
|
+
async backlinksTriggerSync(release) {
|
|
18037
|
+
return this.request("POST", "/backlinks/syncs", { release });
|
|
18038
|
+
}
|
|
18039
|
+
async backlinksLatestSync() {
|
|
18040
|
+
return this.request("GET", "/backlinks/syncs/latest");
|
|
18041
|
+
}
|
|
18042
|
+
async backlinksListSyncs() {
|
|
18043
|
+
return this.request("GET", "/backlinks/syncs");
|
|
18044
|
+
}
|
|
18045
|
+
async backlinksCachedReleases() {
|
|
18046
|
+
return this.request("GET", "/backlinks/releases");
|
|
18047
|
+
}
|
|
18048
|
+
async backlinksPruneCache(release) {
|
|
18049
|
+
return this.request("DELETE", `/backlinks/cache/${encodeURIComponent(release)}`);
|
|
18050
|
+
}
|
|
18051
|
+
async backlinksExtract(project, release) {
|
|
18052
|
+
return this.request("POST", `/projects/${encodeURIComponent(project)}/backlinks/extract`, release ? { release } : {});
|
|
18053
|
+
}
|
|
18054
|
+
async backlinksSummary(project, release) {
|
|
18055
|
+
const qs = release ? `?release=${encodeURIComponent(release)}` : "";
|
|
18056
|
+
return this.request("GET", `/projects/${encodeURIComponent(project)}/backlinks/summary${qs}`);
|
|
18057
|
+
}
|
|
18058
|
+
async backlinksDomains(project, opts = {}) {
|
|
18059
|
+
const qs = new URLSearchParams();
|
|
18060
|
+
if (opts.limit !== void 0) qs.set("limit", String(opts.limit));
|
|
18061
|
+
if (opts.offset !== void 0) qs.set("offset", String(opts.offset));
|
|
18062
|
+
if (opts.release) qs.set("release", opts.release);
|
|
18063
|
+
const suffix = qs.toString() ? `?${qs.toString()}` : "";
|
|
18064
|
+
return this.request("GET", `/projects/${encodeURIComponent(project)}/backlinks/domains${suffix}`);
|
|
18065
|
+
}
|
|
18066
|
+
async backlinksHistory(project) {
|
|
18067
|
+
return this.request("GET", `/projects/${encodeURIComponent(project)}/backlinks/history`);
|
|
18068
|
+
}
|
|
16894
18069
|
};
|
|
16895
18070
|
|
|
16896
18071
|
// src/snapshot-service.ts
|
|
@@ -16915,13 +18090,13 @@ function extractHostname(domain) {
|
|
|
16915
18090
|
function fetchWithPinnedAddress(target) {
|
|
16916
18091
|
return new Promise((resolve) => {
|
|
16917
18092
|
const port = target.url.port ? Number(target.url.port) : 443;
|
|
16918
|
-
const
|
|
18093
|
+
const path16 = target.url.pathname + target.url.search;
|
|
16919
18094
|
const req = https2.request(
|
|
16920
18095
|
{
|
|
16921
18096
|
hostname: target.address,
|
|
16922
18097
|
family: target.family,
|
|
16923
18098
|
port,
|
|
16924
|
-
path:
|
|
18099
|
+
path: path16,
|
|
16925
18100
|
method: "GET",
|
|
16926
18101
|
timeout: FETCH_TIMEOUT_MS,
|
|
16927
18102
|
servername: target.url.hostname,
|
|
@@ -17013,7 +18188,7 @@ function formatAuditFactorScore(factor) {
|
|
|
17013
18188
|
}
|
|
17014
18189
|
|
|
17015
18190
|
// src/snapshot-service.ts
|
|
17016
|
-
var
|
|
18191
|
+
var log10 = createLogger("Snapshot");
|
|
17017
18192
|
var ANALYSIS_PROVIDER_PRIORITY = ["openai", "claude", "gemini", "perplexity", "local"];
|
|
17018
18193
|
var SNAPSHOT_QUERY_COUNT = 6;
|
|
17019
18194
|
var ProviderExecutionGate2 = class {
|
|
@@ -17156,7 +18331,7 @@ var SnapshotService = class {
|
|
|
17156
18331
|
return mapAuditReport(report);
|
|
17157
18332
|
} catch (err) {
|
|
17158
18333
|
const message = err instanceof Error ? err.message : String(err);
|
|
17159
|
-
|
|
18334
|
+
log10.warn("audit.failed", { homepageUrl, error: message });
|
|
17160
18335
|
return {
|
|
17161
18336
|
url: homepageUrl,
|
|
17162
18337
|
finalUrl: homepageUrl,
|
|
@@ -17186,7 +18361,7 @@ var SnapshotService = class {
|
|
|
17186
18361
|
phrases: parsedPhrases
|
|
17187
18362
|
};
|
|
17188
18363
|
} catch (err) {
|
|
17189
|
-
|
|
18364
|
+
log10.warn("profile.generation-failed", {
|
|
17190
18365
|
domain: ctx.domain,
|
|
17191
18366
|
provider: ctx.analysisProvider.adapter.name,
|
|
17192
18367
|
error: err instanceof Error ? err.message : String(err)
|
|
@@ -17328,7 +18503,7 @@ var SnapshotService = class {
|
|
|
17328
18503
|
recommendedActions: uniqueStrings(parsed.recommendedActions ?? []).slice(0, 4)
|
|
17329
18504
|
};
|
|
17330
18505
|
} catch (err) {
|
|
17331
|
-
|
|
18506
|
+
log10.warn("response.analysis-failed", {
|
|
17332
18507
|
provider: ctx.analysisProvider.adapter.name,
|
|
17333
18508
|
error: err instanceof Error ? err.message : String(err)
|
|
17334
18509
|
});
|
|
@@ -17611,9 +18786,9 @@ function clipText(value, length) {
|
|
|
17611
18786
|
}
|
|
17612
18787
|
|
|
17613
18788
|
// src/server.ts
|
|
17614
|
-
var _require2 =
|
|
18789
|
+
var _require2 = createRequire3(import.meta.url);
|
|
17615
18790
|
var { version: PKG_VERSION } = _require2("../package.json");
|
|
17616
|
-
var
|
|
18791
|
+
var log11 = createLogger("Server");
|
|
17617
18792
|
var DEFAULT_QUOTA = {
|
|
17618
18793
|
maxConcurrency: 2,
|
|
17619
18794
|
maxRequestsPerMinute: 10,
|
|
@@ -17644,7 +18819,7 @@ function summarizeProviderConfig(provider, config) {
|
|
|
17644
18819
|
};
|
|
17645
18820
|
}
|
|
17646
18821
|
function hashApiKey(key) {
|
|
17647
|
-
return
|
|
18822
|
+
return crypto27.createHash("sha256").update(key).digest("hex");
|
|
17648
18823
|
}
|
|
17649
18824
|
function parseCookies2(header) {
|
|
17650
18825
|
if (!header) return {};
|
|
@@ -17700,7 +18875,7 @@ function applyLegacyCredentials(rows, config) {
|
|
|
17700
18875
|
}
|
|
17701
18876
|
if (migratedGoogle > 0) {
|
|
17702
18877
|
saveConfigPatch({ google: config.google });
|
|
17703
|
-
|
|
18878
|
+
log11.info("credentials.migrated", { type: "google", count: migratedGoogle });
|
|
17704
18879
|
}
|
|
17705
18880
|
let migratedGa4 = 0;
|
|
17706
18881
|
for (const row of rows.ga4) {
|
|
@@ -17718,7 +18893,7 @@ function applyLegacyCredentials(rows, config) {
|
|
|
17718
18893
|
}
|
|
17719
18894
|
if (migratedGa4 > 0) {
|
|
17720
18895
|
saveConfigPatch({ ga4: config.ga4 });
|
|
17721
|
-
|
|
18896
|
+
log11.info("credentials.migrated", { type: "ga4", count: migratedGa4 });
|
|
17722
18897
|
}
|
|
17723
18898
|
}
|
|
17724
18899
|
async function createServer(opts) {
|
|
@@ -17750,11 +18925,11 @@ async function createServer(opts) {
|
|
|
17750
18925
|
applyLegacyCredentials(legacyRows, opts.config);
|
|
17751
18926
|
dropLegacyCredentialColumns(opts.db);
|
|
17752
18927
|
} catch (err) {
|
|
17753
|
-
|
|
18928
|
+
log11.warn("credentials.migration.failed", {
|
|
17754
18929
|
error: err instanceof Error ? err.message : String(err)
|
|
17755
18930
|
});
|
|
17756
18931
|
}
|
|
17757
|
-
|
|
18932
|
+
log11.info("providers.configured", { providers: Object.keys(providers).filter((k) => {
|
|
17758
18933
|
const p = providers[k];
|
|
17759
18934
|
return p?.apiKey || p?.baseUrl || p?.vertexProject;
|
|
17760
18935
|
}) });
|
|
@@ -17802,7 +18977,7 @@ async function createServer(opts) {
|
|
|
17802
18977
|
intelligenceService,
|
|
17803
18978
|
(runId, projectId, result) => notifier.dispatchInsightWebhooks(runId, projectId, result),
|
|
17804
18979
|
async ({ runId, projectId, insightCount, criticalOrHigh }) => {
|
|
17805
|
-
const project = opts.db.select({ name: projects.name }).from(projects).where(
|
|
18980
|
+
const project = opts.db.select({ name: projects.name }).from(projects).where(eq29(projects.id, projectId)).get();
|
|
17806
18981
|
if (!project) return;
|
|
17807
18982
|
sessionRegistry.queueFollowUp(project.name, {
|
|
17808
18983
|
role: "user",
|
|
@@ -17814,8 +18989,8 @@ async function createServer(opts) {
|
|
|
17814
18989
|
);
|
|
17815
18990
|
jobRunner.onRunCompleted = (runId, projectId) => runCoordinator.onRunCompleted(runId, projectId);
|
|
17816
18991
|
const snapshotService = new SnapshotService(registry);
|
|
17817
|
-
const orphanedOpenClawDir =
|
|
17818
|
-
if (
|
|
18992
|
+
const orphanedOpenClawDir = path15.join(os6.homedir(), ".openclaw-aero");
|
|
18993
|
+
if (fs12.existsSync(orphanedOpenClawDir)) {
|
|
17819
18994
|
app.log.warn(
|
|
17820
18995
|
{ path: orphanedOpenClawDir },
|
|
17821
18996
|
"OpenClaw gateway is no longer used. Remove ~/.openclaw-aero/ manually to reclaim the directory."
|
|
@@ -17896,7 +19071,7 @@ async function createServer(opts) {
|
|
|
17896
19071
|
return removed;
|
|
17897
19072
|
}
|
|
17898
19073
|
};
|
|
17899
|
-
const googleStateSecret = process.env.GOOGLE_STATE_SECRET ??
|
|
19074
|
+
const googleStateSecret = process.env.GOOGLE_STATE_SECRET ?? crypto27.randomBytes(32).toString("hex");
|
|
17900
19075
|
const googleConnectionStore = {
|
|
17901
19076
|
listConnections: (domain) => listGoogleConnections(opts.config, domain),
|
|
17902
19077
|
getConnection: (domain, connectionType) => getGoogleConnection(opts.config, domain, connectionType),
|
|
@@ -17942,11 +19117,11 @@ async function createServer(opts) {
|
|
|
17942
19117
|
const apiPrefix = basePath ? `${basePath}api/v1` : "/api/v1";
|
|
17943
19118
|
if (opts.config.apiKey) {
|
|
17944
19119
|
const keyHash = hashApiKey(opts.config.apiKey);
|
|
17945
|
-
const existing = opts.db.select().from(apiKeys).where(
|
|
19120
|
+
const existing = opts.db.select().from(apiKeys).where(eq29(apiKeys.keyHash, keyHash)).get();
|
|
17946
19121
|
if (!existing) {
|
|
17947
19122
|
const prefix = opts.config.apiKey.slice(0, 12);
|
|
17948
19123
|
opts.db.insert(apiKeys).values({
|
|
17949
|
-
id: `key_${
|
|
19124
|
+
id: `key_${crypto27.randomBytes(8).toString("hex")}`,
|
|
17950
19125
|
name: "default",
|
|
17951
19126
|
keyHash,
|
|
17952
19127
|
keyPrefix: prefix,
|
|
@@ -17970,7 +19145,7 @@ async function createServer(opts) {
|
|
|
17970
19145
|
};
|
|
17971
19146
|
const createSession = (apiKeyId) => {
|
|
17972
19147
|
pruneExpiredSessions();
|
|
17973
|
-
const sessionId =
|
|
19148
|
+
const sessionId = crypto27.randomBytes(32).toString("hex");
|
|
17974
19149
|
sessions.set(sessionId, {
|
|
17975
19150
|
apiKeyId,
|
|
17976
19151
|
expiresAt: Date.now() + SESSION_TTL_MS
|
|
@@ -17994,7 +19169,7 @@ async function createServer(opts) {
|
|
|
17994
19169
|
};
|
|
17995
19170
|
const getDefaultApiKey = () => {
|
|
17996
19171
|
if (!opts.config.apiKey) return void 0;
|
|
17997
|
-
return opts.db.select().from(apiKeys).where(
|
|
19172
|
+
return opts.db.select().from(apiKeys).where(eq29(apiKeys.keyHash, hashApiKey(opts.config.apiKey))).get();
|
|
17998
19173
|
};
|
|
17999
19174
|
const createPasswordSession = (reply) => {
|
|
18000
19175
|
const key = getDefaultApiKey();
|
|
@@ -18051,12 +19226,12 @@ async function createServer(opts) {
|
|
|
18051
19226
|
return reply.send({ authenticated: true });
|
|
18052
19227
|
}
|
|
18053
19228
|
if (apiKey) {
|
|
18054
|
-
const key = opts.db.select().from(apiKeys).where(
|
|
19229
|
+
const key = opts.db.select().from(apiKeys).where(eq29(apiKeys.keyHash, hashApiKey(apiKey))).get();
|
|
18055
19230
|
if (!key || key.revokedAt) {
|
|
18056
19231
|
const err2 = authInvalid();
|
|
18057
19232
|
return reply.status(err2.statusCode).send(err2.toJSON());
|
|
18058
19233
|
}
|
|
18059
|
-
opts.db.update(apiKeys).set({ lastUsedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(
|
|
19234
|
+
opts.db.update(apiKeys).set({ lastUsedAt: (/* @__PURE__ */ new Date()).toISOString() }).where(eq29(apiKeys.id, key.id)).run();
|
|
18060
19235
|
const sessionId = createSession(key.id);
|
|
18061
19236
|
reply.header("set-cookie", serializeSessionCookie({
|
|
18062
19237
|
name: SESSION_COOKIE_NAME,
|
|
@@ -18123,6 +19298,54 @@ async function createServer(opts) {
|
|
|
18123
19298
|
app.log.error({ runId, err }, "Inspect sitemap failed");
|
|
18124
19299
|
});
|
|
18125
19300
|
},
|
|
19301
|
+
getBacklinksStatus: () => ({
|
|
19302
|
+
duckdbInstalled: isDuckdbInstalled(),
|
|
19303
|
+
duckdbVersion: readInstalledVersion() ?? void 0,
|
|
19304
|
+
duckdbSpec: DUCKDB_SPEC,
|
|
19305
|
+
pluginDir: PLUGIN_DIR
|
|
19306
|
+
}),
|
|
19307
|
+
onInstallBacklinks: async () => {
|
|
19308
|
+
const result = await installDuckdb({ onLog: (line) => app.log.info({ line }, "duckdb install") });
|
|
19309
|
+
return {
|
|
19310
|
+
installed: true,
|
|
19311
|
+
version: result.version,
|
|
19312
|
+
path: result.path,
|
|
19313
|
+
alreadyPresent: result.alreadyPresent
|
|
19314
|
+
};
|
|
19315
|
+
},
|
|
19316
|
+
onReleaseSyncRequested: (syncId, release) => {
|
|
19317
|
+
executeReleaseSync(opts.db, syncId, { release }).catch((err) => {
|
|
19318
|
+
app.log.error({ syncId, err }, "Common Crawl release sync failed");
|
|
19319
|
+
});
|
|
19320
|
+
},
|
|
19321
|
+
onBacklinkExtractRequested: (runId, projectId, release) => {
|
|
19322
|
+
executeBacklinkExtract(opts.db, runId, projectId, { release }).catch((err) => {
|
|
19323
|
+
app.log.error({ runId, err }, "Backlink extract failed");
|
|
19324
|
+
});
|
|
19325
|
+
},
|
|
19326
|
+
onBacklinksPruneCache: (release) => {
|
|
19327
|
+
try {
|
|
19328
|
+
pruneCachedRelease(release);
|
|
19329
|
+
} catch (err) {
|
|
19330
|
+
app.log.error({ release, err }, "Failed to prune cached release");
|
|
19331
|
+
}
|
|
19332
|
+
},
|
|
19333
|
+
listCachedReleases: () => {
|
|
19334
|
+
const cached = listCachedReleases();
|
|
19335
|
+
const syncByRelease = /* @__PURE__ */ new Map();
|
|
19336
|
+
for (const row of opts.db.select().from(ccReleaseSyncs).all()) {
|
|
19337
|
+
syncByRelease.set(row.release, { status: row.status, updatedAt: row.updatedAt });
|
|
19338
|
+
}
|
|
19339
|
+
return cached.map((entry) => {
|
|
19340
|
+
const sync = syncByRelease.get(entry.release);
|
|
19341
|
+
return {
|
|
19342
|
+
release: entry.release,
|
|
19343
|
+
syncStatus: sync?.status ?? null,
|
|
19344
|
+
bytes: entry.bytes,
|
|
19345
|
+
lastUsedAt: entry.lastUsedAt
|
|
19346
|
+
};
|
|
19347
|
+
});
|
|
19348
|
+
},
|
|
18126
19349
|
openApiInfo: {
|
|
18127
19350
|
title: "Canonry API",
|
|
18128
19351
|
version: PKG_VERSION,
|
|
@@ -18203,7 +19426,7 @@ async function createServer(opts) {
|
|
|
18203
19426
|
const targetProjectIds = affectedProjectIds.length > 0 ? affectedProjectIds : [null];
|
|
18204
19427
|
const createdAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
18205
19428
|
opts.db.insert(auditLog).values(targetProjectIds.map((projectId) => ({
|
|
18206
|
-
id:
|
|
19429
|
+
id: crypto27.randomUUID(),
|
|
18207
19430
|
projectId,
|
|
18208
19431
|
actor: "api",
|
|
18209
19432
|
action: existing ? "provider.updated" : "provider.created",
|
|
@@ -18334,10 +19557,10 @@ async function createServer(opts) {
|
|
|
18334
19557
|
return snapshotService.createReport(input);
|
|
18335
19558
|
}
|
|
18336
19559
|
});
|
|
18337
|
-
const dirname =
|
|
18338
|
-
const assetsDir =
|
|
18339
|
-
if (
|
|
18340
|
-
const indexPath =
|
|
19560
|
+
const dirname = path15.dirname(fileURLToPath2(import.meta.url));
|
|
19561
|
+
const assetsDir = path15.join(dirname, "..", "assets");
|
|
19562
|
+
if (fs12.existsSync(assetsDir)) {
|
|
19563
|
+
const indexPath = path15.join(assetsDir, "index.html");
|
|
18341
19564
|
const injectConfig = (html) => {
|
|
18342
19565
|
const clientConfig = {};
|
|
18343
19566
|
if (basePath) clientConfig.basePath = basePath;
|
|
@@ -18355,8 +19578,8 @@ async function createServer(opts) {
|
|
|
18355
19578
|
index: false
|
|
18356
19579
|
});
|
|
18357
19580
|
const serveIndex = (_request, reply) => {
|
|
18358
|
-
if (
|
|
18359
|
-
const html =
|
|
19581
|
+
if (fs12.existsSync(indexPath)) {
|
|
19582
|
+
const html = fs12.readFileSync(indexPath, "utf-8");
|
|
18360
19583
|
return reply.type("text/html").send(injectConfig(html));
|
|
18361
19584
|
}
|
|
18362
19585
|
return reply.status(404).send({ error: "Dashboard not built" });
|
|
@@ -18376,8 +19599,8 @@ async function createServer(opts) {
|
|
|
18376
19599
|
if (basePath && !url.startsWith(basePath)) {
|
|
18377
19600
|
return reply.status(404).send({ error: "Not found", path: request.url });
|
|
18378
19601
|
}
|
|
18379
|
-
if (
|
|
18380
|
-
const html =
|
|
19602
|
+
if (fs12.existsSync(indexPath)) {
|
|
19603
|
+
const html = fs12.readFileSync(indexPath, "utf-8");
|
|
18381
19604
|
return reply.type("text/html").send(injectConfig(html));
|
|
18382
19605
|
}
|
|
18383
19606
|
return reply.status(404).send({ error: "Not found" });
|