@ainyc/canonry 2.13.2 → 2.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/assets/{index-CKzK0os-.js → index-BwFUCV6e.js} +116 -116
- package/assets/assets/{index-CAewPdsZ.css → index-U2SLimrz.css} +1 -1
- package/assets/index.html +2 -2
- package/dist/{chunk-DHMCIJMQ.js → chunk-3T64Y7GR.js} +125 -14
- package/dist/{chunk-UM6RDSRJ.js → chunk-FV6PY5UE.js} +20 -1
- package/dist/{chunk-XJS7NALL.js → chunk-QTS7VZXN.js} +96 -1
- package/dist/cli.js +143 -15
- package/dist/index.js +3 -3
- package/dist/{intelligence-service-54F3NGPM.js → intelligence-service-AEI46KC5.js} +1 -1
- package/dist/mcp.js +12 -1
- package/package.json +6 -6
package/assets/index.html
CHANGED
|
@@ -12,8 +12,8 @@
|
|
|
12
12
|
<link rel="icon" type="image/png" sizes="32x32" href="./favicon-32.png" />
|
|
13
13
|
<link rel="apple-touch-icon" href="./apple-touch-icon.png" />
|
|
14
14
|
<title>Canonry</title>
|
|
15
|
-
<script type="module" crossorigin src="./assets/index-
|
|
16
|
-
<link rel="stylesheet" crossorigin href="./assets/index-
|
|
15
|
+
<script type="module" crossorigin src="./assets/index-BwFUCV6e.js"></script>
|
|
16
|
+
<link rel="stylesheet" crossorigin href="./assets/index-U2SLimrz.css">
|
|
17
17
|
</head>
|
|
18
18
|
<body>
|
|
19
19
|
<div id="root"></div>
|
|
@@ -39,6 +39,7 @@ import {
|
|
|
39
39
|
locationContextSchema,
|
|
40
40
|
missingDependency,
|
|
41
41
|
normalizeProjectDomain,
|
|
42
|
+
normalizeUrlPath,
|
|
42
43
|
notFound,
|
|
43
44
|
notImplemented,
|
|
44
45
|
parseRunError,
|
|
@@ -59,7 +60,7 @@ import {
|
|
|
59
60
|
visibilityStateFromAnswerMentioned,
|
|
60
61
|
windowCutoff,
|
|
61
62
|
wordpressEnvSchema
|
|
62
|
-
} from "./chunk-
|
|
63
|
+
} from "./chunk-QTS7VZXN.js";
|
|
63
64
|
import {
|
|
64
65
|
IntelligenceService,
|
|
65
66
|
agentMemory,
|
|
@@ -97,7 +98,7 @@ import {
|
|
|
97
98
|
runs,
|
|
98
99
|
schedules,
|
|
99
100
|
usageCounters
|
|
100
|
-
} from "./chunk-
|
|
101
|
+
} from "./chunk-FV6PY5UE.js";
|
|
101
102
|
|
|
102
103
|
// src/telemetry.ts
|
|
103
104
|
import crypto from "crypto";
|
|
@@ -5371,15 +5372,14 @@ var routeCatalog = [
|
|
|
5371
5372
|
method: "post",
|
|
5372
5373
|
path: "/api/v1/backlinks/syncs",
|
|
5373
5374
|
summary: "Queue a workspace-wide Common Crawl release sync",
|
|
5374
|
-
description: "Creates a `cc_release_syncs` row and fires the sync callback. Idempotent: an existing in-flight row for the same release is returned.",
|
|
5375
|
+
description: "Creates a `cc_release_syncs` row and fires the sync callback. Idempotent: an existing in-flight row for the same release is returned. When `release` is omitted, the server auto-discovers the latest available Common Crawl release.",
|
|
5375
5376
|
tags: ["backlinks"],
|
|
5376
5377
|
requestBody: {
|
|
5377
|
-
required:
|
|
5378
|
+
required: false,
|
|
5378
5379
|
content: {
|
|
5379
5380
|
"application/json": {
|
|
5380
5381
|
schema: {
|
|
5381
5382
|
type: "object",
|
|
5382
|
-
required: ["release"],
|
|
5383
5383
|
properties: {
|
|
5384
5384
|
release: stringSchema
|
|
5385
5385
|
}
|
|
@@ -5422,6 +5422,17 @@ var routeCatalog = [
|
|
|
5422
5422
|
200: { description: "Cached release metadata returned." }
|
|
5423
5423
|
}
|
|
5424
5424
|
},
|
|
5425
|
+
{
|
|
5426
|
+
method: "get",
|
|
5427
|
+
path: "/api/v1/backlinks/latest-release",
|
|
5428
|
+
summary: "Auto-discover the latest available Common Crawl hyperlinkgraph release",
|
|
5429
|
+
description: "Probes Common Crawl by HEAD-checking quarterly release slugs and returns the newest one published. The local server caches the result for ~5 minutes so repeated calls do not hammer Common Crawl.",
|
|
5430
|
+
tags: ["backlinks"],
|
|
5431
|
+
responses: {
|
|
5432
|
+
200: { description: "Latest available release, or null when no candidate slug responded." },
|
|
5433
|
+
422: { description: "Backlinks feature is not available on this deployment." }
|
|
5434
|
+
}
|
|
5435
|
+
},
|
|
5425
5436
|
{
|
|
5426
5437
|
method: "delete",
|
|
5427
5438
|
path: "/api/v1/backlinks/cache/{release}",
|
|
@@ -8651,6 +8662,7 @@ async function ga4Routes(app, opts) {
|
|
|
8651
8662
|
projectId: project.id,
|
|
8652
8663
|
date: row.date,
|
|
8653
8664
|
landingPage: row.landingPage,
|
|
8665
|
+
landingPageNormalized: normalizeUrlPath(row.landingPage),
|
|
8654
8666
|
sessions: row.sessions,
|
|
8655
8667
|
organicSessions: row.organicSessions,
|
|
8656
8668
|
users: row.users,
|
|
@@ -8775,11 +8787,11 @@ async function ga4Routes(app, opts) {
|
|
|
8775
8787
|
periodEnd: gaTrafficSummaries.periodEnd
|
|
8776
8788
|
}).from(gaTrafficSummaries).where(eq19(gaTrafficSummaries.projectId, project.id)).get();
|
|
8777
8789
|
const rows = app.db.select({
|
|
8778
|
-
landingPage: gaTrafficSnapshots.landingPage
|
|
8790
|
+
landingPage: sql5`COALESCE(${gaTrafficSnapshots.landingPageNormalized}, ${gaTrafficSnapshots.landingPage})`,
|
|
8779
8791
|
sessions: sql5`SUM(${gaTrafficSnapshots.sessions})`,
|
|
8780
8792
|
organicSessions: sql5`SUM(${gaTrafficSnapshots.organicSessions})`,
|
|
8781
8793
|
users: sql5`SUM(${gaTrafficSnapshots.users})`
|
|
8782
|
-
}).from(gaTrafficSnapshots).where(and8(...snapshotConditions)).groupBy(gaTrafficSnapshots.landingPage).orderBy(sql5`SUM(${gaTrafficSnapshots.sessions}) DESC`).limit(limit).all();
|
|
8794
|
+
}).from(gaTrafficSnapshots).where(and8(...snapshotConditions)).groupBy(sql5`COALESCE(${gaTrafficSnapshots.landingPageNormalized}, ${gaTrafficSnapshots.landingPage})`).orderBy(sql5`SUM(${gaTrafficSnapshots.sessions}) DESC`).limit(limit).all();
|
|
8783
8795
|
const aiReferrals = app.db.select({
|
|
8784
8796
|
source: gaAiReferrals.source,
|
|
8785
8797
|
medium: gaAiReferrals.medium,
|
|
@@ -9036,11 +9048,11 @@ async function ga4Routes(app, opts) {
|
|
|
9036
9048
|
const project = resolveProject(app.db, request.params.name);
|
|
9037
9049
|
requireGa4Connection(opts, project.name, project.canonicalDomain);
|
|
9038
9050
|
const trafficPages = app.db.select({
|
|
9039
|
-
landingPage: gaTrafficSnapshots.landingPage
|
|
9051
|
+
landingPage: sql5`COALESCE(${gaTrafficSnapshots.landingPageNormalized}, ${gaTrafficSnapshots.landingPage})`,
|
|
9040
9052
|
sessions: sql5`SUM(${gaTrafficSnapshots.sessions})`,
|
|
9041
9053
|
organicSessions: sql5`SUM(${gaTrafficSnapshots.organicSessions})`,
|
|
9042
9054
|
users: sql5`SUM(${gaTrafficSnapshots.users})`
|
|
9043
|
-
}).from(gaTrafficSnapshots).where(eq19(gaTrafficSnapshots.projectId, project.id)).groupBy(gaTrafficSnapshots.landingPage).orderBy(sql5`SUM(${gaTrafficSnapshots.sessions}) DESC`).all();
|
|
9055
|
+
}).from(gaTrafficSnapshots).where(eq19(gaTrafficSnapshots.projectId, project.id)).groupBy(sql5`COALESCE(${gaTrafficSnapshots.landingPageNormalized}, ${gaTrafficSnapshots.landingPage})`).orderBy(sql5`SUM(${gaTrafficSnapshots.sessions}) DESC`).all();
|
|
9044
9056
|
return {
|
|
9045
9057
|
pages: trafficPages.map((r) => ({
|
|
9046
9058
|
landingPage: r.landingPage,
|
|
@@ -10712,6 +10724,60 @@ function forwardDomain(revDomain) {
|
|
|
10712
10724
|
function isValidReleaseId(id) {
|
|
10713
10725
|
return RELEASE_ID_REGEX.test(id);
|
|
10714
10726
|
}
|
|
10727
|
+
/**
 * Build a Common Crawl release id from its two components.
 *
 * @param {number|string} year - Release year, e.g. 2026.
 * @param {string} quarter - Quarter slug, e.g. "jan-feb-mar".
 * @returns {string} Id of the form `cc-main-<year>-<quarter>`.
 */
function formatReleaseId(year, quarter) {
  const segments = ["cc-main", year, quarter];
  return segments.join("-");
}
|
|
10730
|
+
|
|
10731
|
+
// ../integration-commoncrawl/src/release-discovery.ts
|
|
10732
|
+
// Quarter slugs ordered newest-first within a calendar year.
var QUARTERS = [
  "oct-nov-dec",
  "jul-aug-sep",
  "apr-may-jun",
  "jan-feb-mar"
];
/**
 * List the (year, quarter) pairs to probe, newest first.
 *
 * For the current UTC year only quarters that have already started are
 * emitted: a release named after a quarter that lies in the future cannot
 * exist yet, so probing it would only waste HEAD requests. Earlier years
 * contribute all four quarters.
 *
 * NOTE(review): `maxBack` is applied as a number of *years* to look back
 * (inclusive of the current year), although the caller's option is named
 * `maxQuartersBack`. The year-based span is kept for backward compatibility.
 *
 * @param {Date} now - Reference instant; read in UTC.
 * @param {number} maxBack - How many previous years to include.
 * @returns {{year: number, quarter: string}[]} Candidates, newest first.
 */
function probeCandidates(now, maxBack) {
  const year = now.getUTCFullYear();
  // QUARTERS is newest-first, so Q4 -> index 0 ... Q1 -> index 3.
  const currentQuarterIdx = 3 - Math.floor(now.getUTCMonth() / 3);
  const out = [];
  for (let y = year; y >= year - maxBack; y--) {
    const quarters = y === year ? QUARTERS.slice(currentQuarterIdx) : QUARTERS;
    for (const q of quarters) {
      out.push({ year: y, quarter: q });
    }
  }
  return out;
}
|
|
10748
|
+
/**
 * Check whether a release is published by HEAD-requesting both of its files.
 *
 * @param {string} release - Release id passed to `ccReleasePaths`.
 * @param {Function} [fetchImpl=fetch] - fetch-compatible function (injectable for tests).
 * @returns {Promise<object|null>} Release metadata (URLs, byte sizes from
 *   `content-length`, `last-modified` of the vertex file) when both HEAD
 *   requests succeed, otherwise `null`. Rejects if either request rejects.
 */
async function probeRelease(release, fetchImpl = fetch) {
  const { vertexUrl, edgesUrl } = ccReleasePaths(release);
  // Both files are checked concurrently; the release only counts when both exist.
  const [vertexRes, edgesRes] = await Promise.all(
    [vertexUrl, edgesUrl].map((url) => fetchImpl(url, { method: "HEAD" }))
  );
  if (!(vertexRes.ok && edgesRes.ok)) {
    return null;
  }
  const vertexBytes = parseContentLength(vertexRes.headers.get("content-length"));
  const edgesBytes = parseContentLength(edgesRes.headers.get("content-length"));
  const lastModified = vertexRes.headers.get("last-modified");
  return { release, vertexUrl, edgesUrl, vertexBytes, edgesBytes, lastModified };
}
|
|
10764
|
+
/**
 * Find the newest published release by probing candidates one at a time,
 * newest first, and returning the first that responds.
 *
 * @param {object} [opts]
 * @param {Date} [opts.now] - Reference date; defaults to the current time.
 * @param {number} [opts.maxQuartersBack=3] - Look-back bound handed to `probeCandidates`.
 * @param {Function} [opts.fetchImpl] - fetch-compatible function; defaults to global `fetch`.
 * @returns {Promise<object|null>} The first successful `probeRelease` result, or `null`.
 */
async function probeLatestRelease(opts = {}) {
  const referenceDate = opts.now ?? /* @__PURE__ */ new Date();
  const lookBack = opts.maxQuartersBack ?? 3;
  const doFetch = opts.fetchImpl ?? fetch;
  // Sequential on purpose: stop at the first (newest) release that exists.
  for (const candidate of probeCandidates(referenceDate, lookBack)) {
    const releaseId = formatReleaseId(candidate.year, candidate.quarter);
    const found = await probeRelease(releaseId, doFetch);
    if (found) {
      return found;
    }
  }
  return null;
}
|
|
10776
|
+
/**
 * Parse a `content-length` header value.
 *
 * @param {string|null|undefined} value - Raw header value.
 * @returns {number|null} Base-10 integer, or `null` when the value is
 *   missing/empty or does not start with a number.
 */
function parseContentLength(value) {
  if (value) {
    const parsed = Number.parseInt(value, 10);
    if (Number.isFinite(parsed)) {
      return parsed;
    }
  }
  return null;
}
|
|
10715
10781
|
|
|
10716
10782
|
// ../integration-commoncrawl/src/downloader.ts
|
|
10717
10783
|
import { createHash } from "crypto";
|
|
@@ -10739,7 +10805,7 @@ async function downloadFile(opts) {
|
|
|
10739
10805
|
if (!res.ok || !res.body) {
|
|
10740
10806
|
throw new Error(`HTTP ${res.status} ${res.statusText} for ${opts.url}`);
|
|
10741
10807
|
}
|
|
10742
|
-
const total =
|
|
10808
|
+
const total = parseContentLength2(res.headers.get("content-length"));
|
|
10743
10809
|
const hasher = createHash("sha256");
|
|
10744
10810
|
let bytes = 0;
|
|
10745
10811
|
const hashAndCount = new Transform({
|
|
@@ -10790,7 +10856,7 @@ async function unlinkIfExists(p) {
|
|
|
10790
10856
|
} catch {
|
|
10791
10857
|
}
|
|
10792
10858
|
}
|
|
10793
|
-
function
|
|
10859
|
+
function parseContentLength2(value) {
|
|
10794
10860
|
if (!value) return null;
|
|
10795
10861
|
const n = Number.parseInt(value, 10);
|
|
10796
10862
|
return Number.isFinite(n) ? n : null;
|
|
@@ -11093,8 +11159,22 @@ async function backlinksRoutes(app, opts) {
|
|
|
11093
11159
|
return reply.status(200).send(result);
|
|
11094
11160
|
});
|
|
11095
11161
|
app.post("/backlinks/syncs", async (request, reply) => {
|
|
11096
|
-
|
|
11097
|
-
if (!release
|
|
11162
|
+
let release = request.body?.release;
|
|
11163
|
+
if (!release) {
|
|
11164
|
+
if (!opts.discoverLatestRelease) {
|
|
11165
|
+
throw validationError(
|
|
11166
|
+
"No `release` provided and auto-discovery is unavailable on this deployment. Pass an explicit release id (e.g., cc-main-2026-jan-feb-mar)."
|
|
11167
|
+
);
|
|
11168
|
+
}
|
|
11169
|
+
const discovered = await opts.discoverLatestRelease();
|
|
11170
|
+
if (!discovered) {
|
|
11171
|
+
throw validationError(
|
|
11172
|
+
"Could not auto-discover the latest Common Crawl release. Pass an explicit `release` body parameter."
|
|
11173
|
+
);
|
|
11174
|
+
}
|
|
11175
|
+
release = discovered.release;
|
|
11176
|
+
}
|
|
11177
|
+
if (!isValidReleaseId(release)) {
|
|
11098
11178
|
throw validationError("Invalid release id. Expected form: cc-main-YYYY-{jan-feb-mar,apr-may-jun,jul-aug-sep,oct-nov-dec}");
|
|
11099
11179
|
}
|
|
11100
11180
|
if (!opts.getBacklinksStatus || !opts.onReleaseSyncRequested) {
|
|
@@ -11145,6 +11225,13 @@ async function backlinksRoutes(app, opts) {
|
|
|
11145
11225
|
const releases = opts.listCachedReleases?.() ?? [];
|
|
11146
11226
|
return reply.send(releases);
|
|
11147
11227
|
});
|
|
11228
|
+
// GET /backlinks/latest-release: returns whatever `opts.discoverLatestRelease`
// resolves to; throws a missing-dependency error when the option is not wired.
app.get("/backlinks/latest-release", async (_request, reply) => {
  const discoveryAvailable = Boolean(opts.discoverLatestRelease);
  if (!discoveryAvailable) {
    throw missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE);
  }
  return reply.send(await opts.discoverLatestRelease());
});
|
|
11148
11235
|
app.delete("/backlinks/cache/:release", async (request, reply) => {
|
|
11149
11236
|
const release = request.params.release;
|
|
11150
11237
|
if (!isValidReleaseId(release)) {
|
|
@@ -11941,7 +12028,8 @@ async function apiRoutes(app, opts) {
|
|
|
11941
12028
|
onReleaseSyncRequested: opts.onReleaseSyncRequested,
|
|
11942
12029
|
onBacklinkExtractRequested: opts.onBacklinkExtractRequested,
|
|
11943
12030
|
onBacklinksPruneCache: opts.onBacklinksPruneCache,
|
|
11944
|
-
listCachedReleases: opts.listCachedReleases
|
|
12031
|
+
listCachedReleases: opts.listCachedReleases,
|
|
12032
|
+
discoverLatestRelease: opts.discoverLatestRelease
|
|
11945
12033
|
});
|
|
11946
12034
|
await api.register(doctorRoutes, {
|
|
11947
12035
|
googleConnectionStore: opts.googleConnectionStore,
|
|
@@ -19112,6 +19200,28 @@ async function createServer(opts) {
|
|
|
19112
19200
|
}));
|
|
19113
19201
|
return reply.status(204).send();
|
|
19114
19202
|
});
|
|
19203
|
+
// How long a completed probe (a release, or a definitive "nothing found") is reused.
const LATEST_RELEASE_TTL_MS = 5 * 60 * 1e3;
// A probe that *threw* (network error, DNS failure, ...) is only remembered
// briefly, so a transient outage does not disable auto-discovery for the full
// TTL while repeated calls still do not hammer Common Crawl.
const LATEST_RELEASE_FAILURE_TTL_MS = 30 * 1e3;
// Single cache slot: `{ value, expiresAt }`, or null before the first probe.
let latestReleaseCache = null;
/**
 * Resolve the latest Common Crawl release, served from an in-memory cache.
 *
 * @returns {Promise<object|null>} Release metadata copied from
 *   `probeLatestRelease()`, or `null` when no candidate responded or the
 *   probe failed (failures are logged as warnings, never thrown).
 */
const discoverLatestRelease = async () => {
  const now = Date.now();
  if (latestReleaseCache && latestReleaseCache.expiresAt > now) {
    return latestReleaseCache.value;
  }
  let probed;
  try {
    probed = await probeLatestRelease();
  } catch (err) {
    app.log.warn({ err }, "Common Crawl latest-release probe failed");
    latestReleaseCache = { value: null, expiresAt: now + LATEST_RELEASE_FAILURE_TTL_MS };
    return null;
  }
  const value = probed ? {
    release: probed.release,
    vertexUrl: probed.vertexUrl,
    edgesUrl: probed.edgesUrl,
    vertexBytes: probed.vertexBytes,
    edgesBytes: probed.edgesBytes,
    lastModified: probed.lastModified
  } : null;
  latestReleaseCache = { value, expiresAt: now + LATEST_RELEASE_TTL_MS };
  return value;
};
|
|
19115
19225
|
await app.register(apiRoutes, {
|
|
19116
19226
|
db: opts.db,
|
|
19117
19227
|
routePrefix: apiPrefix,
|
|
@@ -19220,6 +19330,7 @@ async function createServer(opts) {
|
|
|
19220
19330
|
};
|
|
19221
19331
|
});
|
|
19222
19332
|
},
|
|
19333
|
+
discoverLatestRelease,
|
|
19223
19334
|
openApiInfo: {
|
|
19224
19335
|
title: "Canonry API",
|
|
19225
19336
|
version: PKG_VERSION,
|
|
@@ -310,6 +310,14 @@ var gaTrafficSnapshots = sqliteTable("ga_traffic_snapshots", {
|
|
|
310
310
|
projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
|
|
311
311
|
date: text("date").notNull(),
|
|
312
312
|
landingPage: text("landing_page").notNull(),
|
|
313
|
+
/**
|
|
314
|
+
* Canonicalized form of `landingPage` produced by `normalizeUrlPath()` in
|
|
315
|
+
* `@ainyc/canonry-contracts`. Nullable so existing rows survive migration;
|
|
316
|
+
* new GA4 sync writes populate it. Per-page aggregations should
|
|
317
|
+
* `GROUP BY COALESCE(landing_page_normalized, landing_page)` so
|
|
318
|
+
* partially-backfilled state still aggregates correctly.
|
|
319
|
+
*/
|
|
320
|
+
landingPageNormalized: text("landing_page_normalized"),
|
|
313
321
|
sessions: integer("sessions").notNull().default(0),
|
|
314
322
|
organicSessions: integer("organic_sessions").notNull().default(0),
|
|
315
323
|
users: integer("users").notNull().default(0),
|
|
@@ -318,6 +326,7 @@ var gaTrafficSnapshots = sqliteTable("ga_traffic_snapshots", {
|
|
|
318
326
|
}, (table) => [
|
|
319
327
|
index("idx_ga_traffic_project_date").on(table.projectId, table.date),
|
|
320
328
|
index("idx_ga_traffic_page").on(table.landingPage),
|
|
329
|
+
index("idx_ga_traffic_page_normalized").on(table.projectId, table.date, table.landingPageNormalized),
|
|
321
330
|
index("idx_ga_traffic_run").on(table.syncRunId)
|
|
322
331
|
]);
|
|
323
332
|
var gaAiReferrals = sqliteTable("ga_ai_referrals", {
|
|
@@ -1049,7 +1058,17 @@ var MIGRATIONS = [
|
|
|
1049
1058
|
WHEN discovery_date IS NOT NULL THEN 0
|
|
1050
1059
|
ELSE NULL
|
|
1051
1060
|
END
|
|
1052
|
-
WHERE created_at < '2026-04-22T00:00:00Z'
|
|
1061
|
+
WHERE created_at < '2026-04-22T00:00:00Z'`,
|
|
1062
|
+
// v44: Canonicalized landing-page column for ga_traffic_snapshots.
|
|
1063
|
+
// Populated by GA4 sync via normalizeUrlPath() in
|
|
1064
|
+
// @ainyc/canonry-contracts. Nullable; existing rows are filled in by
|
|
1065
|
+
// `canonry backfill normalized-paths`. Read queries should
|
|
1066
|
+
// `GROUP BY COALESCE(landing_page_normalized, landing_page)` so
|
|
1067
|
+
// partially-backfilled state still aggregates correctly.
|
|
1068
|
+
// See plans/ai-attribution-research.md "Step 1 — data hygiene".
|
|
1069
|
+
`ALTER TABLE ga_traffic_snapshots ADD COLUMN landing_page_normalized TEXT`,
|
|
1070
|
+
`CREATE INDEX IF NOT EXISTS idx_ga_traffic_page_normalized
|
|
1071
|
+
ON ga_traffic_snapshots(project_id, date, landing_page_normalized)`
|
|
1053
1072
|
];
|
|
1054
1073
|
function isDuplicateColumnError(err) {
|
|
1055
1074
|
if (!(err instanceof Error)) return false;
|
|
@@ -1790,6 +1790,97 @@ function summarizeCheckResults(results) {
|
|
|
1790
1790
|
return summary;
|
|
1791
1791
|
}
|
|
1792
1792
|
|
|
1793
|
+
// ../contracts/src/url-normalize.ts
|
|
1794
|
+
// Query-parameter names that carry tracking state only and never change
// which page is being viewed. Matched exactly (case-sensitive).
var STRIP_KEYS = /* @__PURE__ */ new Set([
  // Click identifiers
  "fbclid",
  "gclid",
  "msclkid",
  "ttclid",
  "li_fat_id",
  "igshid",
  "yclid",
  "dclid",
  "gbraid",
  "wbraid",
  // Mailchimp
  "mc_cid",
  "mc_eid",
  // Google Analytics linkers
  "_ga",
  "_gl",
  // Google Tag Manager debug
  "gtm_latency",
  "gtm_debug"
]);
/**
 * Decide whether a query-parameter key should be removed during normalization.
 *
 * @param {string} key - Raw (undecoded) parameter name.
 * @returns {boolean} True for keys in `STRIP_KEYS` and for any `utm_*` key.
 */
function shouldStrip(key) {
  return STRIP_KEYS.has(key) || key.startsWith("utm_");
}
|
|
1821
|
+
/**
 * Split a raw query string (without the leading "?") into key/value pairs.
 *
 * Values are kept undecoded. A segment without "=" yields `value: null`, so
 * `?flag` and `?flag=` stay distinguishable. Empty segments (from "&&" or a
 * leading/trailing "&") carry no information and are dropped; otherwise
 * "/p?a=1&" and "/p?a=1" would normalize to different strings.
 *
 * @param {string} query - Query string, possibly empty.
 * @returns {{key: string, value: string|null}[]} Pairs in original order.
 */
function parseQuery(query) {
  if (query === "") return [];
  return query.split("&").filter((segment) => segment !== "").map((pair) => {
    const eq = pair.indexOf("=");
    if (eq === -1) return { key: pair, value: null };
    return { key: pair.slice(0, eq), value: pair.slice(eq + 1) };
  });
}
|
|
1829
|
+
/**
 * Serialize key/value pairs back into a query string (no leading "?").
 *
 * @param {{key: string, value: string|null}[]} pairs - Pairs to join, in order.
 * @returns {string} `key=value` segments joined by "&"; a pair whose value
 *   is `null` is written as the bare key.
 */
function encodeQuery(pairs) {
  const segments = [];
  for (const pair of pairs) {
    if (pair.value === null) {
      segments.push(pair.key);
    } else {
      segments.push(`${pair.key}=${pair.value}`);
    }
  }
  return segments.join("&");
}
|
|
1832
|
+
/**
 * Map the root index documents onto the site root.
 *
 * @param {string} path2 - URL path.
 * @returns {string} "/" for exactly "/index.html" or "/index.php";
 *   every other path is returned unchanged.
 */
function collapseRootIndex(path2) {
  switch (path2) {
    case "/index.html":
    case "/index.php":
      return "/";
    default:
      return path2;
  }
}
|
|
1836
|
+
/**
 * Remove trailing slashes from a path while keeping the root intact.
 *
 * A path made only of slashes ("//", "///") collapses to "/" rather than to
 * the empty string, so the result is always a usable path.
 *
 * @param {string} path2 - URL path.
 * @returns {string} The path without trailing "/" characters; "/" and ""
 *   are returned unchanged.
 */
function dropTrailingSlash(path2) {
  if (path2.length <= 1 || !path2.endsWith("/")) {
    return path2;
  }
  const stripped = path2.replace(/\/+$/, "");
  // Only slashes were present: fall back to the root path.
  return stripped === "" ? "/" : stripped;
}
|
|
1842
|
+
/**
 * Canonicalize a landing page (absolute http(s) URL or bare path) into a
 * `path[?query]` string.
 *
 * Steps: trim; drop the fragment; for absolute URLs keep only pathname and
 * search; collapse a root index document to "/"; drop trailing slashes;
 * remove tracking parameters (see `shouldStrip`); sort the remaining
 * parameters by key.
 *
 * @param {string|null|undefined} input - Raw landing page value.
 * @returns {string|null} Canonical form, or `null` for nullish/blank input,
 *   the "(not set)" placeholder, or an absolute URL that fails to parse.
 */
function normalizeUrlPath(input) {
  if (input == null) return null;
  const text = input.trim();
  if (text === "" || text === "(not set)") return null;
  let rawPath;
  let rawQuery;
  if (/^https?:\/\//i.test(text)) {
    let parsed;
    try {
      parsed = new URL(text);
    } catch {
      return null;
    }
    rawPath = parsed.pathname || "/";
    rawQuery = parsed.search.startsWith("?") ? parsed.search.slice(1) : parsed.search;
  } else {
    // Bare path: cut the fragment first, then split at the first "?".
    const hashAt = text.indexOf("#");
    const withoutHash = hashAt === -1 ? text : text.slice(0, hashAt);
    const queryAt = withoutHash.indexOf("?");
    rawPath = queryAt === -1 ? withoutHash : withoutHash.slice(0, queryAt);
    rawQuery = queryAt === -1 ? "" : withoutHash.slice(queryAt + 1);
  }
  const canonicalPath = dropTrailingSlash(collapseRootIndex(rawPath === "" ? "/" : rawPath));
  const kept = parseQuery(rawQuery).filter((pair) => !shouldStrip(pair.key));
  // Sort by key only, so repeated keys keep their relative order.
  kept.sort((left, right) => left.key < right.key ? -1 : left.key > right.key ? 1 : 0);
  return kept.length === 0 ? canonicalPath : `${canonicalPath}?${encodeQuery(kept)}`;
}
|
|
1883
|
+
|
|
1793
1884
|
// src/client.ts
|
|
1794
1885
|
function createApiClient() {
|
|
1795
1886
|
const config = loadConfig();
|
|
@@ -2409,7 +2500,10 @@ var ApiClient = class {
|
|
|
2409
2500
|
return this.request("POST", "/backlinks/install");
|
|
2410
2501
|
}
|
|
2411
2502
|
async backlinksTriggerSync(release) {
|
|
2412
|
-
return this.request("POST", "/backlinks/syncs", { release });
|
|
2503
|
+
return this.request("POST", "/backlinks/syncs", release ? { release } : void 0);
|
|
2504
|
+
}
|
|
2505
|
+
async backlinksLatestRelease() {
|
|
2506
|
+
return this.request("GET", "/backlinks/latest-release");
|
|
2413
2507
|
}
|
|
2414
2508
|
async backlinksLatestSync() {
|
|
2415
2509
|
return this.request("GET", "/backlinks/syncs/latest");
|
|
@@ -2519,6 +2613,7 @@ export {
|
|
|
2519
2613
|
CheckScopes,
|
|
2520
2614
|
CheckCategories,
|
|
2521
2615
|
summarizeCheckResults,
|
|
2616
|
+
normalizeUrlPath,
|
|
2522
2617
|
createApiClient,
|
|
2523
2618
|
ApiClient
|
|
2524
2619
|
};
|
package/dist/cli.js
CHANGED
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
setGoogleAuthConfig,
|
|
18
18
|
showFirstRunNotice,
|
|
19
19
|
trackEvent
|
|
20
|
-
} from "./chunk-
|
|
20
|
+
} from "./chunk-3T64Y7GR.js";
|
|
21
21
|
import {
|
|
22
22
|
CcReleaseSyncStatuses,
|
|
23
23
|
CheckScopes,
|
|
@@ -37,6 +37,7 @@ import {
|
|
|
37
37
|
getConfigPath,
|
|
38
38
|
isEndpointMissing,
|
|
39
39
|
loadConfig,
|
|
40
|
+
normalizeUrlPath,
|
|
40
41
|
notificationEventSchema,
|
|
41
42
|
printCliError,
|
|
42
43
|
providerQuotaPolicySchema,
|
|
@@ -44,17 +45,18 @@ import {
|
|
|
44
45
|
saveConfig,
|
|
45
46
|
saveConfigPatch,
|
|
46
47
|
usageError
|
|
47
|
-
} from "./chunk-
|
|
48
|
+
} from "./chunk-QTS7VZXN.js";
|
|
48
49
|
import {
|
|
49
50
|
apiKeys,
|
|
50
51
|
competitors,
|
|
51
52
|
createClient,
|
|
53
|
+
gaTrafficSnapshots,
|
|
52
54
|
migrate,
|
|
53
55
|
parseJsonColumn,
|
|
54
56
|
projects,
|
|
55
57
|
querySnapshots,
|
|
56
58
|
runs
|
|
57
|
-
} from "./chunk-
|
|
59
|
+
} from "./chunk-FV6PY5UE.js";
|
|
58
60
|
import "./chunk-MLKGABMK.js";
|
|
59
61
|
|
|
60
62
|
// src/cli.ts
|
|
@@ -160,7 +162,7 @@ Usage: ${spec.usage}`, {
|
|
|
160
162
|
}
|
|
161
163
|
|
|
162
164
|
// src/commands/backfill.ts
|
|
163
|
-
import { and, eq, inArray } from "drizzle-orm";
|
|
165
|
+
import { and, eq, inArray, isNull } from "drizzle-orm";
|
|
164
166
|
var SNAPSHOT_BATCH_SIZE = 500;
|
|
165
167
|
async function backfillAnswerVisibilityCommand(opts) {
|
|
166
168
|
const config = loadConfig();
|
|
@@ -301,8 +303,75 @@ async function backfillAnswerVisibilityCommand(opts) {
|
|
|
301
303
|
console.log(` Reparsed: ${reparsed}`);
|
|
302
304
|
console.log(` Errors: ${providerErrors}`);
|
|
303
305
|
}
|
|
306
|
+
/**
 * Fill `landingPageNormalized` for GA traffic snapshot rows where it is NULL.
 *
 * Rows whose landing page normalizes to `null` are left untouched and
 * counted as unchanged. All updates run inside a single transaction.
 *
 * @param {object} db - Drizzle database handle.
 * @param {{projectId?: string}} [opts] - Optional restriction to one project.
 * @returns {{examined: number, updated: number, unchanged: number}} Row counts.
 */
function backfillNormalizedPaths(db, opts) {
  const filters = [isNull(gaTrafficSnapshots.landingPageNormalized)];
  if (opts?.projectId) {
    filters.push(eq(gaTrafficSnapshots.projectId, opts.projectId));
  }
  const pending = db.select({
    id: gaTrafficSnapshots.id,
    landingPage: gaTrafficSnapshots.landingPage
  }).from(gaTrafficSnapshots).where(and(...filters)).all();
  const counts = { examined: pending.length, updated: 0, unchanged: 0 };
  if (pending.length === 0) {
    return counts;
  }
  db.transaction((tx) => {
    for (const { id, landingPage } of pending) {
      const normalized = normalizeUrlPath(landingPage);
      if (normalized === null) {
        counts.unchanged++;
        continue;
      }
      tx.update(gaTrafficSnapshots).set({ landingPageNormalized: normalized }).where(eq(gaTrafficSnapshots.id, id)).run();
      counts.updated++;
    }
  });
  return counts;
}
|
|
332
|
+
/**
 * CLI entry point for `canonry backfill normalized-paths`.
 *
 * Opens the configured database, runs migrations, optionally resolves
 * `opts.project` to a project id, runs the backfill and prints the counts
 * as JSON or as a text report. An unknown project prints zero counts (JSON)
 * or a "not found" line (text) and returns without throwing.
 *
 * @param {{project?: string, format?: string}} [opts] - Parsed CLI options.
 * @returns {Promise<void>}
 */
async function backfillNormalizedPathsCommand(opts) {
  const config = loadConfig();
  const db = createClient(config.database);
  migrate(db);
  const wantsJson = opts?.format === "json";
  const projectFilter = opts?.project?.trim();
  let projectId;
  if (projectFilter) {
    const match = db.select({ id: projects.id }).from(projects).where(eq(projects.name, projectFilter)).get();
    if (!match) {
      if (wantsJson) {
        const emptyReport = {
          project: projectFilter,
          examined: 0,
          updated: 0,
          unchanged: 0
        };
        console.log(JSON.stringify(emptyReport, null, 2));
      } else {
        console.log(`Backfill normalized-paths: project "${projectFilter}" not found.`);
      }
      return;
    }
    projectId = match.id;
  }
  const counts = backfillNormalizedPaths(db, { projectId });
  if (wantsJson) {
    const report = {
      project: projectFilter ?? null,
      examined: counts.examined,
      updated: counts.updated,
      unchanged: counts.unchanged
    };
    console.log(JSON.stringify(report, null, 2));
    return;
  }
  console.log("Normalized-path backfill complete.\n");
  if (projectFilter) console.log(` Project: ${projectFilter}`);
  console.log(` Examined: ${counts.examined}`);
  console.log(` Updated: ${counts.updated}`);
  console.log(` Unchanged: ${counts.unchanged}`);
}
|
|
304
373
|
async function backfillInsightsCommand(project, opts) {
|
|
305
|
-
const { IntelligenceService } = await import("./intelligence-service-
|
|
374
|
+
const { IntelligenceService } = await import("./intelligence-service-AEI46KC5.js");
|
|
306
375
|
const config = loadConfig();
|
|
307
376
|
const db = createClient(config.database);
|
|
308
377
|
migrate(db);
|
|
@@ -498,14 +567,28 @@ var BACKFILL_CLI_COMMANDS = [
|
|
|
498
567
|
});
|
|
499
568
|
}
|
|
500
569
|
},
|
|
570
|
+
{
|
|
571
|
+
path: ["backfill", "normalized-paths"],
|
|
572
|
+
usage: "canonry backfill normalized-paths [--project <name>] [--format json]",
|
|
573
|
+
options: {
|
|
574
|
+
project: stringOption()
|
|
575
|
+
},
|
|
576
|
+
allowPositionals: false,
|
|
577
|
+
run: async (input) => {
|
|
578
|
+
await backfillNormalizedPathsCommand({
|
|
579
|
+
project: getString(input.values, "project"),
|
|
580
|
+
format: input.format
|
|
581
|
+
});
|
|
582
|
+
}
|
|
583
|
+
},
|
|
501
584
|
{
|
|
502
585
|
path: ["backfill"],
|
|
503
|
-
usage: "canonry backfill <answer-visibility|insights> [options]",
|
|
586
|
+
usage: "canonry backfill <answer-visibility|insights|normalized-paths> [options]",
|
|
504
587
|
run: async (input) => {
|
|
505
588
|
unknownSubcommand(input.positionals[0], {
|
|
506
589
|
command: "backfill",
|
|
507
|
-
usage: "canonry backfill <answer-visibility|insights> [options]",
|
|
508
|
-
available: ["answer-visibility", "insights"]
|
|
590
|
+
usage: "canonry backfill <answer-visibility|insights|normalized-paths> [options]",
|
|
591
|
+
available: ["answer-visibility", "insights", "normalized-paths"]
|
|
509
592
|
});
|
|
510
593
|
}
|
|
511
594
|
}
|
|
@@ -638,8 +721,38 @@ async function backlinksSync(opts) {
|
|
|
638
721
|
return;
|
|
639
722
|
}
|
|
640
723
|
if (opts.wait) process.stderr.write("\n");
|
|
724
|
+
if (!opts.release) {
|
|
725
|
+
process.stderr.write(`Auto-discovered release: ${sync.release}
|
|
726
|
+
`);
|
|
727
|
+
}
|
|
641
728
|
console.log(formatSync(final));
|
|
642
729
|
}
|
|
730
|
+
/**
 * Render a byte count with a decimal (SI) unit and one fractional digit.
 *
 * @param {number|null|undefined} n - Byte count. `null` or `undefined`
 *   (size unknown, or the field is missing from the payload) renders as an
 *   em dash instead of "undefined B".
 * @returns {string} e.g. "1.5 GB", "2.0 MB", "1.5 KB", "999 B", or "\u2014".
 */
function formatBytesShort(n) {
  // `== null` deliberately matches both null and undefined.
  if (n == null) return "\u2014";
  if (n >= 1e9) return `${(n / 1e9).toFixed(1)} GB`;
  if (n >= 1e6) return `${(n / 1e6).toFixed(1)} MB`;
  if (n >= 1e3) return `${(n / 1e3).toFixed(1)} KB`;
  return `${n} B`;
}
|
|
737
|
+
/**
 * Build the text report for `canonry backlinks releases latest`.
 *
 * @param {object|null} result - Latest-release payload, or a falsy value
 *   when nothing was discovered.
 * @returns {string} Newline-joined report: release id, vertex/edges URLs
 *   with their sizes, and the last-modified value when present.
 */
function formatLatestRelease(result) {
  if (!result) {
    return "No release discovered (Common Crawl probe returned no candidates).";
  }
  const report = [
    `Release: ${result.release}`,
    `Vertex: ${result.vertexUrl}`,
    ` ${formatBytesShort(result.vertexBytes)}`,
    `Edges: ${result.edgesUrl}`,
    ` ${formatBytesShort(result.edgesBytes)}`
  ];
  if (result.lastModified) {
    report.push(`Last modified: ${result.lastModified}`);
  }
  return report.join("\n");
}
|
|
748
|
+
/**
 * CLI handler: fetch the latest discoverable release from the API and print
 * it as JSON (`opts.format === "json"`) or as a text report.
 *
 * @param {{format?: string}} [opts] - Output options.
 * @returns {Promise<void>}
 */
async function backlinksLatestRelease(opts = {}) {
  const latest = await getClient().backlinksLatestRelease();
  if (opts.format !== "json") {
    console.log(formatLatestRelease(latest));
    return;
  }
  printJson(latest);
}
|
|
643
756
|
async function backlinksStatus(opts = {}) {
|
|
644
757
|
const sync = await getClient().backlinksLatestSync();
|
|
645
758
|
if (opts.format === "json") {
|
|
@@ -715,19 +828,14 @@ var BACKLINKS_CLI_COMMANDS = [
|
|
|
715
828
|
},
|
|
716
829
|
{
|
|
717
830
|
path: ["backlinks", "sync"],
|
|
718
|
-
usage: "canonry backlinks sync --release <id> [--wait] [--format json]",
|
|
831
|
+
usage: "canonry backlinks sync [--release <id>] [--wait] [--format json]",
|
|
719
832
|
options: {
|
|
720
833
|
release: stringOption(),
|
|
721
834
|
wait: { type: "boolean" }
|
|
722
835
|
},
|
|
723
836
|
run: async (input) => {
|
|
724
|
-
const release = requireStringOption(input, "release", {
|
|
725
|
-
message: "--release is required",
|
|
726
|
-
usage: "canonry backlinks sync --release <id> [--wait]",
|
|
727
|
-
command: "backlinks sync"
|
|
728
|
-
});
|
|
729
837
|
await backlinksSync({
|
|
730
|
-
release,
|
|
838
|
+
release: getString(input.values, "release"),
|
|
731
839
|
wait: getBoolean(input.values, "wait"),
|
|
732
840
|
format: input.format
|
|
733
841
|
});
|
|
@@ -775,6 +883,14 @@ var BACKLINKS_CLI_COMMANDS = [
|
|
|
775
883
|
await backlinksReleases({ format: input.format });
|
|
776
884
|
}
|
|
777
885
|
},
|
|
886
|
+
{
|
|
887
|
+
path: ["backlinks", "releases", "latest"],
|
|
888
|
+
usage: "canonry backlinks releases latest [--format json]",
|
|
889
|
+
options: {},
|
|
890
|
+
run: async (input) => {
|
|
891
|
+
await backlinksLatestRelease({ format: input.format });
|
|
892
|
+
}
|
|
893
|
+
},
|
|
778
894
|
{
|
|
779
895
|
path: ["backlinks", "extract"],
|
|
780
896
|
usage: "canonry backlinks extract <project> [--release <id>] [--wait] [--format json]",
|
|
@@ -7039,6 +7155,18 @@ async function serveCommand(format = "text") {
|
|
|
7039
7155
|
config.port = port;
|
|
7040
7156
|
const db = createClient(config.database);
|
|
7041
7157
|
migrate(db);
|
|
7158
|
+
try {
|
|
7159
|
+
const result = backfillNormalizedPaths(db);
|
|
7160
|
+
if (result.updated > 0 && format === "text") {
|
|
7161
|
+
console.log(
|
|
7162
|
+
`Migrated ${result.updated} GA landing-page row${result.updated === 1 ? "" : "s"} to canonical form.`
|
|
7163
|
+
);
|
|
7164
|
+
}
|
|
7165
|
+
} catch (err) {
|
|
7166
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
7167
|
+
process.stderr.write(`warning: normalized-path backfill skipped: ${msg}
|
|
7168
|
+
`);
|
|
7169
|
+
}
|
|
7042
7170
|
const app = await createServer({ config, db });
|
|
7043
7171
|
let shuttingDown = false;
|
|
7044
7172
|
const shutdown = (signal) => {
|
package/dist/index.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import {
|
|
2
2
|
createServer
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-3T64Y7GR.js";
|
|
4
4
|
import {
|
|
5
5
|
loadConfig
|
|
6
|
-
} from "./chunk-
|
|
7
|
-
import "./chunk-
|
|
6
|
+
} from "./chunk-QTS7VZXN.js";
|
|
7
|
+
import "./chunk-FV6PY5UE.js";
|
|
8
8
|
import "./chunk-MLKGABMK.js";
|
|
9
9
|
export {
|
|
10
10
|
createServer,
|