@ainyc/canonry 2.13.2 → 2.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/assets/index.html CHANGED
@@ -12,8 +12,8 @@
12
12
  <link rel="icon" type="image/png" sizes="32x32" href="./favicon-32.png" />
13
13
  <link rel="apple-touch-icon" href="./apple-touch-icon.png" />
14
14
  <title>Canonry</title>
15
- <script type="module" crossorigin src="./assets/index-CKzK0os-.js"></script>
16
- <link rel="stylesheet" crossorigin href="./assets/index-CAewPdsZ.css">
15
+ <script type="module" crossorigin src="./assets/index-BwFUCV6e.js"></script>
16
+ <link rel="stylesheet" crossorigin href="./assets/index-U2SLimrz.css">
17
17
  </head>
18
18
  <body>
19
19
  <div id="root"></div>
@@ -39,6 +39,7 @@ import {
39
39
  locationContextSchema,
40
40
  missingDependency,
41
41
  normalizeProjectDomain,
42
+ normalizeUrlPath,
42
43
  notFound,
43
44
  notImplemented,
44
45
  parseRunError,
@@ -59,7 +60,7 @@ import {
59
60
  visibilityStateFromAnswerMentioned,
60
61
  windowCutoff,
61
62
  wordpressEnvSchema
62
- } from "./chunk-XJS7NALL.js";
63
+ } from "./chunk-QTS7VZXN.js";
63
64
  import {
64
65
  IntelligenceService,
65
66
  agentMemory,
@@ -97,7 +98,7 @@ import {
97
98
  runs,
98
99
  schedules,
99
100
  usageCounters
100
- } from "./chunk-UM6RDSRJ.js";
101
+ } from "./chunk-FV6PY5UE.js";
101
102
 
102
103
  // src/telemetry.ts
103
104
  import crypto from "crypto";
@@ -5371,15 +5372,14 @@ var routeCatalog = [
5371
5372
  method: "post",
5372
5373
  path: "/api/v1/backlinks/syncs",
5373
5374
  summary: "Queue a workspace-wide Common Crawl release sync",
5374
- description: "Creates a `cc_release_syncs` row and fires the sync callback. Idempotent: an existing in-flight row for the same release is returned.",
5375
+ description: "Creates a `cc_release_syncs` row and fires the sync callback. Idempotent: an existing in-flight row for the same release is returned. When `release` is omitted, the server auto-discovers the latest available Common Crawl release.",
5375
5376
  tags: ["backlinks"],
5376
5377
  requestBody: {
5377
- required: true,
5378
+ required: false,
5378
5379
  content: {
5379
5380
  "application/json": {
5380
5381
  schema: {
5381
5382
  type: "object",
5382
- required: ["release"],
5383
5383
  properties: {
5384
5384
  release: stringSchema
5385
5385
  }
@@ -5422,6 +5422,17 @@ var routeCatalog = [
5422
5422
  200: { description: "Cached release metadata returned." }
5423
5423
  }
5424
5424
  },
5425
+ {
5426
+ method: "get",
5427
+ path: "/api/v1/backlinks/latest-release",
5428
+ summary: "Auto-discover the latest available Common Crawl hyperlinkgraph release",
5429
+ description: "Probes Common Crawl by HEAD-checking quarterly release slugs and returns the newest one published. The local server caches the result for ~5 minutes so repeated calls do not hammer Common Crawl.",
5430
+ tags: ["backlinks"],
5431
+ responses: {
5432
+ 200: { description: "Latest available release, or null when no candidate slug responded." },
5433
+ 422: { description: "Backlinks feature is not available on this deployment." }
5434
+ }
5435
+ },
5425
5436
  {
5426
5437
  method: "delete",
5427
5438
  path: "/api/v1/backlinks/cache/{release}",
@@ -8651,6 +8662,7 @@ async function ga4Routes(app, opts) {
8651
8662
  projectId: project.id,
8652
8663
  date: row.date,
8653
8664
  landingPage: row.landingPage,
8665
+ landingPageNormalized: normalizeUrlPath(row.landingPage),
8654
8666
  sessions: row.sessions,
8655
8667
  organicSessions: row.organicSessions,
8656
8668
  users: row.users,
@@ -8775,11 +8787,11 @@ async function ga4Routes(app, opts) {
8775
8787
  periodEnd: gaTrafficSummaries.periodEnd
8776
8788
  }).from(gaTrafficSummaries).where(eq19(gaTrafficSummaries.projectId, project.id)).get();
8777
8789
  const rows = app.db.select({
8778
- landingPage: gaTrafficSnapshots.landingPage,
8790
+ landingPage: sql5`COALESCE(${gaTrafficSnapshots.landingPageNormalized}, ${gaTrafficSnapshots.landingPage})`,
8779
8791
  sessions: sql5`SUM(${gaTrafficSnapshots.sessions})`,
8780
8792
  organicSessions: sql5`SUM(${gaTrafficSnapshots.organicSessions})`,
8781
8793
  users: sql5`SUM(${gaTrafficSnapshots.users})`
8782
- }).from(gaTrafficSnapshots).where(and8(...snapshotConditions)).groupBy(gaTrafficSnapshots.landingPage).orderBy(sql5`SUM(${gaTrafficSnapshots.sessions}) DESC`).limit(limit).all();
8794
+ }).from(gaTrafficSnapshots).where(and8(...snapshotConditions)).groupBy(sql5`COALESCE(${gaTrafficSnapshots.landingPageNormalized}, ${gaTrafficSnapshots.landingPage})`).orderBy(sql5`SUM(${gaTrafficSnapshots.sessions}) DESC`).limit(limit).all();
8783
8795
  const aiReferrals = app.db.select({
8784
8796
  source: gaAiReferrals.source,
8785
8797
  medium: gaAiReferrals.medium,
@@ -9036,11 +9048,11 @@ async function ga4Routes(app, opts) {
9036
9048
  const project = resolveProject(app.db, request.params.name);
9037
9049
  requireGa4Connection(opts, project.name, project.canonicalDomain);
9038
9050
  const trafficPages = app.db.select({
9039
- landingPage: gaTrafficSnapshots.landingPage,
9051
+ landingPage: sql5`COALESCE(${gaTrafficSnapshots.landingPageNormalized}, ${gaTrafficSnapshots.landingPage})`,
9040
9052
  sessions: sql5`SUM(${gaTrafficSnapshots.sessions})`,
9041
9053
  organicSessions: sql5`SUM(${gaTrafficSnapshots.organicSessions})`,
9042
9054
  users: sql5`SUM(${gaTrafficSnapshots.users})`
9043
- }).from(gaTrafficSnapshots).where(eq19(gaTrafficSnapshots.projectId, project.id)).groupBy(gaTrafficSnapshots.landingPage).orderBy(sql5`SUM(${gaTrafficSnapshots.sessions}) DESC`).all();
9055
+ }).from(gaTrafficSnapshots).where(eq19(gaTrafficSnapshots.projectId, project.id)).groupBy(sql5`COALESCE(${gaTrafficSnapshots.landingPageNormalized}, ${gaTrafficSnapshots.landingPage})`).orderBy(sql5`SUM(${gaTrafficSnapshots.sessions}) DESC`).all();
9044
9056
  return {
9045
9057
  pages: trafficPages.map((r) => ({
9046
9058
  landingPage: r.landingPage,
@@ -10712,6 +10724,60 @@ function forwardDomain(revDomain) {
10712
10724
  function isValidReleaseId(id) {
10713
10725
  return RELEASE_ID_REGEX.test(id);
10714
10726
  }
10727
/**
 * Build a Common Crawl release id from its year and quarter label.
 *
 * @param {number|string} year - Release year.
 * @param {string} quarter - Quarter label, e.g. "jan-feb-mar".
 * @returns {string} Id of the form `cc-main-<year>-<quarter>`.
 */
function formatReleaseId(year, quarter) {
  const suffix = `${year}-${quarter}`;
  return `cc-main-${suffix}`;
}
10730
+
10731
+ // ../integration-commoncrawl/src/release-discovery.ts
10732
// Quarter labels used in release ids, ordered newest-first within a calendar year.
var QUARTERS = [
  "oct-nov-dec",
  "jul-aug-sep",
  "apr-may-jun",
  "jan-feb-mar"
];
/**
 * List the release candidates to probe, newest first.
 *
 * The walk starts at the quarter that contains `now` (UTC) and steps back one
 * quarter at a time, so quarters that lie in the future are never listed and
 * `maxBack` is a count of quarters (matching the `maxQuartersBack` option it
 * is fed from), not of years.
 *
 * @param {Date} now - Reference instant; only its UTC year and month are read.
 * @param {number} maxBack - How many quarters before the current one to include.
 *   A negative value yields an empty list.
 * @returns {Array<{year: number, quarter: string}>} `maxBack + 1` candidates,
 *   newest first.
 */
function probeCandidates(now, maxBack) {
  const quartersPerYear = QUARTERS.length;
  // 0 = jan-feb-mar … 3 = oct-nov-dec (chronological position inside the year).
  const currentQuarter = Math.floor(now.getUTCMonth() / 3);
  // Number every quarter consecutively so stepping back across a year boundary is plain subtraction.
  const newest = now.getUTCFullYear() * quartersPerYear + currentQuarter;
  const out = [];
  for (let back = 0; back <= maxBack; back++) {
    const ordinal = newest - back;
    const chronological = ordinal % quartersPerYear;
    out.push({
      year: Math.floor(ordinal / quartersPerYear),
      // QUARTERS is newest-first, so mirror the chronological index.
      quarter: QUARTERS[quartersPerYear - 1 - chronological]
    });
  }
  return out;
}
10748
/**
 * Check whether a release is published by HEAD-requesting both of its files.
 *
 * @param {string} release - Release id passed to `ccReleasePaths`.
 * @param {Function} [fetchImpl=fetch] - Fetch-compatible function used for the requests.
 * @returns {Promise<object|null>} Release metadata (URLs, byte sizes from
 *   `content-length`, and the vertex file's `last-modified` header) when both
 *   responses are OK; otherwise null.
 */
async function probeRelease(release, fetchImpl = fetch) {
  const { vertexUrl, edgesUrl } = ccReleasePaths(release);
  // Both HEAD requests are started together and awaited as a pair.
  const [vertex, edges] = await Promise.all([
    fetchImpl(vertexUrl, { method: "HEAD" }),
    fetchImpl(edgesUrl, { method: "HEAD" })
  ]);
  const bothAvailable = vertex.ok && edges.ok;
  if (!bothAvailable) {
    return null;
  }
  const sizeOf = (response) => parseContentLength(response.headers.get("content-length"));
  return {
    release,
    vertexUrl,
    edgesUrl,
    vertexBytes: sizeOf(vertex),
    edgesBytes: sizeOf(edges),
    lastModified: vertex.headers.get("last-modified")
  };
}
10764
/**
 * Probe candidate releases one at a time, newest first, and return the first
 * one that `probeRelease` reports as available.
 *
 * @param {object} [opts]
 * @param {Date} [opts.now] - Reference date; defaults to the current time.
 * @param {number} [opts.maxQuartersBack=3] - Look-back bound handed to `probeCandidates`.
 * @param {Function} [opts.fetchImpl=fetch] - Fetch-compatible function used for probing.
 * @returns {Promise<object|null>} The first available release's metadata, or
 *   null when no candidate is available.
 */
async function probeLatestRelease(opts = {}) {
  const referenceDate = opts.now ?? /* @__PURE__ */ new Date();
  const lookBack = opts.maxQuartersBack ?? 3;
  const doFetch = opts.fetchImpl ?? fetch;
  for (const candidate of probeCandidates(referenceDate, lookBack)) {
    const releaseId = formatReleaseId(candidate.year, candidate.quarter);
    // Sequential on purpose: stop at the first (newest) hit.
    const found = await probeRelease(releaseId, doFetch);
    if (found) {
      return found;
    }
  }
  return null;
}
10776
/**
 * Parse a `content-length` header value.
 *
 * @param {string|null|undefined} value - Raw header value.
 * @returns {number|null} The base-10 integer parsed from the value, or null
 *   when the value is missing, empty, or does not start with a number.
 */
function parseContentLength(value) {
  if (!value) {
    return null;
  }
  const parsed = Number.parseInt(value, 10);
  if (Number.isFinite(parsed)) {
    return parsed;
  }
  return null;
}
10715
10781
 
10716
10782
  // ../integration-commoncrawl/src/downloader.ts
10717
10783
  import { createHash } from "crypto";
@@ -10739,7 +10805,7 @@ async function downloadFile(opts) {
10739
10805
  if (!res.ok || !res.body) {
10740
10806
  throw new Error(`HTTP ${res.status} ${res.statusText} for ${opts.url}`);
10741
10807
  }
10742
- const total = parseContentLength(res.headers.get("content-length"));
10808
+ const total = parseContentLength2(res.headers.get("content-length"));
10743
10809
  const hasher = createHash("sha256");
10744
10810
  let bytes = 0;
10745
10811
  const hashAndCount = new Transform({
@@ -10790,7 +10856,7 @@ async function unlinkIfExists(p) {
10790
10856
  } catch {
10791
10857
  }
10792
10858
  }
10793
- function parseContentLength(value) {
10859
+ function parseContentLength2(value) {
10794
10860
  if (!value) return null;
10795
10861
  const n = Number.parseInt(value, 10);
10796
10862
  return Number.isFinite(n) ? n : null;
@@ -11093,8 +11159,22 @@ async function backlinksRoutes(app, opts) {
11093
11159
  return reply.status(200).send(result);
11094
11160
  });
11095
11161
  app.post("/backlinks/syncs", async (request, reply) => {
11096
- const release = request.body?.release;
11097
- if (!release || !isValidReleaseId(release)) {
11162
+ let release = request.body?.release;
11163
+ if (!release) {
11164
+ if (!opts.discoverLatestRelease) {
11165
+ throw validationError(
11166
+ "No `release` provided and auto-discovery is unavailable on this deployment. Pass an explicit release id (e.g., cc-main-2026-jan-feb-mar)."
11167
+ );
11168
+ }
11169
+ const discovered = await opts.discoverLatestRelease();
11170
+ if (!discovered) {
11171
+ throw validationError(
11172
+ "Could not auto-discover the latest Common Crawl release. Pass an explicit `release` body parameter."
11173
+ );
11174
+ }
11175
+ release = discovered.release;
11176
+ }
11177
+ if (!isValidReleaseId(release)) {
11098
11178
  throw validationError("Invalid release id. Expected form: cc-main-YYYY-{jan-feb-mar,apr-may-jun,jul-aug-sep,oct-nov-dec}");
11099
11179
  }
11100
11180
  if (!opts.getBacklinksStatus || !opts.onReleaseSyncRequested) {
@@ -11145,6 +11225,13 @@ async function backlinksRoutes(app, opts) {
11145
11225
  const releases = opts.listCachedReleases?.() ?? [];
11146
11226
  return reply.send(releases);
11147
11227
  });
11228
+ app.get("/backlinks/latest-release", async (_request, reply) => {
11229
+ if (!opts.discoverLatestRelease) {
11230
+ throw missingDependency(BACKLINKS_UNSUPPORTED_MESSAGE);
11231
+ }
11232
+ const discovered = await opts.discoverLatestRelease();
11233
+ return reply.send(discovered);
11234
+ });
11148
11235
  app.delete("/backlinks/cache/:release", async (request, reply) => {
11149
11236
  const release = request.params.release;
11150
11237
  if (!isValidReleaseId(release)) {
@@ -11941,7 +12028,8 @@ async function apiRoutes(app, opts) {
11941
12028
  onReleaseSyncRequested: opts.onReleaseSyncRequested,
11942
12029
  onBacklinkExtractRequested: opts.onBacklinkExtractRequested,
11943
12030
  onBacklinksPruneCache: opts.onBacklinksPruneCache,
11944
- listCachedReleases: opts.listCachedReleases
12031
+ listCachedReleases: opts.listCachedReleases,
12032
+ discoverLatestRelease: opts.discoverLatestRelease
11945
12033
  });
11946
12034
  await api.register(doctorRoutes, {
11947
12035
  googleConnectionStore: opts.googleConnectionStore,
@@ -19112,6 +19200,28 @@ async function createServer(opts) {
19112
19200
  }));
19113
19201
  return reply.status(204).send();
19114
19202
  });
19203
// How long a completed latest-release probe is reused before probing again.
const LATEST_RELEASE_TTL_MS = 5 * 60 * 1e3;
// Most recent completed probe result and its expiry (ms since epoch); null until a probe completes.
let latestReleaseCache = null;
/**
 * Resolve the latest Common Crawl release, reusing a cached probe result while
 * it is still fresh.
 *
 * A probe that completes is cached for `LATEST_RELEASE_TTL_MS`, including a
 * completed probe that found no release. A probe that throws is logged and
 * reported as null but is NOT cached: a transient failure is not evidence that
 * no release exists, so the next call probes again instead of answering
 * "no release" for the whole TTL.
 *
 * @returns {Promise<object|null>} Release metadata (`release`, `vertexUrl`,
 *   `edgesUrl`, `vertexBytes`, `edgesBytes`, `lastModified`), or null when no
 *   release was found or the probe failed.
 */
const discoverLatestRelease = async () => {
  const now = Date.now();
  if (latestReleaseCache && latestReleaseCache.expiresAt > now) {
    return latestReleaseCache.value;
  }
  let probed;
  try {
    probed = await probeLatestRelease();
  } catch (err) {
    app.log.warn({ err }, "Common Crawl latest-release probe failed");
    // Leave the cache untouched so the next caller retries.
    return null;
  }
  // Copy only the documented fields so the cached value has a fixed shape.
  const value = probed ? {
    release: probed.release,
    vertexUrl: probed.vertexUrl,
    edgesUrl: probed.edgesUrl,
    vertexBytes: probed.vertexBytes,
    edgesBytes: probed.edgesBytes,
    lastModified: probed.lastModified
  } : null;
  latestReleaseCache = { value, expiresAt: now + LATEST_RELEASE_TTL_MS };
  return value;
};
19115
19225
  await app.register(apiRoutes, {
19116
19226
  db: opts.db,
19117
19227
  routePrefix: apiPrefix,
@@ -19220,6 +19330,7 @@ async function createServer(opts) {
19220
19330
  };
19221
19331
  });
19222
19332
  },
19333
+ discoverLatestRelease,
19223
19334
  openApiInfo: {
19224
19335
  title: "Canonry API",
19225
19336
  version: PKG_VERSION,
@@ -310,6 +310,14 @@ var gaTrafficSnapshots = sqliteTable("ga_traffic_snapshots", {
310
310
  projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
311
311
  date: text("date").notNull(),
312
312
  landingPage: text("landing_page").notNull(),
313
+ /**
314
+ * Canonicalized form of `landingPage` produced by `normalizeUrlPath()` in
315
+ * `@ainyc/canonry-contracts`. Nullable so existing rows survive migration;
316
+ * new GA4 sync writes populate it. Per-page aggregations should
317
+ * `GROUP BY COALESCE(landing_page_normalized, landing_page)` so
318
+ * partially-backfilled state still aggregates correctly.
319
+ */
320
+ landingPageNormalized: text("landing_page_normalized"),
313
321
  sessions: integer("sessions").notNull().default(0),
314
322
  organicSessions: integer("organic_sessions").notNull().default(0),
315
323
  users: integer("users").notNull().default(0),
@@ -318,6 +326,7 @@ var gaTrafficSnapshots = sqliteTable("ga_traffic_snapshots", {
318
326
  }, (table) => [
319
327
  index("idx_ga_traffic_project_date").on(table.projectId, table.date),
320
328
  index("idx_ga_traffic_page").on(table.landingPage),
329
+ index("idx_ga_traffic_page_normalized").on(table.projectId, table.date, table.landingPageNormalized),
321
330
  index("idx_ga_traffic_run").on(table.syncRunId)
322
331
  ]);
323
332
  var gaAiReferrals = sqliteTable("ga_ai_referrals", {
@@ -1049,7 +1058,17 @@ var MIGRATIONS = [
1049
1058
  WHEN discovery_date IS NOT NULL THEN 0
1050
1059
  ELSE NULL
1051
1060
  END
1052
- WHERE created_at < '2026-04-22T00:00:00Z'`
1061
+ WHERE created_at < '2026-04-22T00:00:00Z'`,
1062
+ // v44: Canonicalized landing-page column for ga_traffic_snapshots.
1063
+ // Populated by GA4 sync via normalizeUrlPath() in
1064
+ // @ainyc/canonry-contracts. Nullable; existing rows are filled in by
1065
+ // `canonry backfill normalized-paths`. Read queries should
1066
+ // `GROUP BY COALESCE(landing_page_normalized, landing_page)` so
1067
+ // partially-backfilled state still aggregates correctly.
1068
+ // See plans/ai-attribution-research.md "Step 1 — data hygiene".
1069
+ `ALTER TABLE ga_traffic_snapshots ADD COLUMN landing_page_normalized TEXT`,
1070
+ `CREATE INDEX IF NOT EXISTS idx_ga_traffic_page_normalized
1071
+ ON ga_traffic_snapshots(project_id, date, landing_page_normalized)`
1053
1072
  ];
1054
1073
  function isDuplicateColumnError(err) {
1055
1074
  if (!(err instanceof Error)) return false;
@@ -1790,6 +1790,97 @@ function summarizeCheckResults(results) {
1790
1790
  return summary;
1791
1791
  }
1792
1792
 
1793
+ // ../contracts/src/url-normalize.ts
1794
// Query-parameter names that are removed during normalization (exact, case-sensitive match).
var STRIP_KEYS = /* @__PURE__ */ new Set([
  // Click identifiers
  "fbclid", "gclid", "msclkid", "ttclid", "li_fat_id",
  "igshid", "yclid", "dclid", "gbraid", "wbraid",
  // Mailchimp
  "mc_cid", "mc_eid",
  // Google Analytics linkers
  "_ga", "_gl",
  // Google Tag Manager debug
  "gtm_latency", "gtm_debug"
]);
/**
 * Decide whether a query-parameter name should be dropped.
 *
 * @param {string} key - Parameter name as it appears in the query string.
 * @returns {boolean} True when the name is in `STRIP_KEYS` or starts with `utm_`.
 */
function shouldStrip(key) {
  return STRIP_KEYS.has(key) || key.startsWith("utm_");
}
1821
/**
 * Split a raw query string (without the leading `?`) into key/value pairs.
 *
 * Empty segments — as produced by `a=1&&b=2`, a leading/trailing `&`, or a
 * bare `&` — carry no parameter and are skipped, so they cannot leak stray
 * `&` separators into the normalized output.
 *
 * @param {string} query - Query string without the leading `?`.
 * @returns {Array<{key: string, value: string|null}>} Pairs in input order;
 *   `value` is null for a segment with no `=`. Text is not decoded.
 */
function parseQuery(query) {
  const pairs = [];
  for (const segment of query.split("&")) {
    if (segment === "") continue;
    const eq = segment.indexOf("=");
    if (eq === -1) {
      pairs.push({ key: segment, value: null });
    } else {
      // Split on the first "=" only; later "=" characters belong to the value.
      pairs.push({ key: segment.slice(0, eq), value: segment.slice(eq + 1) });
    }
  }
  return pairs;
}
1829
/**
 * Join key/value pairs back into a query string (no leading `?`).
 *
 * @param {Array<{key: string, value: string|null}>} pairs - Pairs to serialize.
 * @returns {string} `key=value` segments joined with `&`; a pair whose value
 *   is null is written as the bare key.
 */
function encodeQuery(pairs) {
  const segments = [];
  for (const { key, value } of pairs) {
    if (value === null) {
      segments.push(key);
    } else {
      segments.push(`${key}=${value}`);
    }
  }
  return segments.join("&");
}
1832
/**
 * Map the root index documents onto the root path.
 *
 * @param {string} path2 - URL path.
 * @returns {string} "/" when the path is exactly `/index.html` or `/index.php`;
 *   otherwise the path unchanged.
 */
function collapseRootIndex(path2) {
  switch (path2) {
    case "/index.html":
    case "/index.php":
      return "/";
    default:
      return path2;
  }
}
1836
/**
 * Remove trailing slashes from a path while keeping the root path intact.
 *
 * @param {string} path2 - URL path.
 * @returns {string} The path without its run of trailing `/` characters. A
 *   path made up only of slashes (e.g. `//`) becomes `/` rather than the empty
 *   string, so the result is never emptier than the root.
 */
function dropTrailingSlash(path2) {
  if (path2.length > 1 && path2.endsWith("/")) {
    const stripped = path2.replace(/\/+$/, "");
    // "//" and friends strip down to "", which is not a valid path; fall back to root.
    return stripped === "" ? "/" : stripped;
  }
  return path2;
}
1842
/**
 * Canonicalize a landing page (absolute http(s) URL or path) into a path plus
 * a filtered, key-sorted query string.
 *
 * Steps: trim; reject empty input and the literal "(not set)"; split off the
 * path and query (the fragment is discarded); collapse a root index document
 * to "/"; drop trailing slashes; remove parameters for which `shouldStrip` is
 * true; sort the remaining parameters by key.
 *
 * @param {string|null|undefined} input - Raw landing-page value.
 * @returns {string|null} The normalized path (with `?query` when parameters
 *   remain), or null when the input is nullish, blank, "(not set)", or an
 *   http(s) URL that fails to parse.
 */
function normalizeUrlPath(input) {
  if (input == null) return null;
  const text = input.trim();
  if (text === "" || text === "(not set)") return null;

  let pathPart;
  let queryPart;
  const isAbsolute = /^https?:\/\//i.test(text);
  if (isAbsolute) {
    let parsed;
    try {
      parsed = new URL(text);
    } catch {
      return null;
    }
    pathPart = parsed.pathname || "/";
    // URL#search carries its leading "?"; the fragment lives in URL#hash and is ignored.
    queryPart = parsed.search.replace(/^\?/, "");
  } else {
    // Everything from the first "#" onward is a fragment and is dropped.
    const [beforeHash] = text.split("#");
    const queryStart = beforeHash.indexOf("?");
    const hasQuery = queryStart !== -1;
    pathPart = hasQuery ? beforeHash.slice(0, queryStart) : beforeHash;
    queryPart = hasQuery ? beforeHash.slice(queryStart + 1) : "";
  }

  const canonicalPath = dropTrailingSlash(
    collapseRootIndex(pathPart === "" ? "/" : pathPart)
  );
  const kept = parseQuery(queryPart).filter(({ key }) => !shouldStrip(key));
  // Sort by key only; pairs sharing a key keep their input order.
  kept.sort((left, right) => {
    if (left.key < right.key) return -1;
    return left.key > right.key ? 1 : 0;
  });
  return kept.length === 0 ? canonicalPath : `${canonicalPath}?${encodeQuery(kept)}`;
}
1883
+
1793
1884
  // src/client.ts
1794
1885
  function createApiClient() {
1795
1886
  const config = loadConfig();
@@ -2409,7 +2500,10 @@ var ApiClient = class {
2409
2500
  return this.request("POST", "/backlinks/install");
2410
2501
  }
2411
2502
  async backlinksTriggerSync(release) {
2412
- return this.request("POST", "/backlinks/syncs", { release });
2503
+ return this.request("POST", "/backlinks/syncs", release ? { release } : void 0);
2504
+ }
2505
+ async backlinksLatestRelease() {
2506
+ return this.request("GET", "/backlinks/latest-release");
2413
2507
  }
2414
2508
  async backlinksLatestSync() {
2415
2509
  return this.request("GET", "/backlinks/syncs/latest");
@@ -2519,6 +2613,7 @@ export {
2519
2613
  CheckScopes,
2520
2614
  CheckCategories,
2521
2615
  summarizeCheckResults,
2616
+ normalizeUrlPath,
2522
2617
  createApiClient,
2523
2618
  ApiClient
2524
2619
  };
package/dist/cli.js CHANGED
@@ -17,7 +17,7 @@ import {
17
17
  setGoogleAuthConfig,
18
18
  showFirstRunNotice,
19
19
  trackEvent
20
- } from "./chunk-DHMCIJMQ.js";
20
+ } from "./chunk-3T64Y7GR.js";
21
21
  import {
22
22
  CcReleaseSyncStatuses,
23
23
  CheckScopes,
@@ -37,6 +37,7 @@ import {
37
37
  getConfigPath,
38
38
  isEndpointMissing,
39
39
  loadConfig,
40
+ normalizeUrlPath,
40
41
  notificationEventSchema,
41
42
  printCliError,
42
43
  providerQuotaPolicySchema,
@@ -44,17 +45,18 @@ import {
44
45
  saveConfig,
45
46
  saveConfigPatch,
46
47
  usageError
47
- } from "./chunk-XJS7NALL.js";
48
+ } from "./chunk-QTS7VZXN.js";
48
49
  import {
49
50
  apiKeys,
50
51
  competitors,
51
52
  createClient,
53
+ gaTrafficSnapshots,
52
54
  migrate,
53
55
  parseJsonColumn,
54
56
  projects,
55
57
  querySnapshots,
56
58
  runs
57
- } from "./chunk-UM6RDSRJ.js";
59
+ } from "./chunk-FV6PY5UE.js";
58
60
  import "./chunk-MLKGABMK.js";
59
61
 
60
62
  // src/cli.ts
@@ -160,7 +162,7 @@ Usage: ${spec.usage}`, {
160
162
  }
161
163
 
162
164
  // src/commands/backfill.ts
163
- import { and, eq, inArray } from "drizzle-orm";
165
+ import { and, eq, inArray, isNull } from "drizzle-orm";
164
166
  var SNAPSHOT_BATCH_SIZE = 500;
165
167
  async function backfillAnswerVisibilityCommand(opts) {
166
168
  const config = loadConfig();
@@ -301,8 +303,75 @@ async function backfillAnswerVisibilityCommand(opts) {
301
303
  console.log(` Reparsed: ${reparsed}`);
302
304
  console.log(` Errors: ${providerErrors}`);
303
305
  }
306
/**
 * Fill `landingPageNormalized` for GA traffic snapshot rows where it is NULL.
 *
 * Selects every matching row, then updates them inside a single transaction.
 * Rows whose landing page normalizes to null are left untouched and counted
 * as unchanged.
 *
 * @param {object} db - Database client exposing `select` and `transaction`.
 * @param {{projectId?: string}} [opts] - When `projectId` is set, only that
 *   project's rows are considered.
 * @returns {{examined: number, updated: number, unchanged: number}} Row counts.
 */
function backfillNormalizedPaths(db, opts) {
  const filters = [isNull(gaTrafficSnapshots.landingPageNormalized)];
  if (opts?.projectId) {
    filters.push(eq(gaTrafficSnapshots.projectId, opts.projectId));
  }
  const pending = db
    .select({
      id: gaTrafficSnapshots.id,
      landingPage: gaTrafficSnapshots.landingPage
    })
    .from(gaTrafficSnapshots)
    .where(and(...filters))
    .all();
  if (pending.length === 0) {
    // Nothing to do; skip opening a transaction.
    return { examined: 0, updated: 0, unchanged: 0 };
  }
  const counts = { updated: 0, unchanged: 0 };
  db.transaction((tx) => {
    pending.forEach(({ id, landingPage }) => {
      const normalized = normalizeUrlPath(landingPage);
      if (normalized === null) {
        counts.unchanged += 1;
        return;
      }
      tx.update(gaTrafficSnapshots)
        .set({ landingPageNormalized: normalized })
        .where(eq(gaTrafficSnapshots.id, id))
        .run();
      counts.updated += 1;
    });
  });
  return { examined: pending.length, updated: counts.updated, unchanged: counts.unchanged };
}
332
/**
 * CLI entry point for `canonry backfill normalized-paths`.
 *
 * Opens the configured database, runs migrations, optionally resolves the
 * `--project` name to an id, runs `backfillNormalizedPaths`, and prints the
 * counts as JSON (`format === "json"`) or as text. An unknown project name
 * prints a zero-count JSON result or a "not found" line and returns without
 * running the backfill.
 *
 * @param {{project?: string, format?: string}} [opts] - Parsed CLI options.
 * @returns {Promise<void>}
 */
async function backfillNormalizedPathsCommand(opts) {
  const { database } = loadConfig();
  const db = createClient(database);
  migrate(db);

  const wantsJson = opts?.format === "json";
  const projectFilter = opts?.project?.trim();
  let projectId;
  if (projectFilter) {
    const match = db
      .select({ id: projects.id })
      .from(projects)
      .where(eq(projects.name, projectFilter))
      .get();
    if (!match) {
      if (wantsJson) {
        const emptyResult = { project: projectFilter, examined: 0, updated: 0, unchanged: 0 };
        console.log(JSON.stringify(emptyResult, null, 2));
      } else {
        console.log(`Backfill normalized-paths: project "${projectFilter}" not found.`);
      }
      return;
    }
    projectId = match.id;
  }

  const counts = backfillNormalizedPaths(db, { projectId });
  if (wantsJson) {
    const result = {
      project: projectFilter ?? null,
      examined: counts.examined,
      updated: counts.updated,
      unchanged: counts.unchanged
    };
    console.log(JSON.stringify(result, null, 2));
    return;
  }

  const report = ["Normalized-path backfill complete.\n"];
  if (projectFilter) report.push(` Project: ${projectFilter}`);
  report.push(` Examined: ${counts.examined}`);
  report.push(` Updated: ${counts.updated}`);
  report.push(` Unchanged: ${counts.unchanged}`);
  // One console.log per line, matching the line-by-line output.
  for (const line of report) {
    console.log(line);
  }
}
304
373
  async function backfillInsightsCommand(project, opts) {
305
- const { IntelligenceService } = await import("./intelligence-service-54F3NGPM.js");
374
+ const { IntelligenceService } = await import("./intelligence-service-AEI46KC5.js");
306
375
  const config = loadConfig();
307
376
  const db = createClient(config.database);
308
377
  migrate(db);
@@ -498,14 +567,28 @@ var BACKFILL_CLI_COMMANDS = [
498
567
  });
499
568
  }
500
569
  },
570
+ {
571
+ path: ["backfill", "normalized-paths"],
572
+ usage: "canonry backfill normalized-paths [--project <name>] [--format json]",
573
+ options: {
574
+ project: stringOption()
575
+ },
576
+ allowPositionals: false,
577
+ run: async (input) => {
578
+ await backfillNormalizedPathsCommand({
579
+ project: getString(input.values, "project"),
580
+ format: input.format
581
+ });
582
+ }
583
+ },
501
584
  {
502
585
  path: ["backfill"],
503
- usage: "canonry backfill <answer-visibility|insights> [options]",
586
+ usage: "canonry backfill <answer-visibility|insights|normalized-paths> [options]",
504
587
  run: async (input) => {
505
588
  unknownSubcommand(input.positionals[0], {
506
589
  command: "backfill",
507
- usage: "canonry backfill <answer-visibility|insights> [options]",
508
- available: ["answer-visibility", "insights"]
590
+ usage: "canonry backfill <answer-visibility|insights|normalized-paths> [options]",
591
+ available: ["answer-visibility", "insights", "normalized-paths"]
509
592
  });
510
593
  }
511
594
  }
@@ -638,8 +721,38 @@ async function backlinksSync(opts) {
638
721
  return;
639
722
  }
640
723
  if (opts.wait) process.stderr.write("\n");
724
+ if (!opts.release) {
725
+ process.stderr.write(`Auto-discovered release: ${sync.release}
726
+ `);
727
+ }
641
728
  console.log(formatSync(final));
642
729
  }
730
/**
 * Render a byte count as a short human-readable string using decimal units.
 *
 * @param {number|null|undefined} n - Byte count.
 * @returns {string} An em dash when the count is missing or not a finite
 *   number; `<n> B` below 1000; otherwise one decimal place in KB, MB, or GB.
 *   The unit is chosen after rounding, so a value that would round up to
 *   "1000.0" is shown in the next unit instead (e.g. 999999999 -> "1.0 GB").
 */
function formatBytesShort(n) {
  // Covers null, undefined, NaN, and ±Infinity.
  if (!Number.isFinite(n)) return "\u2014";
  if (n < 1e3) return `${n} B`;
  const units = ["KB", "MB", "GB"];
  let value = n / 1e3;
  let unitIndex = 0;
  // Promote while the rounded figure would read as >= 1000 of the current unit; GB is the ceiling.
  while (unitIndex < units.length - 1 && Number(value.toFixed(1)) >= 1e3) {
    value /= 1e3;
    unitIndex += 1;
  }
  return `${value.toFixed(1)} ${units[unitIndex]}`;
}
737
/**
 * Format a latest-release probe result for text output.
 *
 * @param {object|null} result - Release metadata, or a falsy value when no
 *   release was discovered.
 * @returns {string} A fixed message when `result` is falsy; otherwise
 *   newline-joined lines with the release id, both file URLs with their sizes,
 *   and the last-modified value when one is present.
 */
function formatLatestRelease(result) {
  if (!result) {
    return "No release discovered (Common Crawl probe returned no candidates).";
  }
  const { release, vertexUrl, vertexBytes, edgesUrl, edgesBytes, lastModified } = result;
  const rows = [
    `Release: ${release}`,
    `Vertex: ${vertexUrl}`,
    ` ${formatBytesShort(vertexBytes)}`,
    `Edges: ${edgesUrl}`,
    ` ${formatBytesShort(edgesBytes)}`
  ];
  if (lastModified) {
    rows.push(`Last modified: ${lastModified}`);
  }
  return rows.join("\n");
}
748
/**
 * CLI handler for `canonry backlinks releases latest`: fetch the latest
 * release from the API client and print it.
 *
 * @param {{format?: string}} [opts] - `format: "json"` prints the raw result
 *   via `printJson`; anything else prints the text rendering.
 * @returns {Promise<void>}
 */
async function backlinksLatestRelease(opts = {}) {
  const latest = await getClient().backlinksLatestRelease();
  if (opts.format !== "json") {
    console.log(formatLatestRelease(latest));
    return;
  }
  printJson(latest);
}
643
756
  async function backlinksStatus(opts = {}) {
644
757
  const sync = await getClient().backlinksLatestSync();
645
758
  if (opts.format === "json") {
@@ -715,19 +828,14 @@ var BACKLINKS_CLI_COMMANDS = [
715
828
  },
716
829
  {
717
830
  path: ["backlinks", "sync"],
718
- usage: "canonry backlinks sync --release <id> [--wait] [--format json]",
831
+ usage: "canonry backlinks sync [--release <id>] [--wait] [--format json]",
719
832
  options: {
720
833
  release: stringOption(),
721
834
  wait: { type: "boolean" }
722
835
  },
723
836
  run: async (input) => {
724
- const release = requireStringOption(input, "release", {
725
- message: "--release is required",
726
- usage: "canonry backlinks sync --release <id> [--wait]",
727
- command: "backlinks sync"
728
- });
729
837
  await backlinksSync({
730
- release,
838
+ release: getString(input.values, "release"),
731
839
  wait: getBoolean(input.values, "wait"),
732
840
  format: input.format
733
841
  });
@@ -775,6 +883,14 @@ var BACKLINKS_CLI_COMMANDS = [
775
883
  await backlinksReleases({ format: input.format });
776
884
  }
777
885
  },
886
+ {
887
+ path: ["backlinks", "releases", "latest"],
888
+ usage: "canonry backlinks releases latest [--format json]",
889
+ options: {},
890
+ run: async (input) => {
891
+ await backlinksLatestRelease({ format: input.format });
892
+ }
893
+ },
778
894
  {
779
895
  path: ["backlinks", "extract"],
780
896
  usage: "canonry backlinks extract <project> [--release <id>] [--wait] [--format json]",
@@ -7039,6 +7155,18 @@ async function serveCommand(format = "text") {
7039
7155
  config.port = port;
7040
7156
  const db = createClient(config.database);
7041
7157
  migrate(db);
7158
+ try {
7159
+ const result = backfillNormalizedPaths(db);
7160
+ if (result.updated > 0 && format === "text") {
7161
+ console.log(
7162
+ `Migrated ${result.updated} GA landing-page row${result.updated === 1 ? "" : "s"} to canonical form.`
7163
+ );
7164
+ }
7165
+ } catch (err) {
7166
+ const msg = err instanceof Error ? err.message : String(err);
7167
+ process.stderr.write(`warning: normalized-path backfill skipped: ${msg}
7168
+ `);
7169
+ }
7042
7170
  const app = await createServer({ config, db });
7043
7171
  let shuttingDown = false;
7044
7172
  const shutdown = (signal) => {
package/dist/index.js CHANGED
@@ -1,10 +1,10 @@
1
1
  import {
2
2
  createServer
3
- } from "./chunk-DHMCIJMQ.js";
3
+ } from "./chunk-3T64Y7GR.js";
4
4
  import {
5
5
  loadConfig
6
- } from "./chunk-XJS7NALL.js";
7
- import "./chunk-UM6RDSRJ.js";
6
+ } from "./chunk-QTS7VZXN.js";
7
+ import "./chunk-FV6PY5UE.js";
8
8
  import "./chunk-MLKGABMK.js";
9
9
  export {
10
10
  createServer,