@ainyc/canonry 2.14.0 → 2.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,6 +39,7 @@ import {
39
39
  locationContextSchema,
40
40
  missingDependency,
41
41
  normalizeProjectDomain,
42
+ normalizeUrlPath,
42
43
  notFound,
43
44
  notImplemented,
44
45
  parseRunError,
@@ -59,7 +60,7 @@ import {
59
60
  visibilityStateFromAnswerMentioned,
60
61
  windowCutoff,
61
62
  wordpressEnvSchema
62
- } from "./chunk-RNMMN2WI.js";
63
+ } from "./chunk-QTS7VZXN.js";
63
64
  import {
64
65
  IntelligenceService,
65
66
  agentMemory,
@@ -97,7 +98,7 @@ import {
97
98
  runs,
98
99
  schedules,
99
100
  usageCounters
100
- } from "./chunk-UM6RDSRJ.js";
101
+ } from "./chunk-FV6PY5UE.js";
101
102
 
102
103
  // src/telemetry.ts
103
104
  import crypto from "crypto";
@@ -8661,6 +8662,7 @@ async function ga4Routes(app, opts) {
8661
8662
  projectId: project.id,
8662
8663
  date: row.date,
8663
8664
  landingPage: row.landingPage,
8665
+ landingPageNormalized: normalizeUrlPath(row.landingPage),
8664
8666
  sessions: row.sessions,
8665
8667
  organicSessions: row.organicSessions,
8666
8668
  users: row.users,
@@ -8785,11 +8787,11 @@ async function ga4Routes(app, opts) {
8785
8787
  periodEnd: gaTrafficSummaries.periodEnd
8786
8788
  }).from(gaTrafficSummaries).where(eq19(gaTrafficSummaries.projectId, project.id)).get();
8787
8789
  const rows = app.db.select({
8788
- landingPage: gaTrafficSnapshots.landingPage,
8790
+ landingPage: sql5`COALESCE(${gaTrafficSnapshots.landingPageNormalized}, ${gaTrafficSnapshots.landingPage})`,
8789
8791
  sessions: sql5`SUM(${gaTrafficSnapshots.sessions})`,
8790
8792
  organicSessions: sql5`SUM(${gaTrafficSnapshots.organicSessions})`,
8791
8793
  users: sql5`SUM(${gaTrafficSnapshots.users})`
8792
- }).from(gaTrafficSnapshots).where(and8(...snapshotConditions)).groupBy(gaTrafficSnapshots.landingPage).orderBy(sql5`SUM(${gaTrafficSnapshots.sessions}) DESC`).limit(limit).all();
8794
+ }).from(gaTrafficSnapshots).where(and8(...snapshotConditions)).groupBy(sql5`COALESCE(${gaTrafficSnapshots.landingPageNormalized}, ${gaTrafficSnapshots.landingPage})`).orderBy(sql5`SUM(${gaTrafficSnapshots.sessions}) DESC`).limit(limit).all();
8793
8795
  const aiReferrals = app.db.select({
8794
8796
  source: gaAiReferrals.source,
8795
8797
  medium: gaAiReferrals.medium,
@@ -9046,11 +9048,11 @@ async function ga4Routes(app, opts) {
9046
9048
  const project = resolveProject(app.db, request.params.name);
9047
9049
  requireGa4Connection(opts, project.name, project.canonicalDomain);
9048
9050
  const trafficPages = app.db.select({
9049
- landingPage: gaTrafficSnapshots.landingPage,
9051
+ landingPage: sql5`COALESCE(${gaTrafficSnapshots.landingPageNormalized}, ${gaTrafficSnapshots.landingPage})`,
9050
9052
  sessions: sql5`SUM(${gaTrafficSnapshots.sessions})`,
9051
9053
  organicSessions: sql5`SUM(${gaTrafficSnapshots.organicSessions})`,
9052
9054
  users: sql5`SUM(${gaTrafficSnapshots.users})`
9053
- }).from(gaTrafficSnapshots).where(eq19(gaTrafficSnapshots.projectId, project.id)).groupBy(gaTrafficSnapshots.landingPage).orderBy(sql5`SUM(${gaTrafficSnapshots.sessions}) DESC`).all();
9055
+ }).from(gaTrafficSnapshots).where(eq19(gaTrafficSnapshots.projectId, project.id)).groupBy(sql5`COALESCE(${gaTrafficSnapshots.landingPageNormalized}, ${gaTrafficSnapshots.landingPage})`).orderBy(sql5`SUM(${gaTrafficSnapshots.sessions}) DESC`).all();
9054
9056
  return {
9055
9057
  pages: trafficPages.map((r) => ({
9056
9058
  landingPage: r.landingPage,
@@ -310,6 +310,14 @@ var gaTrafficSnapshots = sqliteTable("ga_traffic_snapshots", {
310
310
  projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
311
311
  date: text("date").notNull(),
312
312
  landingPage: text("landing_page").notNull(),
313
+ /**
314
+ * Canonicalized form of `landingPage` produced by `normalizeUrlPath()` in
315
+ * `@ainyc/canonry-contracts`. Nullable so existing rows survive migration;
316
+ * new GA4 sync writes populate it. Per-page aggregations should
317
+ * `GROUP BY COALESCE(landing_page_normalized, landing_page)` so
318
+ * partially-backfilled state still aggregates correctly.
319
+ */
320
+ landingPageNormalized: text("landing_page_normalized"),
313
321
  sessions: integer("sessions").notNull().default(0),
314
322
  organicSessions: integer("organic_sessions").notNull().default(0),
315
323
  users: integer("users").notNull().default(0),
@@ -318,6 +326,7 @@ var gaTrafficSnapshots = sqliteTable("ga_traffic_snapshots", {
318
326
  }, (table) => [
319
327
  index("idx_ga_traffic_project_date").on(table.projectId, table.date),
320
328
  index("idx_ga_traffic_page").on(table.landingPage),
329
+ index("idx_ga_traffic_page_normalized").on(table.projectId, table.date, table.landingPageNormalized),
321
330
  index("idx_ga_traffic_run").on(table.syncRunId)
322
331
  ]);
323
332
  var gaAiReferrals = sqliteTable("ga_ai_referrals", {
@@ -1049,7 +1058,17 @@ var MIGRATIONS = [
1049
1058
  WHEN discovery_date IS NOT NULL THEN 0
1050
1059
  ELSE NULL
1051
1060
  END
1052
- WHERE created_at < '2026-04-22T00:00:00Z'`
1061
+ WHERE created_at < '2026-04-22T00:00:00Z'`,
1062
+ // v44: Canonicalized landing-page column for ga_traffic_snapshots.
1063
+ // Populated by GA4 sync via normalizeUrlPath() in
1064
+ // @ainyc/canonry-contracts. Nullable; existing rows are filled in by
1065
+ // `canonry backfill normalized-paths`. Read queries should
1066
+ // `GROUP BY COALESCE(landing_page_normalized, landing_page)` so
1067
+ // partially-backfilled state still aggregates correctly.
1068
+ // See plans/ai-attribution-research.md "Step 1 — data hygiene".
1069
+ `ALTER TABLE ga_traffic_snapshots ADD COLUMN landing_page_normalized TEXT`,
1070
+ `CREATE INDEX IF NOT EXISTS idx_ga_traffic_page_normalized
1071
+ ON ga_traffic_snapshots(project_id, date, landing_page_normalized)`
1053
1072
  ];
1054
1073
  function isDuplicateColumnError(err) {
1055
1074
  if (!(err instanceof Error)) return false;
@@ -1790,6 +1790,97 @@ function summarizeCheckResults(results) {
1790
1790
  return summary;
1791
1791
  }
1792
1792
 
1793
// ../contracts/src/url-normalize.ts

/**
 * Query-parameter names that are always removed during normalization.
 * Any key starting with `utm_` is removed as well (see `shouldStrip`).
 */
const STRIP_KEYS = /* @__PURE__ */ new Set([
  // Click identifiers
  "fbclid",
  "gclid",
  "msclkid",
  "ttclid",
  "li_fat_id",
  "igshid",
  "yclid",
  "dclid",
  "gbraid",
  "wbraid",
  // Mailchimp
  "mc_cid",
  "mc_eid",
  // Google Analytics linkers
  "_ga",
  "_gl",
  // Google Tag Manager debug
  "gtm_latency",
  "gtm_debug"
]);

/**
 * Decides whether a query-parameter key is removed from the normalized path.
 * The comparison is case-sensitive.
 *
 * @param {string} key - Raw (undecoded) parameter name.
 * @returns {boolean} `true` for keys in `STRIP_KEYS` and for any `utm_*` key.
 */
function shouldStrip(key) {
  return STRIP_KEYS.has(key) || key.startsWith("utm_");
}

/**
 * Splits a raw query string (without the leading `?`) into key/value pairs.
 * Keys and values are kept exactly as written; nothing is percent-decoded.
 *
 * @param {string} query - Query string such as `a=1&b`.
 * @returns {{ key: string, value: string | null }[]} One entry per non-empty
 *   segment; `value` is `null` when the segment has no `=`.
 */
function parseQuery(query) {
  if (query === "") return [];
  return query
    .split("&")
    // Stray separators ("a=1&", "a=1&&b=2") yield empty segments. Keeping them
    // would emit a dangling "&" and stop otherwise-equal URLs from collapsing.
    .filter((pair) => pair !== "")
    .map((pair) => {
      const eq = pair.indexOf("=");
      if (eq === -1) return { key: pair, value: null };
      return { key: pair.slice(0, eq), value: pair.slice(eq + 1) };
    });
}

/**
 * Serializes pairs produced by `parseQuery` back into a query string.
 *
 * @param {{ key: string, value: string | null }[]} pairs
 * @returns {string} Pairs joined with `&`; a `null` value is written as a bare key.
 */
function encodeQuery(pairs) {
  return pairs.map((p) => p.value === null ? p.key : `${p.key}=${p.value}`).join("&");
}

/**
 * Maps the root index documents `/index.html` and `/index.php` to `/`.
 * Any other path is returned unchanged.
 *
 * @param {string} path2
 * @returns {string}
 */
function collapseRootIndex(path2) {
  if (path2 === "/index.html" || path2 === "/index.php") return "/";
  return path2;
}

/**
 * Removes every trailing slash from a path, leaving the root path intact.
 *
 * @param {string} path2
 * @returns {string} The path without trailing slashes; never an empty string
 *   for an input longer than one character.
 */
function dropTrailingSlash(path2) {
  if (path2.length > 1 && path2.endsWith("/")) {
    // A path made only of slashes ("//", "///") would strip down to "";
    // treat it as the root instead of returning an empty path.
    return path2.replace(/\/+$/, "") || "/";
  }
  return path2;
}

/**
 * Produces a canonical `path[?query]` string from a landing-page value.
 *
 * Steps:
 *  - `null`/`undefined`, blank strings and the literal `(not set)` yield `null`.
 *  - Absolute `http(s)://` URLs are parsed with `URL`; only pathname and search
 *    are kept. An unparseable absolute URL yields `null`.
 *  - Other inputs are treated as a path: the `#fragment` is dropped and the
 *    remainder is split at the first `?`.
 *  - `/index.html` and `/index.php` collapse to `/`; trailing slashes are dropped.
 *  - Parameters matched by `shouldStrip` are removed; the rest are sorted by
 *    key. `Array.prototype.sort` is stable, so repeated keys keep their
 *    original relative order.
 *
 * @param {string | null | undefined} input - Raw landing page (path or absolute URL).
 * @returns {string | null} Canonical path, or `null` when there is nothing to normalize.
 */
function normalizeUrlPath(input) {
  if (input == null) return null;
  const trimmed = input.trim();
  if (trimmed === "") return null;
  if (trimmed === "(not set)") return null;
  let pathPart;
  let queryPart;
  if (/^https?:\/\//i.test(trimmed)) {
    let url;
    try {
      url = new URL(trimmed);
    } catch {
      return null;
    }
    pathPart = url.pathname || "/";
    queryPart = url.search.startsWith("?") ? url.search.slice(1) : url.search;
  } else {
    let raw = trimmed;
    const hashIdx = raw.indexOf("#");
    if (hashIdx !== -1) raw = raw.slice(0, hashIdx);
    const qIdx = raw.indexOf("?");
    if (qIdx === -1) {
      pathPart = raw;
      queryPart = "";
    } else {
      pathPart = raw.slice(0, qIdx);
      queryPart = raw.slice(qIdx + 1);
    }
  }
  // Inputs such as "?a=1" or "#top" have no path component; use the root.
  if (pathPart === "") pathPart = "/";
  pathPart = collapseRootIndex(pathPart);
  pathPart = dropTrailingSlash(pathPart);
  const pairs = parseQuery(queryPart).filter((p) => !shouldStrip(p.key));
  pairs.sort((a, b) => {
    if (a.key < b.key) return -1;
    if (a.key > b.key) return 1;
    return 0;
  });
  if (pairs.length === 0) return pathPart;
  return `${pathPart}?${encodeQuery(pairs)}`;
}
1883
+
1793
1884
  // src/client.ts
1794
1885
  function createApiClient() {
1795
1886
  const config = loadConfig();
@@ -2522,6 +2613,7 @@ export {
2522
2613
  CheckScopes,
2523
2614
  CheckCategories,
2524
2615
  summarizeCheckResults,
2616
+ normalizeUrlPath,
2525
2617
  createApiClient,
2526
2618
  ApiClient
2527
2619
  };
package/dist/cli.js CHANGED
@@ -17,7 +17,7 @@ import {
17
17
  setGoogleAuthConfig,
18
18
  showFirstRunNotice,
19
19
  trackEvent
20
- } from "./chunk-LD7Y4K4G.js";
20
+ } from "./chunk-3T64Y7GR.js";
21
21
  import {
22
22
  CcReleaseSyncStatuses,
23
23
  CheckScopes,
@@ -37,6 +37,7 @@ import {
37
37
  getConfigPath,
38
38
  isEndpointMissing,
39
39
  loadConfig,
40
+ normalizeUrlPath,
40
41
  notificationEventSchema,
41
42
  printCliError,
42
43
  providerQuotaPolicySchema,
@@ -44,17 +45,18 @@ import {
44
45
  saveConfig,
45
46
  saveConfigPatch,
46
47
  usageError
47
- } from "./chunk-RNMMN2WI.js";
48
+ } from "./chunk-QTS7VZXN.js";
48
49
  import {
49
50
  apiKeys,
50
51
  competitors,
51
52
  createClient,
53
+ gaTrafficSnapshots,
52
54
  migrate,
53
55
  parseJsonColumn,
54
56
  projects,
55
57
  querySnapshots,
56
58
  runs
57
- } from "./chunk-UM6RDSRJ.js";
59
+ } from "./chunk-FV6PY5UE.js";
58
60
  import "./chunk-MLKGABMK.js";
59
61
 
60
62
  // src/cli.ts
@@ -160,7 +162,7 @@ Usage: ${spec.usage}`, {
160
162
  }
161
163
 
162
164
  // src/commands/backfill.ts
163
- import { and, eq, inArray } from "drizzle-orm";
165
+ import { and, eq, inArray, isNull } from "drizzle-orm";
164
166
  var SNAPSHOT_BATCH_SIZE = 500;
165
167
  async function backfillAnswerVisibilityCommand(opts) {
166
168
  const config = loadConfig();
@@ -301,8 +303,75 @@ async function backfillAnswerVisibilityCommand(opts) {
301
303
  console.log(` Reparsed: ${reparsed}`);
302
304
  console.log(` Errors: ${providerErrors}`);
303
305
  }
306
/**
 * Populates `landingPageNormalized` on `gaTrafficSnapshots` rows where the
 * column is still NULL, optionally limited to a single project.
 *
 * Rows whose landing page normalizes to `null` are left untouched and counted
 * as `unchanged`. All updates run inside one transaction.
 *
 * @param {object} db - Database client exposing `select()` and `transaction()`.
 * @param {{ projectId?: string }} [opts] - Optional project filter.
 * @returns {{ examined: number, updated: number, unchanged: number }} Row counts.
 */
function backfillNormalizedPaths(db, opts) {
  const filters = [isNull(gaTrafficSnapshots.landingPageNormalized)];
  if (opts?.projectId) {
    filters.push(eq(gaTrafficSnapshots.projectId, opts.projectId));
  }

  const pending = db
    .select({
      id: gaTrafficSnapshots.id,
      landingPage: gaTrafficSnapshots.landingPage
    })
    .from(gaTrafficSnapshots)
    .where(and(...filters))
    .all();

  const counts = { examined: pending.length, updated: 0, unchanged: 0 };
  // Nothing to do: skip opening a transaction altogether.
  if (pending.length === 0) return counts;

  db.transaction((tx) => {
    pending.forEach((snapshot) => {
      const normalized = normalizeUrlPath(snapshot.landingPage);
      if (normalized === null) {
        counts.unchanged += 1;
        return;
      }
      tx
        .update(gaTrafficSnapshots)
        .set({ landingPageNormalized: normalized })
        .where(eq(gaTrafficSnapshots.id, snapshot.id))
        .run();
      counts.updated += 1;
    });
  });

  return counts;
}
332
/**
 * CLI entry point for `canonry backfill normalized-paths`.
 *
 * Opens the configured database, applies migrations, resolves the optional
 * project name to an id, runs `backfillNormalizedPaths`, and prints the
 * outcome as JSON (`format === "json"`) or as plain text.
 * An unknown project name prints a zero-count result / not-found message and
 * returns without throwing.
 *
 * @param {{ project?: string, format?: string }} [opts] - Parsed CLI options.
 * @returns {Promise<void>}
 */
async function backfillNormalizedPathsCommand(opts) {
  const config = loadConfig();
  const db = createClient(config.database);
  migrate(db);

  const wantsJson = opts?.format === "json";
  const projectFilter = opts?.project?.trim();

  let projectId;
  if (projectFilter) {
    const match = db
      .select({ id: projects.id })
      .from(projects)
      .where(eq(projects.name, projectFilter))
      .get();
    if (!match) {
      if (wantsJson) {
        const emptyResult = {
          project: projectFilter,
          examined: 0,
          updated: 0,
          unchanged: 0
        };
        console.log(JSON.stringify(emptyResult, null, 2));
      } else {
        console.log(`Backfill normalized-paths: project "${projectFilter}" not found.`);
      }
      return;
    }
    projectId = match.id;
  }

  const counts = backfillNormalizedPaths(db, { projectId });
  if (wantsJson) {
    const summary = {
      project: projectFilter ?? null,
      examined: counts.examined,
      updated: counts.updated,
      unchanged: counts.unchanged
    };
    console.log(JSON.stringify(summary, null, 2));
    return;
  }

  console.log("Normalized-path backfill complete.\n");
  if (projectFilter) console.log(` Project: ${projectFilter}`);
  const reportLines = [
    ` Examined: ${counts.examined}`,
    ` Updated: ${counts.updated}`,
    ` Unchanged: ${counts.unchanged}`
  ];
  for (const line of reportLines) console.log(line);
}
304
373
  async function backfillInsightsCommand(project, opts) {
305
- const { IntelligenceService } = await import("./intelligence-service-54F3NGPM.js");
374
+ const { IntelligenceService } = await import("./intelligence-service-AEI46KC5.js");
306
375
  const config = loadConfig();
307
376
  const db = createClient(config.database);
308
377
  migrate(db);
@@ -498,14 +567,28 @@ var BACKFILL_CLI_COMMANDS = [
498
567
  });
499
568
  }
500
569
  },
570
+ {
571
+ path: ["backfill", "normalized-paths"],
572
+ usage: "canonry backfill normalized-paths [--project <name>] [--format json]",
573
+ options: {
574
+ project: stringOption()
575
+ },
576
+ allowPositionals: false,
577
+ run: async (input) => {
578
+ await backfillNormalizedPathsCommand({
579
+ project: getString(input.values, "project"),
580
+ format: input.format
581
+ });
582
+ }
583
+ },
501
584
  {
502
585
  path: ["backfill"],
503
- usage: "canonry backfill <answer-visibility|insights> [options]",
586
+ usage: "canonry backfill <answer-visibility|insights|normalized-paths> [options]",
504
587
  run: async (input) => {
505
588
  unknownSubcommand(input.positionals[0], {
506
589
  command: "backfill",
507
- usage: "canonry backfill <answer-visibility|insights> [options]",
508
- available: ["answer-visibility", "insights"]
590
+ usage: "canonry backfill <answer-visibility|insights|normalized-paths> [options]",
591
+ available: ["answer-visibility", "insights", "normalized-paths"]
509
592
  });
510
593
  }
511
594
  }
@@ -7072,6 +7155,18 @@ async function serveCommand(format = "text") {
7072
7155
  config.port = port;
7073
7156
  const db = createClient(config.database);
7074
7157
  migrate(db);
7158
+ try {
7159
+ const result = backfillNormalizedPaths(db);
7160
+ if (result.updated > 0 && format === "text") {
7161
+ console.log(
7162
+ `Migrated ${result.updated} GA landing-page row${result.updated === 1 ? "" : "s"} to canonical form.`
7163
+ );
7164
+ }
7165
+ } catch (err) {
7166
+ const msg = err instanceof Error ? err.message : String(err);
7167
+ process.stderr.write(`warning: normalized-path backfill skipped: ${msg}
7168
+ `);
7169
+ }
7075
7170
  const app = await createServer({ config, db });
7076
7171
  let shuttingDown = false;
7077
7172
  const shutdown = (signal) => {
package/dist/index.js CHANGED
@@ -1,10 +1,10 @@
1
1
  import {
2
2
  createServer
3
- } from "./chunk-LD7Y4K4G.js";
3
+ } from "./chunk-3T64Y7GR.js";
4
4
  import {
5
5
  loadConfig
6
- } from "./chunk-RNMMN2WI.js";
7
- import "./chunk-UM6RDSRJ.js";
6
+ } from "./chunk-QTS7VZXN.js";
7
+ import "./chunk-FV6PY5UE.js";
8
8
  import "./chunk-MLKGABMK.js";
9
9
  export {
10
10
  createServer,
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  IntelligenceService
3
- } from "./chunk-UM6RDSRJ.js";
3
+ } from "./chunk-FV6PY5UE.js";
4
4
  import "./chunk-MLKGABMK.js";
5
5
  export {
6
6
  IntelligenceService
package/dist/mcp.js CHANGED
@@ -10,7 +10,7 @@ import {
10
10
  projectUpsertRequestSchema,
11
11
  runTriggerRequestSchema,
12
12
  scheduleUpsertRequestSchema
13
- } from "./chunk-RNMMN2WI.js";
13
+ } from "./chunk-QTS7VZXN.js";
14
14
  import "./chunk-MLKGABMK.js";
15
15
 
16
16
  // src/mcp/cli.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ainyc/canonry",
3
- "version": "2.14.0",
3
+ "version": "2.14.1",
4
4
  "type": "module",
5
5
  "description": "Agent-first open-source AEO operating platform - track how answer engines cite your domain",
6
6
  "license": "FSL-1.1-ALv2",
@@ -63,17 +63,17 @@
63
63
  "@ainyc/canonry-api-routes": "0.0.0",
64
64
  "@ainyc/canonry-contracts": "0.0.0",
65
65
  "@ainyc/canonry-db": "0.0.0",
66
- "@ainyc/canonry-intelligence": "0.0.0",
67
66
  "@ainyc/canonry-integration-bing": "0.0.0",
68
67
  "@ainyc/canonry-integration-commoncrawl": "0.0.0",
68
+ "@ainyc/canonry-intelligence": "0.0.0",
69
69
  "@ainyc/canonry-integration-google": "0.0.0",
70
- "@ainyc/canonry-integration-wordpress": "0.0.0",
71
70
  "@ainyc/canonry-provider-cdp": "0.0.0",
72
71
  "@ainyc/canonry-provider-claude": "0.0.0",
73
72
  "@ainyc/canonry-provider-gemini": "0.0.0",
73
+ "@ainyc/canonry-integration-wordpress": "0.0.0",
74
74
  "@ainyc/canonry-provider-local": "0.0.0",
75
- "@ainyc/canonry-provider-perplexity": "0.0.0",
76
- "@ainyc/canonry-provider-openai": "0.0.0"
75
+ "@ainyc/canonry-provider-openai": "0.0.0",
76
+ "@ainyc/canonry-provider-perplexity": "0.0.0"
77
77
  },
78
78
  "scripts": {
79
79
  "build": "tsx scripts/copy-agent-assets.ts && tsup && tsx build-web.ts",