@ainyc/canonry 4.51.4 → 4.54.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. package/assets/agent-workspace/skills/canonry/references/server-side-traffic.md +8 -5
  2. package/assets/assets/{BacklinksPage-9TlM08Wf.js → BacklinksPage-BXFT4pLI.js} +1 -1
  3. package/assets/assets/ProjectPage-DAtd9Vay.js +6 -0
  4. package/assets/assets/{RunRow-D7qdWWRl.js → RunRow-38dDceGl.js} +1 -1
  5. package/assets/assets/{RunsPage-CvewepfU.js → RunsPage-AJnFLtaE.js} +1 -1
  6. package/assets/assets/{SettingsPage-C7BvAhiB.js → SettingsPage-FT9ZAvFH.js} +1 -1
  7. package/assets/assets/{TrafficPage-DC3NhFOh.js → TrafficPage-B4A3oO8M.js} +1 -1
  8. package/assets/assets/TrafficSourceDetailPage-8NYU1TA6.js +1 -0
  9. package/assets/assets/{arrow-left-Agb02DMK.js → arrow-left-DgI0X1Q1.js} +1 -1
  10. package/assets/assets/{index-DeGyEwik.css → index-Bm3JQsW0.css} +1 -1
  11. package/assets/assets/{index-DTCZ93Ne.js → index-DLPKqyhx.js} +50 -50
  12. package/assets/assets/{server-traffic-C-0Ndjpw.js → server-traffic-GqiQYm6x.js} +1 -1
  13. package/assets/assets/{trash-2-lkrXVRRm.js → trash-2-BwPzJ8NI.js} +1 -1
  14. package/assets/index.html +2 -2
  15. package/dist/{chunk-WBO5S3IX.js → chunk-CRO6Q25G.js} +533 -65
  16. package/dist/{chunk-HMZKIOLG.js → chunk-J7MX3YOH.js} +1 -1
  17. package/dist/{chunk-QZ5XSM6C.js → chunk-JHAHNKSN.js} +103 -1
  18. package/dist/{chunk-FYGBW3SM.js → chunk-VZPDBHBW.js} +29 -1
  19. package/dist/cli.js +40 -27
  20. package/dist/index.js +4 -4
  21. package/dist/{intelligence-service-2XL2M7QP.js → intelligence-service-OCREQUCQ.js} +2 -2
  22. package/dist/mcp.js +2 -2
  23. package/package.json +12 -12
  24. package/assets/assets/ProjectPage-CD591qDz.js +0 -6
  25. package/assets/assets/TrafficSourceDetailPage-BvtTA6rs.js +0 -1
@@ -6,7 +6,7 @@ import {
6
6
  loadConfig,
7
7
  loadConfigRaw,
8
8
  saveConfigPatch
9
- } from "./chunk-HMZKIOLG.js";
9
+ } from "./chunk-J7MX3YOH.js";
10
10
  import {
11
11
  DEFAULT_RUN_HISTORY_LIMIT,
12
12
  IntelligenceService,
@@ -14,6 +14,7 @@ import {
14
14
  agentMemory,
15
15
  agentSessions,
16
16
  aiReferralEventsHourly,
17
+ aiUserFetchEventsHourly,
17
18
  apiKeys,
18
19
  auditLog,
19
20
  backlinkDomains,
@@ -83,7 +84,7 @@ import {
83
84
  smoothedRunDelta,
84
85
  trafficSources,
85
86
  usageCounters
86
- } from "./chunk-QZ5XSM6C.js";
87
+ } from "./chunk-JHAHNKSN.js";
87
88
  import {
88
89
  AGENT_MEMORY_VALUE_MAX_BYTES,
89
90
  AGENT_PROVIDER_IDS,
@@ -284,7 +285,7 @@ import {
284
285
  wordpressSchemaDeployResultDtoSchema,
285
286
  wordpressSchemaStatusResultDtoSchema,
286
287
  wordpressStatusDtoSchema
287
- } from "./chunk-FYGBW3SM.js";
288
+ } from "./chunk-VZPDBHBW.js";
288
289
 
289
290
  // src/telemetry.ts
290
291
  import crypto from "crypto";
@@ -5455,11 +5456,14 @@ function renderServerActivity(report, audience) {
5455
5456
  const crawlerTrustSummary = `${formatNumber(sa.verifiedCrawlerHits.current)} verified \xB7 ${formatNumber(sa.unverifiedCrawlerHits.current)} unverified`;
5456
5457
  const crawlerDelta = formatDelta(crawlerRequests, "requests");
5457
5458
  const crawlerSubtitle = crawlerDelta ? `${escapeHtml(crawlerTrustSummary)} \xB7 ${crawlerDelta}` : escapeHtml(crawlerTrustSummary);
5458
- const clientOperators = sa.byOperator.filter((o) => o.verifiedHits > 0 || o.unverifiedHits > 0 || o.referralArrivals > 0).slice(0, 5);
5459
+ const userFetchDelta = formatDelta(sa.aiUserFetchHits, "requests");
5460
+ const userFetchSubtitle = userFetchDelta || escapeHtml("ChatGPT-User, Perplexity-User, MistralAI-User");
5461
+ const clientOperators = sa.byOperator.filter((o) => o.verifiedHits > 0 || o.unverifiedHits > 0 || o.userFetchHits > 0 || o.referralArrivals > 0).slice(0, 5);
5459
5462
  const clientOperatorRows = clientOperators.map((o) => `
5460
5463
  <tr>
5461
5464
  <td>${escapeHtml(o.operator)}</td>
5462
5465
  <td class="numeric">${formatNumber(o.verifiedHits + o.unverifiedHits)}</td>
5466
+ <td class="numeric">${formatNumber(o.userFetchHits)}</td>
5463
5467
  <td class="numeric">${formatNumber(o.referralArrivals)}</td>
5464
5468
  </tr>`).join("");
5465
5469
  return section(
@@ -5470,6 +5474,11 @@ function renderServerActivity(report, audience) {
5470
5474
  <div class="value">${formatNumber(crawlerRequests.current)}</div>
5471
5475
  <div class="subtitle">${crawlerSubtitle}</div>
5472
5476
  </div>
5477
+ <div class="metric">
5478
+ <div class="label">AI user-fetch requests</div>
5479
+ <div class="value">${formatNumber(sa.aiUserFetchHits.current)}</div>
5480
+ <div class="subtitle">${userFetchSubtitle}</div>
5481
+ </div>
5473
5482
  <div class="metric">
5474
5483
  <div class="label">AI referral sessions</div>
5475
5484
  <div class="value">${formatNumber(sa.referralArrivals.current)}</div>
@@ -5478,10 +5487,10 @@ function renderServerActivity(report, audience) {
5478
5487
  </div>
5479
5488
  ${clientOperatorRows ? `<div class="chart-card"><h3>By AI tool</h3>
5480
5489
  <table class="report-table">
5481
- <thead><tr><th>AI tool</th><th class="numeric">Bot requests (7d)</th><th class="numeric">Referral sessions</th></tr></thead>
5490
+ <thead><tr><th>AI tool</th><th class="numeric">Bot requests (7d)</th><th class="numeric">User fetches (7d)</th><th class="numeric">Referral sessions</th></tr></thead>
5482
5491
  <tbody>${clientOperatorRows}</tbody>
5483
5492
  </table>
5484
- <p class="meta">Verified requests are reverse-DNS confirmed. Unverified requests are user-agent claims shown separately in agency diagnostics.</p>
5493
+ <p class="meta">Bot requests are bulk crawl (GPTBot, PerplexityBot, \u2026). User fetches are on-demand reads triggered by real users inside an AI surface (ChatGPT-User, Perplexity-User, \u2026). Verified requests are reverse-DNS confirmed; unverified requests are UA claims shown separately in agency diagnostics.</p>
5485
5494
  </div>` : ""}`
5486
5495
  );
5487
5496
  }
@@ -5493,6 +5502,7 @@ function renderServerActivity(report, audience) {
5493
5502
  <td>${escapeHtml(o.operator)}</td>
5494
5503
  <td class="numeric">${formatNumber(o.verifiedHits)}</td>
5495
5504
  <td class="numeric meta">${formatNumber(o.unverifiedHits)}</td>
5505
+ <td class="numeric">${formatNumber(o.userFetchHits)}</td>
5496
5506
  <td class="numeric">${formatNumber(o.referralArrivals)}</td>
5497
5507
  <td class="numeric ${toneClass}">${deltaText}</td>
5498
5508
  </tr>`;
@@ -5533,6 +5543,11 @@ function renderServerActivity(report, audience) {
5533
5543
  <div class="value">${formatNumber(sa.unverifiedCrawlerHits.current)}</div>
5534
5544
  <div class="subtitle">${formatDelta(sa.unverifiedCrawlerHits, "hits")}</div>
5535
5545
  </div>
5546
+ <div class="metric">
5547
+ <div class="label">AI user-fetch hits (7d)</div>
5548
+ <div class="value">${formatNumber(sa.aiUserFetchHits.current)}</div>
5549
+ <div class="subtitle">${formatDelta(sa.aiUserFetchHits, "hits")}</div>
5550
+ </div>
5536
5551
  <div class="metric">
5537
5552
  <div class="label">AI-referral sessions (7d)</div>
5538
5553
  <div class="value">${formatNumber(sa.referralArrivals.current)}</div>
@@ -5541,9 +5556,9 @@ function renderServerActivity(report, audience) {
5541
5556
  </div>
5542
5557
  ${trendChart}
5543
5558
  ${operatorRows ? `<div class="chart-card"><h3>Per AI operator</h3>
5544
- <p class="meta">Verified means rDNS-confirmed. Unverified bots claim the user-agent but couldn't be verified \u2014 could be the real bot or an imitator.</p>
5559
+ <p class="meta">Verified means rDNS-confirmed. Unverified bots claim the user-agent but couldn't be verified \u2014 could be the real bot or an imitator. User fetches are on-demand reads from an AI surface on behalf of a real user (ChatGPT-User, Perplexity-User, \u2026) \u2014 disjoint from bulk crawl.</p>
5545
5560
  <table class="report-table">
5546
- <thead><tr><th>Operator</th><th class="numeric">Verified hits</th><th class="numeric">Unverified</th><th class="numeric">Referral sessions</th><th class="numeric">7d delta</th></tr></thead>
5561
+ <thead><tr><th>Operator</th><th class="numeric">Verified hits</th><th class="numeric">Unverified</th><th class="numeric">User fetches</th><th class="numeric">Referral sessions</th><th class="numeric">7d delta</th></tr></thead>
5547
5562
  <tbody>${operatorRows}</tbody>
5548
5563
  </table>
5549
5564
  </div>` : ""}
@@ -6385,10 +6400,21 @@ function buildServerActivity(db, projectId) {
6385
6400
  )
6386
6401
  ).get()?.total ?? 0
6387
6402
  );
6403
+ const sumUserFetches = (windowStartIso, windowEndIso, exclusiveEnd = false) => Number(
6404
+ db.select({ total: sql5`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)` }).from(aiUserFetchEventsHourly).where(
6405
+ and9(
6406
+ eq14(aiUserFetchEventsHourly.projectId, projectId),
6407
+ gte2(aiUserFetchEventsHourly.tsHour, windowStartIso),
6408
+ exclusiveEnd ? lt(aiUserFetchEventsHourly.tsHour, windowEndIso) : lte(aiUserFetchEventsHourly.tsHour, windowEndIso)
6409
+ )
6410
+ ).get()?.total ?? 0
6411
+ );
6388
6412
  const verifiedCurrent = sumVerifiedCrawlers(headlineStart, headlineEnd);
6389
6413
  const verifiedPrior = sumVerifiedCrawlers(priorStart, headlineStart, true);
6390
6414
  const unverifiedCurrent = sumUnverifiedCrawlers(headlineStart, headlineEnd);
6391
6415
  const unverifiedPrior = sumUnverifiedCrawlers(priorStart, headlineStart, true);
6416
+ const userFetchCurrent = sumUserFetches(headlineStart, headlineEnd);
6417
+ const userFetchPrior = sumUserFetches(priorStart, headlineStart, true);
6392
6418
  const referralCurrent = sumReferrals(headlineStart, headlineEnd);
6393
6419
  const referralPrior = sumReferrals(priorStart, headlineStart, true);
6394
6420
  const crawlerByOperatorRows = db.select({
@@ -6424,11 +6450,21 @@ function buildServerActivity(db, projectId) {
6424
6450
  lte(aiReferralEventsHourly.tsHour, headlineEnd)
6425
6451
  )
6426
6452
  ).groupBy(aiReferralEventsHourly.operator).all();
6453
+ const userFetchByOperatorRows = db.select({
6454
+ operator: aiUserFetchEventsHourly.operator,
6455
+ hits: sql5`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)`
6456
+ }).from(aiUserFetchEventsHourly).where(
6457
+ and9(
6458
+ eq14(aiUserFetchEventsHourly.projectId, projectId),
6459
+ gte2(aiUserFetchEventsHourly.tsHour, headlineStart),
6460
+ lte(aiUserFetchEventsHourly.tsHour, headlineEnd)
6461
+ )
6462
+ ).groupBy(aiUserFetchEventsHourly.operator).all();
6427
6463
  const operatorAgg = /* @__PURE__ */ new Map();
6428
6464
  const ensureOp = (op) => {
6429
6465
  let entry = operatorAgg.get(op);
6430
6466
  if (!entry) {
6431
- entry = { verified: 0, unverified: 0, referrals: 0, prior: 0 };
6467
+ entry = { verified: 0, unverified: 0, userFetch: 0, referrals: 0, prior: 0 };
6432
6468
  operatorAgg.set(op, entry);
6433
6469
  }
6434
6470
  return entry;
@@ -6441,6 +6477,9 @@ function buildServerActivity(db, projectId) {
6441
6477
  for (const r of crawlerByOperatorPriorRows) {
6442
6478
  ensureOp(r.operator).prior += Number(r.hits);
6443
6479
  }
6480
+ for (const r of userFetchByOperatorRows) {
6481
+ ensureOp(r.operator).userFetch += Number(r.hits);
6482
+ }
6444
6483
  for (const r of referralByOperatorRows) {
6445
6484
  ensureOp(r.operator).referrals += Number(r.hits);
6446
6485
  }
@@ -6448,10 +6487,11 @@ function buildServerActivity(db, projectId) {
6448
6487
  operator,
6449
6488
  verifiedHits: v.verified,
6450
6489
  unverifiedHits: v.unverified,
6490
+ userFetchHits: v.userFetch,
6451
6491
  referralArrivals: v.referrals,
6452
6492
  deltaPct: deltaPercent(v.verified, v.prior)
6453
6493
  })).sort(
6454
- (a, b) => b.verifiedHits - a.verifiedHits || b.unverifiedHits - a.unverifiedHits || b.referralArrivals - a.referralArrivals
6494
+ (a, b) => b.verifiedHits - a.verifiedHits || b.userFetchHits - a.userFetchHits || b.unverifiedHits - a.unverifiedHits || b.referralArrivals - a.referralArrivals
6455
6495
  );
6456
6496
  const topPathsRows = db.select({
6457
6497
  path: crawlerEventsHourly.pathNormalized,
@@ -6526,14 +6566,30 @@ function buildServerActivity(db, projectId) {
6526
6566
  lte(aiReferralEventsHourly.tsHour, headlineEnd)
6527
6567
  )
6528
6568
  ).groupBy(sql5`SUBSTR(${aiReferralEventsHourly.tsHour}, 1, 10)`).all();
6569
+ const userFetchTrendRows = db.select({
6570
+ date: sql5`SUBSTR(${aiUserFetchEventsHourly.tsHour}, 1, 10)`,
6571
+ hits: sql5`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)`
6572
+ }).from(aiUserFetchEventsHourly).where(
6573
+ and9(
6574
+ eq14(aiUserFetchEventsHourly.projectId, projectId),
6575
+ gte2(aiUserFetchEventsHourly.tsHour, trendStart),
6576
+ lte(aiUserFetchEventsHourly.tsHour, headlineEnd)
6577
+ )
6578
+ ).groupBy(sql5`SUBSTR(${aiUserFetchEventsHourly.tsHour}, 1, 10)`).all();
6579
+ const emptyTrendEntry = () => ({ verifiedCrawlerHits: 0, userFetchHits: 0, referralArrivals: 0 });
6529
6580
  const dailyTrendMap = /* @__PURE__ */ new Map();
6530
6581
  for (const r of crawlerTrendRows) {
6531
- const e = dailyTrendMap.get(r.date) ?? { verifiedCrawlerHits: 0, referralArrivals: 0 };
6582
+ const e = dailyTrendMap.get(r.date) ?? emptyTrendEntry();
6532
6583
  e.verifiedCrawlerHits += Number(r.hits);
6533
6584
  dailyTrendMap.set(r.date, e);
6534
6585
  }
6586
+ for (const r of userFetchTrendRows) {
6587
+ const e = dailyTrendMap.get(r.date) ?? emptyTrendEntry();
6588
+ e.userFetchHits += Number(r.hits);
6589
+ dailyTrendMap.set(r.date, e);
6590
+ }
6535
6591
  for (const r of referralTrendRows) {
6536
- const e = dailyTrendMap.get(r.date) ?? { verifiedCrawlerHits: 0, referralArrivals: 0 };
6592
+ const e = dailyTrendMap.get(r.date) ?? emptyTrendEntry();
6537
6593
  e.referralArrivals += Number(r.hits);
6538
6594
  dailyTrendMap.set(r.date, e);
6539
6595
  }
@@ -6541,7 +6597,7 @@ function buildServerActivity(db, projectId) {
6541
6597
  return {
6542
6598
  windowStart: headlineStart,
6543
6599
  windowEnd: headlineEnd,
6544
- hasData: verifiedCurrent + unverifiedCurrent + referralCurrent + verifiedPrior + unverifiedPrior + referralPrior > 0 || byOperator.length > 0 || topCrawledPaths.length > 0 || referralProducts.length > 0,
6600
+ hasData: verifiedCurrent + unverifiedCurrent + userFetchCurrent + referralCurrent + verifiedPrior + unverifiedPrior + userFetchPrior + referralPrior > 0 || byOperator.length > 0 || topCrawledPaths.length > 0 || referralProducts.length > 0,
6545
6601
  verifiedCrawlerHits: {
6546
6602
  current: verifiedCurrent,
6547
6603
  prior: verifiedPrior,
@@ -6552,6 +6608,11 @@ function buildServerActivity(db, projectId) {
6552
6608
  prior: unverifiedPrior,
6553
6609
  deltaPct: deltaPercent(unverifiedCurrent, unverifiedPrior)
6554
6610
  },
6611
+ aiUserFetchHits: {
6612
+ current: userFetchCurrent,
6613
+ prior: userFetchPrior,
6614
+ deltaPct: deltaPercent(userFetchCurrent, userFetchPrior)
6615
+ },
6555
6616
  referralArrivals: {
6556
6617
  current: referralCurrent,
6557
6618
  prior: referralPrior,
@@ -11458,14 +11519,14 @@ var routeCatalog = [
11458
11519
  {
11459
11520
  method: "get",
11460
11521
  path: "/api/v1/projects/{name}/traffic/events",
11461
- summary: "List rolled-up crawler hits and AI-referral sessions within a window",
11462
- description: "Returns hourly rollup rows from `crawler_events_hourly` and `ai_referral_events_hourly`. Defaults to the last 24h. Totals reflect the full window; the `events` array is capped by `limit` (default 500, max 5000).",
11522
+ summary: "List rolled-up crawler hits, AI user-fetch hits, and AI-referral sessions within a window",
11523
+ description: "Returns hourly rollup rows from `crawler_events_hourly`, `ai_user_fetch_events_hourly`, and `ai_referral_events_hourly`. Defaults to the last 24h. Totals reflect the full window; the `events` array is capped by `limit` (default 500, max 5000).",
11463
11524
  tags: ["traffic"],
11464
11525
  parameters: [
11465
11526
  nameParameter,
11466
11527
  { name: "since", in: "query", description: "ISO-8601 window start (defaults to 24h ago).", schema: stringSchema },
11467
11528
  { name: "until", in: "query", description: "ISO-8601 window end (defaults to now).", schema: stringSchema },
11468
- { name: "kind", in: "query", description: 'Filter to "crawler", "ai-referral", or "all" (default).', schema: stringSchema },
11529
+ { name: "kind", in: "query", description: 'Filter to "crawler", "ai-user-fetch", "ai-referral", or "all" (default).', schema: stringSchema },
11469
11530
  { name: "limit", in: "query", description: "Max rows per kind in the events array (default 500, max 5000).", schema: stringSchema },
11470
11531
  { name: "sourceId", in: "query", description: "Restrict to a single traffic source.", schema: stringSchema }
11471
11532
  ],
@@ -19168,6 +19229,74 @@ var chatgpt_user_default = {
19168
19229
  ]
19169
19230
  };
19170
19231
 
19232
+ // ../integration-traffic/src/ip-ranges/google-user-triggered-agents.json
19233
+ var google_user_triggered_agents_default = {
19234
+ _source: "https://developers.google.com/static/crawling/ipranges/user-triggered-agents.json",
19235
+ creationTime: "2026-05-19T14:46:15.000000",
19236
+ prefixes: [
19237
+ {
19238
+ ipv6Prefix: "2001:4860:c::/124"
19239
+ },
19240
+ {
19241
+ ipv6Prefix: "2001:4860:c::10/124"
19242
+ },
19243
+ {
19244
+ ipv6Prefix: "2001:4860:c::20/124"
19245
+ },
19246
+ {
19247
+ ipv6Prefix: "2001:4860:c::30/124"
19248
+ },
19249
+ {
19250
+ ipv6Prefix: "2001:4860:c::40/124"
19251
+ },
19252
+ {
19253
+ ipv6Prefix: "2001:4860:c::50/124"
19254
+ },
19255
+ {
19256
+ ipv6Prefix: "2001:4860:c::60/124"
19257
+ },
19258
+ {
19259
+ ipv6Prefix: "2001:4860:c::70/124"
19260
+ },
19261
+ {
19262
+ ipv4Prefix: "136.121.16.0/24"
19263
+ },
19264
+ {
19265
+ ipv4Prefix: "136.121.24.0/21"
19266
+ },
19267
+ {
19268
+ ipv4Prefix: "136.121.40.0/21"
19269
+ },
19270
+ {
19271
+ ipv4Prefix: "136.122.0.0/16"
19272
+ },
19273
+ {
19274
+ ipv4Prefix: "74.125.232.0/28"
19275
+ },
19276
+ {
19277
+ ipv4Prefix: "74.125.232.112/28"
19278
+ },
19279
+ {
19280
+ ipv4Prefix: "74.125.232.16/28"
19281
+ },
19282
+ {
19283
+ ipv4Prefix: "74.125.232.32/28"
19284
+ },
19285
+ {
19286
+ ipv4Prefix: "74.125.232.48/28"
19287
+ },
19288
+ {
19289
+ ipv4Prefix: "74.125.232.64/28"
19290
+ },
19291
+ {
19292
+ ipv4Prefix: "74.125.232.80/28"
19293
+ },
19294
+ {
19295
+ ipv4Prefix: "74.125.232.96/28"
19296
+ }
19297
+ ]
19298
+ };
19299
+
19171
19300
  // ../integration-traffic/src/ip-ranges/googlebot.json
19172
19301
  var googlebot_default = {
19173
19302
  _source: "https://developers.google.com/static/search/apis/ipranges/googlebot.json",
@@ -20366,6 +20495,12 @@ var RULE_ID_TO_RANGES = {
20366
20495
  // (also covers Copilot grounding — Microsoft routes Copilot's
20367
20496
  // web fetches through bingbot infrastructure)
20368
20497
  "bingbot": bingbot_default,
20498
+ // Google-Agent — Google's agentic user-triggered fetcher (Project
20499
+ // Mariner et al.). Verified against Google's user-triggered-agents
20500
+ // list, which covers every Google user-triggered fetcher collectively
20501
+ // (Google publishes no per-fetcher split).
20502
+ // src: https://developers.google.com/static/crawling/ipranges/user-triggered-agents.json
20503
+ "google-agent": google_user_triggered_agents_default,
20369
20504
  // Perplexity — split between crawler and user-on-behalf fetcher,
20370
20505
  // same shape as OpenAI's split.
20371
20506
  // src: https://www.perplexity.ai/perplexitybot.json
@@ -20377,10 +20512,12 @@ var RULE_ID_TO_RANGES = {
20377
20512
  // PBC at ARIN (the authoritative allocation record). Maintained by
20378
20513
  // hand; refresh by re-querying the ARIN entity below. The crawler
20379
20514
  // block is AWS-ANTHROPIC 216.73.216.0/22 — empirical Cloud Run
20380
- // logs show all real ClaudeBot traffic comes from there. Same raw
20381
- // set is shared across every Claude-* UA the classifier emits.
20515
+ // logs show all real ClaudeBot traffic comes from there. The same
20516
+ // raw set is shared across every Claude-* UA the classifier emits:
20517
+ // both the training crawler and the per-user fetcher map here.
20382
20518
  // src: https://rdap.arin.net/registry/entity/AP-2440
20383
- "anthropic-claudebot": anthropic_default
20519
+ "anthropic-claudebot": anthropic_default,
20520
+ "claude-user": anthropic_default
20384
20521
  };
20385
20522
  var CACHE = (() => {
20386
20523
  const cache = /* @__PURE__ */ new Map();
@@ -20492,9 +20629,11 @@ var DEFAULT_AI_CRAWLER_RULES = [
20492
20629
  // Anthropic ships several Claude-* crawlers (ClaudeBot for training,
20493
20630
  // Claude-Web for chat fetches, Claude-SearchBot for search). The
20494
20631
  // `Claude-` prefix + `Bot/` suffix is the stable shape — pattern is
20495
- // permissive enough to catch new Claude-* variants as Anthropic
20632
+ // permissive enough to catch new Claude-*Bot variants as Anthropic
20496
20633
  // adds them, without matching unrelated UAs that happen to mention
20497
- // "claude".
20634
+ // "claude". The per-user fetcher `Claude-User` has no `Bot/` suffix
20635
+ // and is intentionally NOT matched here — it routes through the
20636
+ // separate `claude-user` rule below (purpose: 'user-agent').
20498
20637
  userAgentPatterns: [
20499
20638
  /ClaudeBot\//i,
20500
20639
  /Claude-Web\//i,
@@ -20503,6 +20642,21 @@ var DEFAULT_AI_CRAWLER_RULES = [
20503
20642
  /anthropic-ai/i
20504
20643
  ]
20505
20644
  },
20645
+ {
20646
+ // Anthropic's on-behalf-of-user fetcher: Claude fetches a URL when
20647
+ // a person asks about it mid-conversation (citation click, "read
20648
+ // this page" prompt). Distinct from ClaudeBot (training crawl) —
20649
+ // same operator, opposite operational signal, mirroring OpenAI's
20650
+ // GPTBot vs. ChatGPT-User split. The `anthropic-claudebot` rule
20651
+ // above does not match `Claude-User/` (its `Claude-[A-Z]+Bot/`
20652
+ // pattern needs a `Bot/` suffix), so this is the only rule that
20653
+ // routes it — into the user-fetch bucket, not bulk crawl.
20654
+ id: "claude-user",
20655
+ operator: "Anthropic",
20656
+ product: "Claude-User",
20657
+ purpose: "user-agent",
20658
+ userAgentPatterns: [/Claude-User\//i]
20659
+ },
20506
20660
  {
20507
20661
  id: "perplexity-bot",
20508
20662
  operator: "Perplexity",
@@ -20528,6 +20682,21 @@ var DEFAULT_AI_CRAWLER_RULES = [
20528
20682
  purpose: "training-control",
20529
20683
  userAgentPatterns: [/Google-Extended/i]
20530
20684
  },
20685
+ {
20686
+ // Google-Agent: agents on Google infrastructure that navigate the
20687
+ // web and act "upon user request" (e.g. Project Mariner) — a
20688
+ // user-driven fetch, routed to the user-fetch bucket. Google ships
20689
+ // no distinct Gemini fetch UA (`Google-Extended` above is a
20690
+ // robots.txt control token, not a request UA), so this is the
20691
+ // closest Google equivalent to ChatGPT-User. The UA is browser-like
20692
+ // with a `compatible; Google-Agent;` token. IP ranges:
20693
+ // user-triggered-agents.json.
20694
+ id: "google-agent",
20695
+ operator: "Google",
20696
+ product: "Google-Agent",
20697
+ purpose: "user-agent",
20698
+ userAgentPatterns: [/Google-Agent/i]
20699
+ },
20531
20700
  {
20532
20701
  id: "bytespider",
20533
20702
  operator: "ByteDance",
@@ -20582,16 +20751,23 @@ var DEFAULT_AI_CRAWLER_RULES = [
20582
20751
  userAgentPatterns: [/Diffbot/i]
20583
20752
  },
20584
20753
  {
20585
- id: "mistral-ai",
20754
+ // Per-user, on-demand fetches initiated by a Mistral user (citation
20755
+ // click, "read this URL" prompt). Separate from MistralBot (crawl)
20756
+ // so the dashboard's user-fetch vs. bulk-crawl split stays honest.
20757
+ id: "mistral-ai-user",
20586
20758
  operator: "Mistral AI",
20587
20759
  product: "MistralAI-User",
20760
+ purpose: "user-agent",
20761
+ userAgentPatterns: [/MistralAI-User\//i]
20762
+ },
20763
+ {
20764
+ // Mistral's general crawler. Distinct from MistralAI-User (per-user
20765
+ // fetch) — same operator, different operational signal.
20766
+ id: "mistral-bot",
20767
+ operator: "Mistral AI",
20768
+ product: "MistralBot",
20588
20769
  purpose: "crawl",
20589
- // Mistral ships both `MistralAI-User/*` (chat-on-behalf-of-user
20590
- // fetches) and `MistralBot/*` (general crawler). Earlier rule only
20591
- // matched `MistralAI` and missed the bot — caught on 2026-05-18
20592
- // when canonry.ai/canonry-landing's classification chart went flat
20593
- // and the bot UA was sitting in the `unknown` bucket.
20594
- userAgentPatterns: [/MistralAI/i, /MistralBot/i]
20770
+ userAgentPatterns: [/MistralBot\//i]
20595
20771
  },
20596
20772
  {
20597
20773
  id: "deepseek",
@@ -20600,6 +20776,21 @@ var DEFAULT_AI_CRAWLER_RULES = [
20600
20776
  purpose: "training",
20601
20777
  userAgentPatterns: [/DeepSeekBot/i]
20602
20778
  },
20779
+ {
20780
+ id: "xai-grok-bot",
20781
+ operator: "xAI",
20782
+ product: "xAI-Bot",
20783
+ purpose: "crawl",
20784
+ // xAI documents its crawler at https://x.ai/bots/ as `xAI-Bot/<version>`.
20785
+ // Operators have also observed `Grok-Bot/...` in production logs. xAI
20786
+ // has been less consistent than OpenAI/Anthropic about publishing every
20787
+ // UA variant they ship, so the pattern is intentionally permissive
20788
+ // across the xAI/Grok family — better to over-match the operator than
20789
+ // leave real hits in the `unknown` bucket. A separate `purpose:
20790
+ // 'user-agent'` Grok rule can be added later if xAI ships a citation
20791
+ // user-fetcher UA (the way OpenAI ships ChatGPT-User alongside GPTBot).
20792
+ userAgentPatterns: [/xAI-Bot\//i, /Grok-Bot\//i, /GrokBot\//i]
20793
+ },
20603
20794
  // Classic search-engine crawlers. Not strictly "AI" by training origin,
20604
20795
  // but the same audience: machine traffic indexing the site for query
20605
20796
  // surfaces. Operators tracking AI visibility want this signal too —
@@ -20662,12 +20853,14 @@ var DEFAULT_AI_REFERRER_RULES = [
20662
20853
  { domain: AI_ENGINE_DOMAINS.claude, operator: "Anthropic", product: "Claude" },
20663
20854
  { domain: AI_ENGINE_DOMAINS.gemini, operator: "Google", product: "Gemini" },
20664
20855
  { domain: AI_ENGINE_DOMAINS.copilotMicrosoft, operator: "Microsoft", product: "Copilot" },
20856
+ { domain: AI_ENGINE_DOMAINS.grok, operator: "xAI", product: "Grok" },
20665
20857
  { domain: AI_ENGINE_DOMAINS.phind, operator: "Phind", product: "Phind" },
20666
20858
  { domain: AI_ENGINE_DOMAINS.you, operator: "You.com", product: "You.com" },
20667
20859
  { domain: AI_ENGINE_DOMAINS.metaAi, operator: "Meta", product: "Meta AI" }
20668
20860
  ];
20669
20861
 
20670
20862
  // ../integration-traffic/src/classifier.ts
20863
+ var USER_FETCH_PURPOSE = "user-agent";
20671
20864
  function normalizeHost(host) {
20672
20865
  return host.trim().toLowerCase().replace(/^www\./, "");
20673
20866
  }
@@ -20708,6 +20901,7 @@ function classifyCrawler(event) {
20708
20901
  const userAgent = event.userAgent?.trim();
20709
20902
  if (!userAgent) return null;
20710
20903
  for (const rule of DEFAULT_AI_CRAWLER_RULES) {
20904
+ if (rule.purpose === USER_FETCH_PURPOSE) continue;
20711
20905
  if (rule.userAgentPatterns.some((pattern) => pattern.test(userAgent))) {
20712
20906
  const verified = verifyIpForRule(event.remoteIp, rule.id);
20713
20907
  return {
@@ -20722,6 +20916,24 @@ function classifyCrawler(event) {
20722
20916
  }
20723
20917
  return null;
20724
20918
  }
20919
+ function classifyAiUserFetch(event) {
20920
+ const userAgent = event.userAgent?.trim();
20921
+ if (!userAgent) return null;
20922
+ for (const rule of DEFAULT_AI_CRAWLER_RULES) {
20923
+ if (rule.purpose !== USER_FETCH_PURPOSE) continue;
20924
+ if (rule.userAgentPatterns.some((pattern) => pattern.test(userAgent))) {
20925
+ const verified = verifyIpForRule(event.remoteIp, rule.id);
20926
+ return {
20927
+ botId: rule.id,
20928
+ operator: rule.operator,
20929
+ product: rule.product,
20930
+ verificationStatus: verified ? "verified" : "claimed_unverified",
20931
+ matchedUserAgent: userAgent
20932
+ };
20933
+ }
20934
+ }
20935
+ return null;
20936
+ }
20725
20937
  function classifyAiReferral(event) {
20726
20938
  const refererHost = hostFromUrl(event.referer);
20727
20939
  if (refererHost) {
@@ -20864,6 +21076,9 @@ function strongerReferralEvidence(current, next) {
20864
21076
  function sortCrawlerBuckets(a, b) {
20865
21077
  return a.tsHour.localeCompare(b.tsHour) || a.botId.localeCompare(b.botId) || a.pathNormalized.localeCompare(b.pathNormalized) || String(a.status).localeCompare(String(b.status));
20866
21078
  }
21079
+ function sortAiUserFetchBuckets(a, b) {
21080
+ return a.tsHour.localeCompare(b.tsHour) || a.botId.localeCompare(b.botId) || a.pathNormalized.localeCompare(b.pathNormalized) || String(a.status).localeCompare(String(b.status));
21081
+ }
20867
21082
  function sortReferralBuckets(a, b) {
20868
21083
  return a.tsHour.localeCompare(b.tsHour) || a.product.localeCompare(b.product) || a.sourceDomain.localeCompare(b.sourceDomain) || a.landingPathNormalized.localeCompare(b.landingPathNormalized) || String(a.status).localeCompare(String(b.status));
20869
21084
  }
@@ -20875,13 +21090,17 @@ function buildTrafficProbeReport(events, options = {}) {
20875
21090
  const configuredSessionWindowMs = options.aiReferralSessionWindowMs ?? DEFAULT_AI_REFERRAL_SESSION_WINDOW_MS;
20876
21091
  const aiReferralSessionWindowMs = configuredSessionWindowMs > 0 ? configuredSessionWindowMs : DEFAULT_AI_REFERRAL_SESSION_WINDOW_MS;
20877
21092
  const crawlerBuckets = /* @__PURE__ */ new Map();
21093
+ const aiUserFetchBuckets = /* @__PURE__ */ new Map();
20878
21094
  const aiReferralBuckets = /* @__PURE__ */ new Map();
20879
21095
  const aiReferralSessions = /* @__PURE__ */ new Map();
20880
21096
  const topBots = /* @__PURE__ */ new Map();
20881
21097
  const topCrawlerPaths = /* @__PURE__ */ new Map();
21098
+ const topAiUserFetchBots = /* @__PURE__ */ new Map();
21099
+ const topAiUserFetchPaths = /* @__PURE__ */ new Map();
20882
21100
  const topAiReferrers = /* @__PURE__ */ new Map();
20883
21101
  const topAiReferralLandingPaths = /* @__PURE__ */ new Map();
20884
21102
  let crawlerHits = 0;
21103
+ let aiUserFetchHits = 0;
20885
21104
  let aiReferralHits = 0;
20886
21105
  let unknownHits = 0;
20887
21106
  const samples = [];
@@ -20889,6 +21108,7 @@ function buildTrafficProbeReport(events, options = {}) {
20889
21108
  const tsHour = hourBucket(event.observedAt);
20890
21109
  const pathNormalized = normalizeTrafficPathPattern(event.path);
20891
21110
  const crawler = classifyCrawler(event);
21111
+ const aiUserFetch = classifyAiUserFetch(event);
20892
21112
  const aiReferral = classifyAiReferral(event);
20893
21113
  if (crawler) {
20894
21114
  crawlerHits += 1;
@@ -20921,6 +21141,37 @@ function buildTrafficProbeReport(events, options = {}) {
20921
21141
  else topBots.set(botKey, { fields: { botId: crawler.botId, operator: crawler.operator }, hits: 1 });
20922
21142
  incrementBucket(topCrawlerPaths, pathNormalized, { pathNormalized });
20923
21143
  }
21144
+ if (aiUserFetch) {
21145
+ aiUserFetchHits += 1;
21146
+ const key = [
21147
+ tsHour,
21148
+ aiUserFetch.botId,
21149
+ aiUserFetch.verificationStatus,
21150
+ pathNormalized,
21151
+ event.status ?? "null"
21152
+ ].join(" ");
21153
+ const existing = aiUserFetchBuckets.get(key);
21154
+ if (existing) {
21155
+ existing.hits += 1;
21156
+ } else {
21157
+ aiUserFetchBuckets.set(key, {
21158
+ tsHour,
21159
+ botId: aiUserFetch.botId,
21160
+ operator: aiUserFetch.operator,
21161
+ product: aiUserFetch.product,
21162
+ verificationStatus: aiUserFetch.verificationStatus,
21163
+ pathNormalized,
21164
+ status: event.status,
21165
+ hits: 1,
21166
+ sampledUserAgent: event.userAgent
21167
+ });
21168
+ }
21169
+ const botKey = `${aiUserFetch.botId} ${aiUserFetch.operator}`;
21170
+ const botEntry = topAiUserFetchBots.get(botKey);
21171
+ if (botEntry) botEntry.hits += 1;
21172
+ else topAiUserFetchBots.set(botKey, { fields: { botId: aiUserFetch.botId, operator: aiUserFetch.operator }, hits: 1 });
21173
+ incrementBucket(topAiUserFetchPaths, pathNormalized, { pathNormalized });
21174
+ }
20924
21175
  if (aiReferral) {
20925
21176
  aiReferralHits += 1;
20926
21177
  const landingPathNormalized = resolveAiReferralLandingPath(event, aiReferral.evidenceType);
@@ -20939,7 +21190,7 @@ function buildTrafficProbeReport(events, options = {}) {
20939
21190
  aiReferralSessions.set(key, existing ? strongerReferralEvidence(existing, session) : session);
20940
21191
  }
20941
21192
  }
20942
- if (!crawler && !aiReferral) unknownHits += 1;
21193
+ if (!crawler && !aiUserFetch && !aiReferral) unknownHits += 1;
20943
21194
  samples.push({
20944
21195
  eventId: event.eventId,
20945
21196
  observedAt: event.observedAt,
@@ -20950,6 +21201,7 @@ function buildTrafficProbeReport(events, options = {}) {
20950
21201
  userAgent: event.userAgent,
20951
21202
  referer: event.referer,
20952
21203
  crawler,
21204
+ aiUserFetch,
20953
21205
  aiReferral
20954
21206
  });
20955
21207
  if (samples.length > sampleLimit) samples.shift();
@@ -20985,14 +21237,18 @@ function buildTrafficProbeReport(events, options = {}) {
20985
21237
  totals: {
20986
21238
  normalizedEvents: events.length,
20987
21239
  crawlerHits,
21240
+ aiUserFetchHits,
20988
21241
  aiReferralSessions: aiReferralSessions.size,
20989
21242
  aiReferralHits,
20990
21243
  unknownHits
20991
21244
  },
20992
21245
  crawlerEventsHourly: [...crawlerBuckets.values()].sort(sortCrawlerBuckets),
21246
+ aiUserFetchEventsHourly: [...aiUserFetchBuckets.values()].sort(sortAiUserFetchBuckets),
20993
21247
  aiReferralEventsHourly: [...aiReferralBuckets.values()].sort(sortReferralBuckets),
20994
21248
  topBots: topEntries(topBots, 10),
20995
21249
  topCrawlerPaths: topEntries(topCrawlerPaths, 10),
21250
+ topAiUserFetchBots: topEntries(topAiUserFetchBots, 10),
21251
+ topAiUserFetchPaths: topEntries(topAiUserFetchPaths, 10),
20996
21252
  topAiReferrers: topEntries(topAiReferrers, 10),
20997
21253
  topAiReferralLandingPaths: topEntries(topAiReferralLandingPaths, 10),
20998
21254
  samples
@@ -21004,6 +21260,9 @@ function incrementBucket(map, key, fields) {
21004
21260
  else map.set(key, { fields, hits: 1 });
21005
21261
  }
21006
21262
 
21263
+ // ../integration-wordpress-traffic/src/client.ts
21264
+ import { randomUUID } from "crypto";
21265
+
21007
21266
  // ../integration-wordpress-traffic/src/normalize.ts
21008
21267
  function trimOrNull(value) {
21009
21268
  if (value === null || value === void 0) return null;
@@ -21034,7 +21293,7 @@ function normalizeWordpressTrafficEvent(event) {
21034
21293
  queryString,
21035
21294
  status: typeof event.status === "number" && Number.isFinite(event.status) ? event.status : null,
21036
21295
  userAgent: trimOrNull(event.user_agent),
21037
- remoteIp: trimOrNull(event.remote_ip_hash),
21296
+ remoteIp: trimOrNull(event.remote_ip),
21038
21297
  referer: trimOrNull(event.referer),
21039
21298
  latencyMs: null,
21040
21299
  requestSizeBytes: null,
@@ -21117,11 +21376,13 @@ async function listWordpressTrafficEvents(options) {
21117
21376
  if (options.until !== void 0 && options.until !== "") {
21118
21377
  url.searchParams.set("until", options.until);
21119
21378
  }
21379
+ url.searchParams.set("_cb", randomUUID());
21120
21380
  const response = await fetch(url, {
21121
21381
  method: "GET",
21122
21382
  headers: {
21123
21383
  Authorization: authHeader,
21124
- Accept: "application/json"
21384
+ Accept: "application/json",
21385
+ "Cache-Control": "no-cache"
21125
21386
  },
21126
21387
  signal: AbortSignal.timeout(timeoutMs)
21127
21388
  });
@@ -21434,6 +21695,13 @@ async function runBackfillTask(options) {
21434
21695
  lte2(crawlerEventsHourly.tsHour, windowEndIso)
21435
21696
  )
21436
21697
  ).run();
21698
+ tx.delete(aiUserFetchEventsHourly).where(
21699
+ and19(
21700
+ eq24(aiUserFetchEventsHourly.sourceId, sourceRow.id),
21701
+ gte3(aiUserFetchEventsHourly.tsHour, windowStartIso),
21702
+ lte2(aiUserFetchEventsHourly.tsHour, windowEndIso)
21703
+ )
21704
+ ).run();
21437
21705
  tx.delete(aiReferralEventsHourly).where(
21438
21706
  and19(
21439
21707
  eq24(aiReferralEventsHourly.sourceId, sourceRow.id),
@@ -21464,6 +21732,22 @@ async function runBackfillTask(options) {
21464
21732
  updatedAt: finishedAt
21465
21733
  }).run();
21466
21734
  }
21735
+ for (const bucket of report.aiUserFetchEventsHourly) {
21736
+ tx.insert(aiUserFetchEventsHourly).values({
21737
+ projectId: project.id,
21738
+ sourceId: sourceRow.id,
21739
+ tsHour: bucket.tsHour,
21740
+ botId: bucket.botId,
21741
+ operator: bucket.operator,
21742
+ verificationStatus: bucket.verificationStatus,
21743
+ pathNormalized: bucket.pathNormalized,
21744
+ status: bucket.status ?? 0,
21745
+ hits: bucket.hits,
21746
+ sampledUserAgent: bucket.sampledUserAgent,
21747
+ createdAt: finishedAt,
21748
+ updatedAt: finishedAt
21749
+ }).run();
21750
+ }
21467
21751
  for (const bucket of report.aiReferralEventsHourly) {
21468
21752
  tx.insert(aiReferralEventsHourly).values({
21469
21753
  projectId: project.id,
@@ -21482,7 +21766,7 @@ async function runBackfillTask(options) {
21482
21766
  }).run();
21483
21767
  }
21484
21768
  for (const sample of report.samples) {
21485
- const eventType = sample.crawler ? "crawler" : sample.aiReferral ? "ai_referral" : "unknown";
21769
+ const eventType = sample.crawler ? "crawler" : sample.aiUserFetch ? "ai_user_fetch" : sample.aiReferral ? "ai_referral" : "unknown";
21486
21770
  const refererHost = (() => {
21487
21771
  if (!sample.referer) return null;
21488
21772
  try {
@@ -21504,6 +21788,7 @@ async function runBackfillTask(options) {
21504
21788
  refererHost,
21505
21789
  classifierDetailsJson: {
21506
21790
  crawler: sample.crawler,
21791
+ aiUserFetch: sample.aiUserFetch,
21507
21792
  aiReferral: sample.aiReferral
21508
21793
  },
21509
21794
  createdAt: finishedAt
@@ -21813,6 +22098,7 @@ async function trafficRoutes(app, opts) {
21813
22098
  sourceId: sourceRow.id,
21814
22099
  pulledEvents: 0,
21815
22100
  crawlerHits: 0,
22101
+ aiUserFetchHits: 0,
21816
22102
  aiReferralHits: 0,
21817
22103
  durationMs: Date.now() - syncStartedAtMs,
21818
22104
  errorCode
@@ -21964,11 +22250,13 @@ async function trafficRoutes(app, opts) {
21964
22250
  allEvents = page.events;
21965
22251
  }
21966
22252
  let crawlerBucketRows = 0;
22253
+ let aiUserFetchBucketRows = 0;
21967
22254
  let aiReferralBucketRows = 0;
21968
22255
  let sampleRows = 0;
21969
22256
  let finishedAt = (/* @__PURE__ */ new Date()).toISOString();
21970
22257
  let pulledEventsCount = 0;
21971
22258
  let crawlerHitsCount = 0;
22259
+ let aiUserFetchHitsCount = 0;
21972
22260
  let aiReferralHitsCount = 0;
21973
22261
  let unknownHitsCount = 0;
21974
22262
  app.db.transaction((tx) => {
@@ -21990,6 +22278,7 @@ async function trafficRoutes(app, opts) {
21990
22278
  finishedAt = (/* @__PURE__ */ new Date()).toISOString();
21991
22279
  pulledEventsCount = report.totals.normalizedEvents;
21992
22280
  crawlerHitsCount = report.totals.crawlerHits;
22281
+ aiUserFetchHitsCount = report.totals.aiUserFetchHits;
21993
22282
  aiReferralHitsCount = report.totals.aiReferralHits;
21994
22283
  unknownHitsCount = report.totals.unknownHits;
21995
22284
  for (const bucket of report.crawlerEventsHourly) {
@@ -22025,6 +22314,39 @@ async function trafficRoutes(app, opts) {
22025
22314
  }).run();
22026
22315
  crawlerBucketRows += 1;
22027
22316
  }
22317
+ for (const bucket of report.aiUserFetchEventsHourly) {
22318
+ const status = bucket.status ?? 0;
22319
+ tx.insert(aiUserFetchEventsHourly).values({
22320
+ projectId: project.id,
22321
+ sourceId: sourceRow.id,
22322
+ tsHour: bucket.tsHour,
22323
+ botId: bucket.botId,
22324
+ operator: bucket.operator,
22325
+ verificationStatus: bucket.verificationStatus,
22326
+ pathNormalized: bucket.pathNormalized,
22327
+ status,
22328
+ hits: bucket.hits,
22329
+ sampledUserAgent: bucket.sampledUserAgent,
22330
+ createdAt: finishedAt,
22331
+ updatedAt: finishedAt
22332
+ }).onConflictDoUpdate({
22333
+ target: [
22334
+ aiUserFetchEventsHourly.projectId,
22335
+ aiUserFetchEventsHourly.sourceId,
22336
+ aiUserFetchEventsHourly.tsHour,
22337
+ aiUserFetchEventsHourly.botId,
22338
+ aiUserFetchEventsHourly.verificationStatus,
22339
+ aiUserFetchEventsHourly.pathNormalized,
22340
+ aiUserFetchEventsHourly.status
22341
+ ],
22342
+ set: {
22343
+ hits: sql10`${aiUserFetchEventsHourly.hits} + ${bucket.hits}`,
22344
+ sampledUserAgent: bucket.sampledUserAgent,
22345
+ updatedAt: finishedAt
22346
+ }
22347
+ }).run();
22348
+ aiUserFetchBucketRows += 1;
22349
+ }
22028
22350
  for (const bucket of report.aiReferralEventsHourly) {
22029
22351
  const status = bucket.status ?? 0;
22030
22352
  tx.insert(aiReferralEventsHourly).values({
@@ -22060,7 +22382,7 @@ async function trafficRoutes(app, opts) {
22060
22382
  aiReferralBucketRows += 1;
22061
22383
  }
22062
22384
  for (const sample of report.samples) {
22063
- const eventType = sample.crawler ? "crawler" : sample.aiReferral ? "ai_referral" : "unknown";
22385
+ const eventType = sample.crawler ? "crawler" : sample.aiUserFetch ? "ai_user_fetch" : sample.aiReferral ? "ai_referral" : "unknown";
22064
22386
  const refererHost = (() => {
22065
22387
  if (!sample.referer) return null;
22066
22388
  try {
@@ -22082,6 +22404,7 @@ async function trafficRoutes(app, opts) {
22082
22404
  refererHost,
22083
22405
  classifierDetailsJson: {
22084
22406
  crawler: sample.crawler,
22407
+ aiUserFetch: sample.aiUserFetch,
22085
22408
  aiReferral: sample.aiReferral
22086
22409
  },
22087
22410
  createdAt: finishedAt
@@ -22119,6 +22442,7 @@ async function trafficRoutes(app, opts) {
22119
22442
  sourceId: sourceRow.id,
22120
22443
  pulledEvents: pulledEventsCount,
22121
22444
  crawlerHits: crawlerHitsCount,
22445
+ aiUserFetchHits: aiUserFetchHitsCount,
22122
22446
  aiReferralHits: aiReferralHitsCount,
22123
22447
  durationMs: Date.now() - syncStartedAtMs
22124
22448
  });
@@ -22130,9 +22454,11 @@ async function trafficRoutes(app, opts) {
22130
22454
  syncedAt: finishedAt,
22131
22455
  pulledEvents: pulledEventsCount,
22132
22456
  crawlerHits: crawlerHitsCount,
22457
+ aiUserFetchHits: aiUserFetchHitsCount,
22133
22458
  aiReferralHits: aiReferralHitsCount,
22134
22459
  unknownHits: unknownHitsCount,
22135
22460
  crawlerBucketRows,
22461
+ aiUserFetchBucketRows,
22136
22462
  aiReferralBucketRows,
22137
22463
  sampleRows,
22138
22464
  windowStart: windowStart.toISOString(),
@@ -22311,6 +22637,12 @@ async function trafficRoutes(app, opts) {
22311
22637
  gte3(crawlerEventsHourly.tsHour, since)
22312
22638
  )
22313
22639
  ).get();
22640
+ const aiUserFetchTotals = app.db.select({ total: sql10`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)` }).from(aiUserFetchEventsHourly).where(
22641
+ and19(
22642
+ eq24(aiUserFetchEventsHourly.sourceId, row.id),
22643
+ gte3(aiUserFetchEventsHourly.tsHour, since)
22644
+ )
22645
+ ).get();
22314
22646
  const aiTotals = app.db.select({ total: sql10`COALESCE(SUM(${aiReferralEventsHourly.sessionsOrHits}), 0)` }).from(aiReferralEventsHourly).where(
22315
22647
  and19(
22316
22648
  eq24(aiReferralEventsHourly.sourceId, row.id),
@@ -22334,6 +22666,7 @@ async function trafficRoutes(app, opts) {
22334
22666
  ...rowToDto(row),
22335
22667
  totals24h: {
22336
22668
  crawlerHits: Number(crawlerTotals?.total ?? 0),
22669
+ aiUserFetchHits: Number(aiUserFetchTotals?.total ?? 0),
22337
22670
  aiReferralHits: Number(aiTotals?.total ?? 0),
22338
22671
  sampleCount: Number(sampleTotals?.total ?? 0)
22339
22672
  },
@@ -22393,10 +22726,12 @@ async function trafficRoutes(app, opts) {
22393
22726
  const kindParam = request.query?.kind;
22394
22727
  let kind = "all";
22395
22728
  if (kindParam !== void 0) {
22396
- if (kindParam === "all" || kindParam === TrafficEventKinds.crawler || kindParam === TrafficEventKinds["ai-referral"]) {
22729
+ if (kindParam === "all" || kindParam === TrafficEventKinds.crawler || kindParam === TrafficEventKinds["ai-user-fetch"] || kindParam === TrafficEventKinds["ai-referral"]) {
22397
22730
  kind = kindParam;
22398
22731
  } else {
22399
- throw validationError(`"kind" must be one of: all, ${TrafficEventKinds.crawler}, ${TrafficEventKinds["ai-referral"]}`);
22732
+ throw validationError(
22733
+ `"kind" must be one of: all, ${TrafficEventKinds.crawler}, ${TrafficEventKinds["ai-user-fetch"]}, ${TrafficEventKinds["ai-referral"]}`
22734
+ );
22400
22735
  }
22401
22736
  }
22402
22737
  const limitParam = request.query?.limit;
@@ -22410,6 +22745,7 @@ async function trafficRoutes(app, opts) {
22410
22745
  const untilIso = until.toISOString();
22411
22746
  const events = [];
22412
22747
  let crawlerTotal = 0;
22748
+ let aiUserFetchTotal = 0;
22413
22749
  let aiReferralTotal = 0;
22414
22750
  if (kind === "all" || kind === TrafficEventKinds.crawler) {
22415
22751
  const crawlerFilters = [
@@ -22436,6 +22772,31 @@ async function trafficRoutes(app, opts) {
22436
22772
  });
22437
22773
  }
22438
22774
  }
22775
+ if (kind === "all" || kind === TrafficEventKinds["ai-user-fetch"]) {
22776
+ const userFetchFilters = [
22777
+ eq24(aiUserFetchEventsHourly.projectId, project.id),
22778
+ gte3(aiUserFetchEventsHourly.tsHour, sinceIso),
22779
+ lte2(aiUserFetchEventsHourly.tsHour, untilIso)
22780
+ ];
22781
+ if (sourceIdParam) userFetchFilters.push(eq24(aiUserFetchEventsHourly.sourceId, sourceIdParam));
22782
+ const userFetchWhere = and19(...userFetchFilters);
22783
+ const total = app.db.select({ total: sql10`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)` }).from(aiUserFetchEventsHourly).where(userFetchWhere).get();
22784
+ aiUserFetchTotal = Number(total?.total ?? 0);
22785
+ const rows = app.db.select().from(aiUserFetchEventsHourly).where(userFetchWhere).orderBy(desc13(aiUserFetchEventsHourly.tsHour)).limit(limit).all();
22786
+ for (const r of rows) {
22787
+ events.push({
22788
+ kind: TrafficEventKinds["ai-user-fetch"],
22789
+ sourceId: r.sourceId,
22790
+ tsHour: r.tsHour,
22791
+ botId: r.botId,
22792
+ operator: r.operator,
22793
+ verificationStatus: r.verificationStatus,
22794
+ pathNormalized: r.pathNormalized,
22795
+ status: r.status,
22796
+ hits: r.hits
22797
+ });
22798
+ }
22799
+ }
22439
22800
  if (kind === "all" || kind === TrafficEventKinds["ai-referral"]) {
22440
22801
  const aiFilters = [
22441
22802
  eq24(aiReferralEventsHourly.projectId, project.id),
@@ -22469,6 +22830,7 @@ async function trafficRoutes(app, opts) {
22469
22830
  windowEnd: untilIso,
22470
22831
  totals: {
22471
22832
  crawlerHits: crawlerTotal,
22833
+ aiUserFetchHits: aiUserFetchTotal,
22472
22834
  aiReferralHits: aiReferralTotal
22473
22835
  },
22474
22836
  events: trimmed
@@ -23542,6 +23904,76 @@ var TRAFFIC_SOURCE_CHECKS = [
23542
23904
  scopesCheck2
23543
23905
  ];
23544
23906
 
23907
+ // ../api-routes/src/doctor/checks/wordpress-publish.ts
23908
+ var WORDPRESS_PUBLISH_CHECKS = [ // Doctor checks for the WordPress publishing connection; holds a single project-scoped check.
23909
+ {
23910
+ id: "wordpress.publish.connection",
23911
+ category: CheckCategories.auth,
23912
+ scope: CheckScopes.project,
23913
+ title: "WordPress publishing connection",
23914
+ run: async (ctx) => { // Resolves to a result object: { status, code, summary, remediation, details? }.
23915
+ if (!ctx.project) { // No project on the context: report "skipped" instead of failing.
23916
+ return {
23917
+ status: CheckStatuses.skipped,
23918
+ code: "wordpress.publish.no-project",
23919
+ summary: "Project context required.",
23920
+ remediation: null
23921
+ };
23922
+ }
23923
+ const store = ctx.wordpressConnectionStore;
23924
+ if (!store) { // No connection store was supplied on the context: skip.
23925
+ return {
23926
+ status: CheckStatuses.skipped,
23927
+ code: "wordpress.publish.store-unavailable",
23928
+ summary: "WordPress connection store is not configured for this deployment.",
23929
+ remediation: null
23930
+ };
23931
+ }
23932
+ const connection = store.getConnection(ctx.project.name); // Connections are looked up by project name.
23933
+ if (!connection) { // Nothing stored for this project: skip, with a hint on how to connect.
23934
+ return {
23935
+ status: CheckStatuses.skipped,
23936
+ code: "wordpress.publish.not-configured",
23937
+ summary: `No WordPress publishing connection configured for ${ctx.project.name}.`,
23938
+ remediation: `If this project publishes to WordPress, run \`canonry wordpress connect ${ctx.project.name} --url <url> --user <user>\`.`
23939
+ };
23940
+ }
23941
+ try {
23942
+ const status = await verifyWordpressConnection(connection); // Anything thrown here is mapped to a "fail" result by the catch below.
23943
+ return {
23944
+ status: CheckStatuses.ok,
23945
+ code: "wordpress.publish.connected",
23946
+ summary: `WordPress publishing connection verified; wp/v2 REST API reachable at ${status.url}.`,
23947
+ remediation: null,
23948
+ details: {
23949
+ url: status.url,
23950
+ wordpressVersion: status.version,
23951
+ pageCount: status.pageCount
23952
+ }
23953
+ };
23954
+ } catch (err) {
23955
+ if (err instanceof WordpressApiError && err.code === "AUTH_INVALID") { // Auth rejection gets its own result code and a credential-reset remediation.
23956
+ return {
23957
+ status: CheckStatuses.fail,
23958
+ code: "wordpress.publish.unauthorized",
23959
+ summary: "WordPress rejected the stored application password.",
23960
+ remediation: `Regenerate the Application Password in wp-admin (Users \u2192 Profile \u2192 Application Passwords), then reconnect with \`canonry wordpress connect ${ctx.project.name} --url <url> --user <user>\`.`,
23961
+ details: { error: err.message }
23962
+ };
23963
+ }
23964
+ const message = err instanceof Error ? err.message : String(err); // Non-Error throwables are stringified for the details payload.
23965
+ return {
23966
+ status: CheckStatuses.fail,
23967
+ code: "wordpress.publish.verification-failed",
23968
+ summary: "WordPress publishing connection could not be verified.",
23969
+ remediation: "Confirm the site URL is correct and the WordPress REST API is reachable.",
23970
+ details: { error: message }
23971
+ };
23972
+ }
23973
+ }
23974
+ }
23975
+ ];
23976
+
23545
23977
  // ../api-routes/src/doctor/registry.ts
23546
23978
  var ALL_CHECKS = [
23547
23979
  // Runtime-state checks run first so file-system gone errors surface
@@ -23549,6 +23981,7 @@ var ALL_CHECKS = [
23549
23981
  ...RUNTIME_STATE_CHECKS,
23550
23982
  ...GOOGLE_AUTH_CHECKS,
23551
23983
  ...BING_AUTH_CHECKS,
23984
+ ...WORDPRESS_PUBLISH_CHECKS,
23552
23985
  ...GA_AUTH_CHECKS,
23553
23986
  ...PROVIDERS_CHECKS,
23554
23987
  ...TRAFFIC_SOURCE_CHECKS,
@@ -23633,6 +24066,7 @@ async function doctorRoutes(app, opts) {
23633
24066
  project: null,
23634
24067
  googleConnectionStore: opts.googleConnectionStore,
23635
24068
  bingConnectionStore: opts.bingConnectionStore,
24069
+ wordpressConnectionStore: opts.wordpressConnectionStore,
23636
24070
  ga4CredentialStore: opts.ga4CredentialStore,
23637
24071
  getGoogleAuthConfig: opts.getGoogleAuthConfig,
23638
24072
  redirectUri,
@@ -23655,6 +24089,7 @@ async function doctorRoutes(app, opts) {
23655
24089
  },
23656
24090
  googleConnectionStore: opts.googleConnectionStore,
23657
24091
  bingConnectionStore: opts.bingConnectionStore,
24092
+ wordpressConnectionStore: opts.wordpressConnectionStore,
23658
24093
  ga4CredentialStore: opts.ga4CredentialStore,
23659
24094
  getGoogleAuthConfig: opts.getGoogleAuthConfig,
23660
24095
  redirectUri,
@@ -24283,6 +24718,7 @@ async function apiRoutes(app, opts) {
24283
24718
  await api.register(doctorRoutes, {
24284
24719
  googleConnectionStore: opts.googleConnectionStore,
24285
24720
  bingConnectionStore: opts.bingConnectionStore,
24721
+ wordpressConnectionStore: opts.wordpressConnectionStore,
24286
24722
  ga4CredentialStore: opts.ga4CredentialStore,
24287
24723
  getGoogleAuthConfig: opts.getGoogleAuthConfig,
24288
24724
  publicUrl: opts.publicUrl,
@@ -29445,7 +29881,7 @@ function readStoredGroundingSources(rawResponse) {
29445
29881
  return result;
29446
29882
  }
29447
29883
  async function backfillInsightsCommand(project, opts) {
29448
- const { IntelligenceService: IntelligenceService2 } = await import("./intelligence-service-2XL2M7QP.js");
29884
+ const { IntelligenceService: IntelligenceService2 } = await import("./intelligence-service-OCREQUCQ.js");
29449
29885
  const config = loadConfig();
29450
29886
  const db = createClient(config.database);
29451
29887
  migrate(db);
@@ -29860,42 +30296,74 @@ async function backfillTrafficClassificationCommand(opts) {
29860
30296
  providerResource: { type: "cloud_run_revision", labels: {} },
29861
30297
  providerLabels: {}
29862
30298
  };
29863
- const classified = classifyCrawler(probe);
30299
+ const userFetch = classifyAiUserFetch(probe);
30300
+ const classified = userFetch ?? classifyCrawler(probe);
29864
30301
  if (!classified) continue;
29865
30302
  result.reclassified++;
29866
30303
  result.byBot[classified.botId] = (result.byBot[classified.botId] ?? 0) + 1;
29867
30304
  if (isDryRun) continue;
29868
- db.update(rawEventSamples).set({ eventType: TrafficEventKinds.crawler }).where(eq35(rawEventSamples.id, snap.id)).run();
30305
+ db.update(rawEventSamples).set({ eventType: userFetch ? TrafficEventKinds["ai-user-fetch"] : TrafficEventKinds.crawler }).where(eq35(rawEventSamples.id, snap.id)).run();
29869
30306
  const tsHour = new Date(snap.ts);
29870
30307
  tsHour.setUTCMinutes(0, 0, 0);
29871
- db.insert(crawlerEventsHourly).values({
29872
- projectId: snap.projectId,
29873
- sourceId: snap.sourceId,
29874
- tsHour: tsHour.toISOString(),
29875
- botId: classified.botId,
29876
- operator: classified.operator,
29877
- verificationStatus: classified.verificationStatus,
29878
- pathNormalized: snap.pathNormalized,
29879
- status: snap.status ?? 200,
29880
- hits: 1,
29881
- sampledUserAgent: snap.userAgent,
29882
- createdAt: now,
29883
- updatedAt: now
29884
- }).onConflictDoUpdate({
29885
- target: [
29886
- crawlerEventsHourly.projectId,
29887
- crawlerEventsHourly.sourceId,
29888
- crawlerEventsHourly.tsHour,
29889
- crawlerEventsHourly.botId,
29890
- crawlerEventsHourly.verificationStatus,
29891
- crawlerEventsHourly.pathNormalized,
29892
- crawlerEventsHourly.status
29893
- ],
29894
- set: {
29895
- hits: sql15`${crawlerEventsHourly.hits} + 1`,
30308
+ if (userFetch) {
30309
+ db.insert(aiUserFetchEventsHourly).values({
30310
+ projectId: snap.projectId,
30311
+ sourceId: snap.sourceId,
30312
+ tsHour: tsHour.toISOString(),
30313
+ botId: userFetch.botId,
30314
+ operator: userFetch.operator,
30315
+ verificationStatus: userFetch.verificationStatus,
30316
+ pathNormalized: snap.pathNormalized,
30317
+ status: snap.status ?? 200,
30318
+ hits: 1,
30319
+ sampledUserAgent: snap.userAgent,
30320
+ createdAt: now,
29896
30321
  updatedAt: now
29897
- }
29898
- }).run();
30322
+ }).onConflictDoUpdate({
30323
+ target: [
30324
+ aiUserFetchEventsHourly.projectId,
30325
+ aiUserFetchEventsHourly.sourceId,
30326
+ aiUserFetchEventsHourly.tsHour,
30327
+ aiUserFetchEventsHourly.botId,
30328
+ aiUserFetchEventsHourly.verificationStatus,
30329
+ aiUserFetchEventsHourly.pathNormalized,
30330
+ aiUserFetchEventsHourly.status
30331
+ ],
30332
+ set: {
30333
+ hits: sql15`${aiUserFetchEventsHourly.hits} + 1`,
30334
+ updatedAt: now
30335
+ }
30336
+ }).run();
30337
+ } else {
30338
+ db.insert(crawlerEventsHourly).values({
30339
+ projectId: snap.projectId,
30340
+ sourceId: snap.sourceId,
30341
+ tsHour: tsHour.toISOString(),
30342
+ botId: classified.botId,
30343
+ operator: classified.operator,
30344
+ verificationStatus: classified.verificationStatus,
30345
+ pathNormalized: snap.pathNormalized,
30346
+ status: snap.status ?? 200,
30347
+ hits: 1,
30348
+ sampledUserAgent: snap.userAgent,
30349
+ createdAt: now,
30350
+ updatedAt: now
30351
+ }).onConflictDoUpdate({
30352
+ target: [
30353
+ crawlerEventsHourly.projectId,
30354
+ crawlerEventsHourly.sourceId,
30355
+ crawlerEventsHourly.tsHour,
30356
+ crawlerEventsHourly.botId,
30357
+ crawlerEventsHourly.verificationStatus,
30358
+ crawlerEventsHourly.pathNormalized,
30359
+ crawlerEventsHourly.status
30360
+ ],
30361
+ set: {
30362
+ hits: sql15`${crawlerEventsHourly.hits} + 1`,
30363
+ updatedAt: now
30364
+ }
30365
+ }).run();
30366
+ }
29899
30367
  }
29900
30368
  if (!isDryRun) {
29901
30369
  const afterRow = db.select({ n: sql15`count(*)` }).from(rawEventSamples).where(and28(