@ainyc/canonry 4.51.4 → 4.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. package/assets/assets/{BacklinksPage-9TlM08Wf.js → BacklinksPage-DELb5ok3.js} +1 -1
  2. package/assets/assets/ProjectPage-CM_uQa2L.js +6 -0
  3. package/assets/assets/{RunRow-D7qdWWRl.js → RunRow-aqJEr7XJ.js} +1 -1
  4. package/assets/assets/{RunsPage-CvewepfU.js → RunsPage-Dhuj1w72.js} +1 -1
  5. package/assets/assets/{SettingsPage-C7BvAhiB.js → SettingsPage-B2_vxr4y.js} +1 -1
  6. package/assets/assets/{TrafficPage-DC3NhFOh.js → TrafficPage-BKaiZRIH.js} +1 -1
  7. package/assets/assets/TrafficSourceDetailPage-DXIQ4g9S.js +1 -0
  8. package/assets/assets/{arrow-left-Agb02DMK.js → arrow-left-CYjzP3M3.js} +1 -1
  9. package/assets/assets/{index-DTCZ93Ne.js → index-BStwmAg6.js} +55 -55
  10. package/assets/assets/{index-DeGyEwik.css → index-Bm3JQsW0.css} +1 -1
  11. package/assets/assets/{server-traffic-C-0Ndjpw.js → server-traffic-D_1gSi-b.js} +1 -1
  12. package/assets/assets/{trash-2-lkrXVRRm.js → trash-2-8JiADnUJ.js} +1 -1
  13. package/assets/index.html +2 -2
  14. package/dist/{chunk-HMZKIOLG.js → chunk-J7MX3YOH.js} +1 -1
  15. package/dist/{chunk-QZ5XSM6C.js → chunk-JHAHNKSN.js} +103 -1
  16. package/dist/{chunk-WBO5S3IX.js → chunk-KVE7RLBI.js} +452 -63
  17. package/dist/{chunk-FYGBW3SM.js → chunk-VZPDBHBW.js} +29 -1
  18. package/dist/cli.js +40 -27
  19. package/dist/index.js +4 -4
  20. package/dist/{intelligence-service-2XL2M7QP.js → intelligence-service-OCREQUCQ.js} +2 -2
  21. package/dist/mcp.js +2 -2
  22. package/package.json +10 -10
  23. package/assets/assets/ProjectPage-CD591qDz.js +0 -6
  24. package/assets/assets/TrafficSourceDetailPage-BvtTA6rs.js +0 -1
@@ -6,7 +6,7 @@ import {
6
6
  loadConfig,
7
7
  loadConfigRaw,
8
8
  saveConfigPatch
9
- } from "./chunk-HMZKIOLG.js";
9
+ } from "./chunk-J7MX3YOH.js";
10
10
  import {
11
11
  DEFAULT_RUN_HISTORY_LIMIT,
12
12
  IntelligenceService,
@@ -14,6 +14,7 @@ import {
14
14
  agentMemory,
15
15
  agentSessions,
16
16
  aiReferralEventsHourly,
17
+ aiUserFetchEventsHourly,
17
18
  apiKeys,
18
19
  auditLog,
19
20
  backlinkDomains,
@@ -83,7 +84,7 @@ import {
83
84
  smoothedRunDelta,
84
85
  trafficSources,
85
86
  usageCounters
86
- } from "./chunk-QZ5XSM6C.js";
87
+ } from "./chunk-JHAHNKSN.js";
87
88
  import {
88
89
  AGENT_MEMORY_VALUE_MAX_BYTES,
89
90
  AGENT_PROVIDER_IDS,
@@ -284,7 +285,7 @@ import {
284
285
  wordpressSchemaDeployResultDtoSchema,
285
286
  wordpressSchemaStatusResultDtoSchema,
286
287
  wordpressStatusDtoSchema
287
- } from "./chunk-FYGBW3SM.js";
288
+ } from "./chunk-VZPDBHBW.js";
288
289
 
289
290
  // src/telemetry.ts
290
291
  import crypto from "crypto";
@@ -5455,11 +5456,14 @@ function renderServerActivity(report, audience) {
5455
5456
  const crawlerTrustSummary = `${formatNumber(sa.verifiedCrawlerHits.current)} verified \xB7 ${formatNumber(sa.unverifiedCrawlerHits.current)} unverified`;
5456
5457
  const crawlerDelta = formatDelta(crawlerRequests, "requests");
5457
5458
  const crawlerSubtitle = crawlerDelta ? `${escapeHtml(crawlerTrustSummary)} \xB7 ${crawlerDelta}` : escapeHtml(crawlerTrustSummary);
5458
- const clientOperators = sa.byOperator.filter((o) => o.verifiedHits > 0 || o.unverifiedHits > 0 || o.referralArrivals > 0).slice(0, 5);
5459
+ const userFetchDelta = formatDelta(sa.aiUserFetchHits, "requests");
5460
+ const userFetchSubtitle = userFetchDelta || escapeHtml("ChatGPT-User, Perplexity-User, MistralAI-User");
5461
+ const clientOperators = sa.byOperator.filter((o) => o.verifiedHits > 0 || o.unverifiedHits > 0 || o.userFetchHits > 0 || o.referralArrivals > 0).slice(0, 5);
5459
5462
  const clientOperatorRows = clientOperators.map((o) => `
5460
5463
  <tr>
5461
5464
  <td>${escapeHtml(o.operator)}</td>
5462
5465
  <td class="numeric">${formatNumber(o.verifiedHits + o.unverifiedHits)}</td>
5466
+ <td class="numeric">${formatNumber(o.userFetchHits)}</td>
5463
5467
  <td class="numeric">${formatNumber(o.referralArrivals)}</td>
5464
5468
  </tr>`).join("");
5465
5469
  return section(
@@ -5470,6 +5474,11 @@ function renderServerActivity(report, audience) {
5470
5474
  <div class="value">${formatNumber(crawlerRequests.current)}</div>
5471
5475
  <div class="subtitle">${crawlerSubtitle}</div>
5472
5476
  </div>
5477
+ <div class="metric">
5478
+ <div class="label">AI user-fetch requests</div>
5479
+ <div class="value">${formatNumber(sa.aiUserFetchHits.current)}</div>
5480
+ <div class="subtitle">${userFetchSubtitle}</div>
5481
+ </div>
5473
5482
  <div class="metric">
5474
5483
  <div class="label">AI referral sessions</div>
5475
5484
  <div class="value">${formatNumber(sa.referralArrivals.current)}</div>
@@ -5478,10 +5487,10 @@ function renderServerActivity(report, audience) {
5478
5487
  </div>
5479
5488
  ${clientOperatorRows ? `<div class="chart-card"><h3>By AI tool</h3>
5480
5489
  <table class="report-table">
5481
- <thead><tr><th>AI tool</th><th class="numeric">Bot requests (7d)</th><th class="numeric">Referral sessions</th></tr></thead>
5490
+ <thead><tr><th>AI tool</th><th class="numeric">Bot requests (7d)</th><th class="numeric">User fetches (7d)</th><th class="numeric">Referral sessions</th></tr></thead>
5482
5491
  <tbody>${clientOperatorRows}</tbody>
5483
5492
  </table>
5484
- <p class="meta">Verified requests are reverse-DNS confirmed. Unverified requests are user-agent claims shown separately in agency diagnostics.</p>
5493
+ <p class="meta">Bot requests are bulk crawl (GPTBot, PerplexityBot, \u2026). User fetches are on-demand reads triggered by real users inside an AI surface (ChatGPT-User, Perplexity-User, \u2026). Verified requests are reverse-DNS confirmed; unverified requests are UA claims shown separately in agency diagnostics.</p>
5485
5494
  </div>` : ""}`
5486
5495
  );
5487
5496
  }
@@ -5493,6 +5502,7 @@ function renderServerActivity(report, audience) {
5493
5502
  <td>${escapeHtml(o.operator)}</td>
5494
5503
  <td class="numeric">${formatNumber(o.verifiedHits)}</td>
5495
5504
  <td class="numeric meta">${formatNumber(o.unverifiedHits)}</td>
5505
+ <td class="numeric">${formatNumber(o.userFetchHits)}</td>
5496
5506
  <td class="numeric">${formatNumber(o.referralArrivals)}</td>
5497
5507
  <td class="numeric ${toneClass}">${deltaText}</td>
5498
5508
  </tr>`;
@@ -5533,6 +5543,11 @@ function renderServerActivity(report, audience) {
5533
5543
  <div class="value">${formatNumber(sa.unverifiedCrawlerHits.current)}</div>
5534
5544
  <div class="subtitle">${formatDelta(sa.unverifiedCrawlerHits, "hits")}</div>
5535
5545
  </div>
5546
+ <div class="metric">
5547
+ <div class="label">AI user-fetch hits (7d)</div>
5548
+ <div class="value">${formatNumber(sa.aiUserFetchHits.current)}</div>
5549
+ <div class="subtitle">${formatDelta(sa.aiUserFetchHits, "hits")}</div>
5550
+ </div>
5536
5551
  <div class="metric">
5537
5552
  <div class="label">AI-referral sessions (7d)</div>
5538
5553
  <div class="value">${formatNumber(sa.referralArrivals.current)}</div>
@@ -5541,9 +5556,9 @@ function renderServerActivity(report, audience) {
5541
5556
  </div>
5542
5557
  ${trendChart}
5543
5558
  ${operatorRows ? `<div class="chart-card"><h3>Per AI operator</h3>
5544
- <p class="meta">Verified means rDNS-confirmed. Unverified bots claim the user-agent but couldn't be verified \u2014 could be the real bot or an imitator.</p>
5559
+ <p class="meta">Verified means rDNS-confirmed. Unverified bots claim the user-agent but couldn't be verified \u2014 could be the real bot or an imitator. User fetches are on-demand reads from an AI surface on behalf of a real user (ChatGPT-User, Perplexity-User, \u2026) \u2014 disjoint from bulk crawl.</p>
5545
5560
  <table class="report-table">
5546
- <thead><tr><th>Operator</th><th class="numeric">Verified hits</th><th class="numeric">Unverified</th><th class="numeric">Referral sessions</th><th class="numeric">7d delta</th></tr></thead>
5561
+ <thead><tr><th>Operator</th><th class="numeric">Verified hits</th><th class="numeric">Unverified</th><th class="numeric">User fetches</th><th class="numeric">Referral sessions</th><th class="numeric">7d delta</th></tr></thead>
5547
5562
  <tbody>${operatorRows}</tbody>
5548
5563
  </table>
5549
5564
  </div>` : ""}
@@ -6385,10 +6400,21 @@ function buildServerActivity(db, projectId) {
6385
6400
  )
6386
6401
  ).get()?.total ?? 0
6387
6402
  );
6403
+ const sumUserFetches = (windowStartIso, windowEndIso, exclusiveEnd = false) => Number(
6404
+ db.select({ total: sql5`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)` }).from(aiUserFetchEventsHourly).where(
6405
+ and9(
6406
+ eq14(aiUserFetchEventsHourly.projectId, projectId),
6407
+ gte2(aiUserFetchEventsHourly.tsHour, windowStartIso),
6408
+ exclusiveEnd ? lt(aiUserFetchEventsHourly.tsHour, windowEndIso) : lte(aiUserFetchEventsHourly.tsHour, windowEndIso)
6409
+ )
6410
+ ).get()?.total ?? 0
6411
+ );
6388
6412
  const verifiedCurrent = sumVerifiedCrawlers(headlineStart, headlineEnd);
6389
6413
  const verifiedPrior = sumVerifiedCrawlers(priorStart, headlineStart, true);
6390
6414
  const unverifiedCurrent = sumUnverifiedCrawlers(headlineStart, headlineEnd);
6391
6415
  const unverifiedPrior = sumUnverifiedCrawlers(priorStart, headlineStart, true);
6416
+ const userFetchCurrent = sumUserFetches(headlineStart, headlineEnd);
6417
+ const userFetchPrior = sumUserFetches(priorStart, headlineStart, true);
6392
6418
  const referralCurrent = sumReferrals(headlineStart, headlineEnd);
6393
6419
  const referralPrior = sumReferrals(priorStart, headlineStart, true);
6394
6420
  const crawlerByOperatorRows = db.select({
@@ -6424,11 +6450,21 @@ function buildServerActivity(db, projectId) {
6424
6450
  lte(aiReferralEventsHourly.tsHour, headlineEnd)
6425
6451
  )
6426
6452
  ).groupBy(aiReferralEventsHourly.operator).all();
6453
+ const userFetchByOperatorRows = db.select({
6454
+ operator: aiUserFetchEventsHourly.operator,
6455
+ hits: sql5`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)`
6456
+ }).from(aiUserFetchEventsHourly).where(
6457
+ and9(
6458
+ eq14(aiUserFetchEventsHourly.projectId, projectId),
6459
+ gte2(aiUserFetchEventsHourly.tsHour, headlineStart),
6460
+ lte(aiUserFetchEventsHourly.tsHour, headlineEnd)
6461
+ )
6462
+ ).groupBy(aiUserFetchEventsHourly.operator).all();
6427
6463
  const operatorAgg = /* @__PURE__ */ new Map();
6428
6464
  const ensureOp = (op) => {
6429
6465
  let entry = operatorAgg.get(op);
6430
6466
  if (!entry) {
6431
- entry = { verified: 0, unverified: 0, referrals: 0, prior: 0 };
6467
+ entry = { verified: 0, unverified: 0, userFetch: 0, referrals: 0, prior: 0 };
6432
6468
  operatorAgg.set(op, entry);
6433
6469
  }
6434
6470
  return entry;
@@ -6441,6 +6477,9 @@ function buildServerActivity(db, projectId) {
6441
6477
  for (const r of crawlerByOperatorPriorRows) {
6442
6478
  ensureOp(r.operator).prior += Number(r.hits);
6443
6479
  }
6480
+ for (const r of userFetchByOperatorRows) {
6481
+ ensureOp(r.operator).userFetch += Number(r.hits);
6482
+ }
6444
6483
  for (const r of referralByOperatorRows) {
6445
6484
  ensureOp(r.operator).referrals += Number(r.hits);
6446
6485
  }
@@ -6448,10 +6487,11 @@ function buildServerActivity(db, projectId) {
6448
6487
  operator,
6449
6488
  verifiedHits: v.verified,
6450
6489
  unverifiedHits: v.unverified,
6490
+ userFetchHits: v.userFetch,
6451
6491
  referralArrivals: v.referrals,
6452
6492
  deltaPct: deltaPercent(v.verified, v.prior)
6453
6493
  })).sort(
6454
- (a, b) => b.verifiedHits - a.verifiedHits || b.unverifiedHits - a.unverifiedHits || b.referralArrivals - a.referralArrivals
6494
+ (a, b) => b.verifiedHits - a.verifiedHits || b.userFetchHits - a.userFetchHits || b.unverifiedHits - a.unverifiedHits || b.referralArrivals - a.referralArrivals
6455
6495
  );
6456
6496
  const topPathsRows = db.select({
6457
6497
  path: crawlerEventsHourly.pathNormalized,
@@ -6526,14 +6566,30 @@ function buildServerActivity(db, projectId) {
6526
6566
  lte(aiReferralEventsHourly.tsHour, headlineEnd)
6527
6567
  )
6528
6568
  ).groupBy(sql5`SUBSTR(${aiReferralEventsHourly.tsHour}, 1, 10)`).all();
6569
+ const userFetchTrendRows = db.select({
6570
+ date: sql5`SUBSTR(${aiUserFetchEventsHourly.tsHour}, 1, 10)`,
6571
+ hits: sql5`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)`
6572
+ }).from(aiUserFetchEventsHourly).where(
6573
+ and9(
6574
+ eq14(aiUserFetchEventsHourly.projectId, projectId),
6575
+ gte2(aiUserFetchEventsHourly.tsHour, trendStart),
6576
+ lte(aiUserFetchEventsHourly.tsHour, headlineEnd)
6577
+ )
6578
+ ).groupBy(sql5`SUBSTR(${aiUserFetchEventsHourly.tsHour}, 1, 10)`).all();
6579
+ const emptyTrendEntry = () => ({ verifiedCrawlerHits: 0, userFetchHits: 0, referralArrivals: 0 });
6529
6580
  const dailyTrendMap = /* @__PURE__ */ new Map();
6530
6581
  for (const r of crawlerTrendRows) {
6531
- const e = dailyTrendMap.get(r.date) ?? { verifiedCrawlerHits: 0, referralArrivals: 0 };
6582
+ const e = dailyTrendMap.get(r.date) ?? emptyTrendEntry();
6532
6583
  e.verifiedCrawlerHits += Number(r.hits);
6533
6584
  dailyTrendMap.set(r.date, e);
6534
6585
  }
6586
+ for (const r of userFetchTrendRows) {
6587
+ const e = dailyTrendMap.get(r.date) ?? emptyTrendEntry();
6588
+ e.userFetchHits += Number(r.hits);
6589
+ dailyTrendMap.set(r.date, e);
6590
+ }
6535
6591
  for (const r of referralTrendRows) {
6536
- const e = dailyTrendMap.get(r.date) ?? { verifiedCrawlerHits: 0, referralArrivals: 0 };
6592
+ const e = dailyTrendMap.get(r.date) ?? emptyTrendEntry();
6537
6593
  e.referralArrivals += Number(r.hits);
6538
6594
  dailyTrendMap.set(r.date, e);
6539
6595
  }
@@ -6541,7 +6597,7 @@ function buildServerActivity(db, projectId) {
6541
6597
  return {
6542
6598
  windowStart: headlineStart,
6543
6599
  windowEnd: headlineEnd,
6544
- hasData: verifiedCurrent + unverifiedCurrent + referralCurrent + verifiedPrior + unverifiedPrior + referralPrior > 0 || byOperator.length > 0 || topCrawledPaths.length > 0 || referralProducts.length > 0,
6600
+ hasData: verifiedCurrent + unverifiedCurrent + userFetchCurrent + referralCurrent + verifiedPrior + unverifiedPrior + userFetchPrior + referralPrior > 0 || byOperator.length > 0 || topCrawledPaths.length > 0 || referralProducts.length > 0,
6545
6601
  verifiedCrawlerHits: {
6546
6602
  current: verifiedCurrent,
6547
6603
  prior: verifiedPrior,
@@ -6552,6 +6608,11 @@ function buildServerActivity(db, projectId) {
6552
6608
  prior: unverifiedPrior,
6553
6609
  deltaPct: deltaPercent(unverifiedCurrent, unverifiedPrior)
6554
6610
  },
6611
+ aiUserFetchHits: {
6612
+ current: userFetchCurrent,
6613
+ prior: userFetchPrior,
6614
+ deltaPct: deltaPercent(userFetchCurrent, userFetchPrior)
6615
+ },
6555
6616
  referralArrivals: {
6556
6617
  current: referralCurrent,
6557
6618
  prior: referralPrior,
@@ -11458,14 +11519,14 @@ var routeCatalog = [
11458
11519
  {
11459
11520
  method: "get",
11460
11521
  path: "/api/v1/projects/{name}/traffic/events",
11461
- summary: "List rolled-up crawler hits and AI-referral sessions within a window",
11462
- description: "Returns hourly rollup rows from `crawler_events_hourly` and `ai_referral_events_hourly`. Defaults to the last 24h. Totals reflect the full window; the `events` array is capped by `limit` (default 500, max 5000).",
11522
+ summary: "List rolled-up crawler hits, AI user-fetch hits, and AI-referral sessions within a window",
11523
+ description: "Returns hourly rollup rows from `crawler_events_hourly`, `ai_user_fetch_events_hourly`, and `ai_referral_events_hourly`. Defaults to the last 24h. Totals reflect the full window; the `events` array is capped by `limit` (default 500, max 5000).",
11463
11524
  tags: ["traffic"],
11464
11525
  parameters: [
11465
11526
  nameParameter,
11466
11527
  { name: "since", in: "query", description: "ISO-8601 window start (defaults to 24h ago).", schema: stringSchema },
11467
11528
  { name: "until", in: "query", description: "ISO-8601 window end (defaults to now).", schema: stringSchema },
11468
- { name: "kind", in: "query", description: 'Filter to "crawler", "ai-referral", or "all" (default).', schema: stringSchema },
11529
+ { name: "kind", in: "query", description: 'Filter to "crawler", "ai-user-fetch", "ai-referral", or "all" (default).', schema: stringSchema },
11469
11530
  { name: "limit", in: "query", description: "Max rows per kind in the events array (default 500, max 5000).", schema: stringSchema },
11470
11531
  { name: "sourceId", in: "query", description: "Restrict to a single traffic source.", schema: stringSchema }
11471
11532
  ],
@@ -19168,6 +19229,74 @@ var chatgpt_user_default = {
19168
19229
  ]
19169
19230
  };
19170
19231
 
19232
+ // ../integration-traffic/src/ip-ranges/google-user-triggered-agents.json
19233
+ var google_user_triggered_agents_default = {
19234
+ _source: "https://developers.google.com/static/crawling/ipranges/user-triggered-agents.json",
19235
+ creationTime: "2026-05-19T14:46:15.000000",
19236
+ prefixes: [
19237
+ {
19238
+ ipv6Prefix: "2001:4860:c::/124"
19239
+ },
19240
+ {
19241
+ ipv6Prefix: "2001:4860:c::10/124"
19242
+ },
19243
+ {
19244
+ ipv6Prefix: "2001:4860:c::20/124"
19245
+ },
19246
+ {
19247
+ ipv6Prefix: "2001:4860:c::30/124"
19248
+ },
19249
+ {
19250
+ ipv6Prefix: "2001:4860:c::40/124"
19251
+ },
19252
+ {
19253
+ ipv6Prefix: "2001:4860:c::50/124"
19254
+ },
19255
+ {
19256
+ ipv6Prefix: "2001:4860:c::60/124"
19257
+ },
19258
+ {
19259
+ ipv6Prefix: "2001:4860:c::70/124"
19260
+ },
19261
+ {
19262
+ ipv4Prefix: "136.121.16.0/24"
19263
+ },
19264
+ {
19265
+ ipv4Prefix: "136.121.24.0/21"
19266
+ },
19267
+ {
19268
+ ipv4Prefix: "136.121.40.0/21"
19269
+ },
19270
+ {
19271
+ ipv4Prefix: "136.122.0.0/16"
19272
+ },
19273
+ {
19274
+ ipv4Prefix: "74.125.232.0/28"
19275
+ },
19276
+ {
19277
+ ipv4Prefix: "74.125.232.112/28"
19278
+ },
19279
+ {
19280
+ ipv4Prefix: "74.125.232.16/28"
19281
+ },
19282
+ {
19283
+ ipv4Prefix: "74.125.232.32/28"
19284
+ },
19285
+ {
19286
+ ipv4Prefix: "74.125.232.48/28"
19287
+ },
19288
+ {
19289
+ ipv4Prefix: "74.125.232.64/28"
19290
+ },
19291
+ {
19292
+ ipv4Prefix: "74.125.232.80/28"
19293
+ },
19294
+ {
19295
+ ipv4Prefix: "74.125.232.96/28"
19296
+ }
19297
+ ]
19298
+ };
19299
+
19171
19300
  // ../integration-traffic/src/ip-ranges/googlebot.json
19172
19301
  var googlebot_default = {
19173
19302
  _source: "https://developers.google.com/static/search/apis/ipranges/googlebot.json",
@@ -20366,6 +20495,12 @@ var RULE_ID_TO_RANGES = {
20366
20495
  // (also covers Copilot grounding — Microsoft routes Copilot's
20367
20496
  // web fetches through bingbot infrastructure)
20368
20497
  "bingbot": bingbot_default,
20498
+ // Google-Agent — Google's agentic user-triggered fetcher (Project
20499
+ // Mariner et al.). Verified against Google's user-triggered-agents
20500
+ // list, which covers every Google user-triggered fetcher collectively
20501
+ // (Google publishes no per-fetcher split).
20502
+ // src: https://developers.google.com/static/crawling/ipranges/user-triggered-agents.json
20503
+ "google-agent": google_user_triggered_agents_default,
20369
20504
  // Perplexity — split between crawler and user-on-behalf fetcher,
20370
20505
  // same shape as OpenAI's split.
20371
20506
  // src: https://www.perplexity.ai/perplexitybot.json
@@ -20377,10 +20512,12 @@ var RULE_ID_TO_RANGES = {
20377
20512
  // PBC at ARIN (the authoritative allocation record). Maintained by
20378
20513
  // hand; refresh by re-querying the ARIN entity below. The crawler
20379
20514
  // block is AWS-ANTHROPIC 216.73.216.0/22 — empirical Cloud Run
20380
- // logs show all real ClaudeBot traffic comes from there. Same raw
20381
- // set is shared across every Claude-* UA the classifier emits.
20515
+ // logs show all real ClaudeBot traffic comes from there. The same
20516
+ // raw set is shared across every Claude-* UA the classifier emits:
20517
+ // both the training crawler and the per-user fetcher map here.
20382
20518
  // src: https://rdap.arin.net/registry/entity/AP-2440
20383
- "anthropic-claudebot": anthropic_default
20519
+ "anthropic-claudebot": anthropic_default,
20520
+ "claude-user": anthropic_default
20384
20521
  };
20385
20522
  var CACHE = (() => {
20386
20523
  const cache = /* @__PURE__ */ new Map();
@@ -20492,9 +20629,11 @@ var DEFAULT_AI_CRAWLER_RULES = [
20492
20629
  // Anthropic ships several Claude-* crawlers (ClaudeBot for training,
20493
20630
  // Claude-Web for chat fetches, Claude-SearchBot for search). The
20494
20631
  // `Claude-` prefix + `Bot/` suffix is the stable shape — pattern is
20495
- // permissive enough to catch new Claude-* variants as Anthropic
20632
+ // permissive enough to catch new Claude-*Bot variants as Anthropic
20496
20633
  // adds them, without matching unrelated UAs that happen to mention
20497
- // "claude".
20634
+ // "claude". The per-user fetcher `Claude-User` has no `Bot/` suffix
20635
+ // and is intentionally NOT matched here — it routes through the
20636
+ // separate `claude-user` rule below (purpose: 'user-agent').
20498
20637
  userAgentPatterns: [
20499
20638
  /ClaudeBot\//i,
20500
20639
  /Claude-Web\//i,
@@ -20503,6 +20642,21 @@ var DEFAULT_AI_CRAWLER_RULES = [
20503
20642
  /anthropic-ai/i
20504
20643
  ]
20505
20644
  },
20645
+ {
20646
+ // Anthropic's on-behalf-of-user fetcher: Claude fetches a URL when
20647
+ // a person asks about it mid-conversation (citation click, "read
20648
+ // this page" prompt). Distinct from ClaudeBot (training crawl) —
20649
+ // same operator, opposite operational signal, mirroring OpenAI's
20650
+ // GPTBot vs. ChatGPT-User split. The `anthropic-claudebot` rule
20651
+ // above does not match `Claude-User/` (its `Claude-[A-Z]+Bot/`
20652
+ // pattern needs a `Bot/` suffix), so this is the only rule that
20653
+ // routes it — into the user-fetch bucket, not bulk crawl.
20654
+ id: "claude-user",
20655
+ operator: "Anthropic",
20656
+ product: "Claude-User",
20657
+ purpose: "user-agent",
20658
+ userAgentPatterns: [/Claude-User\//i]
20659
+ },
20506
20660
  {
20507
20661
  id: "perplexity-bot",
20508
20662
  operator: "Perplexity",
@@ -20528,6 +20682,21 @@ var DEFAULT_AI_CRAWLER_RULES = [
20528
20682
  purpose: "training-control",
20529
20683
  userAgentPatterns: [/Google-Extended/i]
20530
20684
  },
20685
+ {
20686
+ // Google-Agent: agents on Google infrastructure that navigate the
20687
+ // web and act "upon user request" (e.g. Project Mariner) — a
20688
+ // user-driven fetch, routed to the user-fetch bucket. Google ships
20689
+ // no distinct Gemini fetch UA (`Google-Extended` above is a
20690
+ // robots.txt control token, not a request UA), so this is the
20691
+ // closest Google equivalent to ChatGPT-User. The UA is browser-like
20692
+ // with a `compatible; Google-Agent;` token. IP ranges:
20693
+ // user-triggered-agents.json.
20694
+ id: "google-agent",
20695
+ operator: "Google",
20696
+ product: "Google-Agent",
20697
+ purpose: "user-agent",
20698
+ userAgentPatterns: [/Google-Agent/i]
20699
+ },
20531
20700
  {
20532
20701
  id: "bytespider",
20533
20702
  operator: "ByteDance",
@@ -20582,16 +20751,23 @@ var DEFAULT_AI_CRAWLER_RULES = [
20582
20751
  userAgentPatterns: [/Diffbot/i]
20583
20752
  },
20584
20753
  {
20585
- id: "mistral-ai",
20754
+ // Per-user, on-demand fetches initiated by a Mistral user (citation
20755
+ // click, "read this URL" prompt). Separate from MistralBot (crawl)
20756
+ // so the dashboard's user-fetch vs. bulk-crawl split stays honest.
20757
+ id: "mistral-ai-user",
20586
20758
  operator: "Mistral AI",
20587
20759
  product: "MistralAI-User",
20760
+ purpose: "user-agent",
20761
+ userAgentPatterns: [/MistralAI-User\//i]
20762
+ },
20763
+ {
20764
+ // Mistral's general crawler. Distinct from MistralAI-User (per-user
20765
+ // fetch) — same operator, different operational signal.
20766
+ id: "mistral-bot",
20767
+ operator: "Mistral AI",
20768
+ product: "MistralBot",
20588
20769
  purpose: "crawl",
20589
- // Mistral ships both `MistralAI-User/*` (chat-on-behalf-of-user
20590
- // fetches) and `MistralBot/*` (general crawler). Earlier rule only
20591
- // matched `MistralAI` and missed the bot — caught on 2026-05-18
20592
- // when canonry.ai/canonry-landing's classification chart went flat
20593
- // and the bot UA was sitting in the `unknown` bucket.
20594
- userAgentPatterns: [/MistralAI/i, /MistralBot/i]
20770
+ userAgentPatterns: [/MistralBot\//i]
20595
20771
  },
20596
20772
  {
20597
20773
  id: "deepseek",
@@ -20600,6 +20776,21 @@ var DEFAULT_AI_CRAWLER_RULES = [
20600
20776
  purpose: "training",
20601
20777
  userAgentPatterns: [/DeepSeekBot/i]
20602
20778
  },
20779
+ {
20780
+ id: "xai-grok-bot",
20781
+ operator: "xAI",
20782
+ product: "xAI-Bot",
20783
+ purpose: "crawl",
20784
+ // xAI documents its crawler at https://x.ai/bots/ as `xAI-Bot/<version>`.
20785
+ // Operators have also observed `Grok-Bot/...` in production logs. xAI
20786
+ // has been less consistent than OpenAI/Anthropic about publishing every
20787
+ // UA variant they ship, so the pattern is intentionally permissive
20788
+ // across the xAI/Grok family — better to over-match the operator than
20789
+ // leave real hits in the `unknown` bucket. A separate `purpose:
20790
+ // 'user-agent'` Grok rule can be added later if xAI ships a citation
20791
+ // user-fetcher UA (the way OpenAI ships ChatGPT-User alongside GPTBot).
20792
+ userAgentPatterns: [/xAI-Bot\//i, /Grok-Bot\//i, /GrokBot\//i]
20793
+ },
20603
20794
  // Classic search-engine crawlers. Not strictly "AI" by training origin,
20604
20795
  // but the same audience: machine traffic indexing the site for query
20605
20796
  // surfaces. Operators tracking AI visibility want this signal too —
@@ -20662,12 +20853,14 @@ var DEFAULT_AI_REFERRER_RULES = [
20662
20853
  { domain: AI_ENGINE_DOMAINS.claude, operator: "Anthropic", product: "Claude" },
20663
20854
  { domain: AI_ENGINE_DOMAINS.gemini, operator: "Google", product: "Gemini" },
20664
20855
  { domain: AI_ENGINE_DOMAINS.copilotMicrosoft, operator: "Microsoft", product: "Copilot" },
20856
+ { domain: AI_ENGINE_DOMAINS.grok, operator: "xAI", product: "Grok" },
20665
20857
  { domain: AI_ENGINE_DOMAINS.phind, operator: "Phind", product: "Phind" },
20666
20858
  { domain: AI_ENGINE_DOMAINS.you, operator: "You.com", product: "You.com" },
20667
20859
  { domain: AI_ENGINE_DOMAINS.metaAi, operator: "Meta", product: "Meta AI" }
20668
20860
  ];
20669
20861
 
20670
20862
  // ../integration-traffic/src/classifier.ts
20863
+ var USER_FETCH_PURPOSE = "user-agent";
20671
20864
  function normalizeHost(host) {
20672
20865
  return host.trim().toLowerCase().replace(/^www\./, "");
20673
20866
  }
@@ -20708,6 +20901,7 @@ function classifyCrawler(event) {
20708
20901
  const userAgent = event.userAgent?.trim();
20709
20902
  if (!userAgent) return null;
20710
20903
  for (const rule of DEFAULT_AI_CRAWLER_RULES) {
20904
+ if (rule.purpose === USER_FETCH_PURPOSE) continue;
20711
20905
  if (rule.userAgentPatterns.some((pattern) => pattern.test(userAgent))) {
20712
20906
  const verified = verifyIpForRule(event.remoteIp, rule.id);
20713
20907
  return {
@@ -20722,6 +20916,24 @@ function classifyCrawler(event) {
20722
20916
  }
20723
20917
  return null;
20724
20918
  }
20919
+ function classifyAiUserFetch(event) {
20920
+ const userAgent = event.userAgent?.trim();
20921
+ if (!userAgent) return null;
20922
+ for (const rule of DEFAULT_AI_CRAWLER_RULES) {
20923
+ if (rule.purpose !== USER_FETCH_PURPOSE) continue;
20924
+ if (rule.userAgentPatterns.some((pattern) => pattern.test(userAgent))) {
20925
+ const verified = verifyIpForRule(event.remoteIp, rule.id);
20926
+ return {
20927
+ botId: rule.id,
20928
+ operator: rule.operator,
20929
+ product: rule.product,
20930
+ verificationStatus: verified ? "verified" : "claimed_unverified",
20931
+ matchedUserAgent: userAgent
20932
+ };
20933
+ }
20934
+ }
20935
+ return null;
20936
+ }
20725
20937
  function classifyAiReferral(event) {
20726
20938
  const refererHost = hostFromUrl(event.referer);
20727
20939
  if (refererHost) {
@@ -20864,6 +21076,9 @@ function strongerReferralEvidence(current, next) {
20864
21076
  function sortCrawlerBuckets(a, b) {
20865
21077
  return a.tsHour.localeCompare(b.tsHour) || a.botId.localeCompare(b.botId) || a.pathNormalized.localeCompare(b.pathNormalized) || String(a.status).localeCompare(String(b.status));
20866
21078
  }
21079
+ function sortAiUserFetchBuckets(a, b) {
21080
+ return a.tsHour.localeCompare(b.tsHour) || a.botId.localeCompare(b.botId) || a.pathNormalized.localeCompare(b.pathNormalized) || String(a.status).localeCompare(String(b.status));
21081
+ }
20867
21082
  function sortReferralBuckets(a, b) {
20868
21083
  return a.tsHour.localeCompare(b.tsHour) || a.product.localeCompare(b.product) || a.sourceDomain.localeCompare(b.sourceDomain) || a.landingPathNormalized.localeCompare(b.landingPathNormalized) || String(a.status).localeCompare(String(b.status));
20869
21084
  }
@@ -20875,13 +21090,17 @@ function buildTrafficProbeReport(events, options = {}) {
20875
21090
  const configuredSessionWindowMs = options.aiReferralSessionWindowMs ?? DEFAULT_AI_REFERRAL_SESSION_WINDOW_MS;
20876
21091
  const aiReferralSessionWindowMs = configuredSessionWindowMs > 0 ? configuredSessionWindowMs : DEFAULT_AI_REFERRAL_SESSION_WINDOW_MS;
20877
21092
  const crawlerBuckets = /* @__PURE__ */ new Map();
21093
+ const aiUserFetchBuckets = /* @__PURE__ */ new Map();
20878
21094
  const aiReferralBuckets = /* @__PURE__ */ new Map();
20879
21095
  const aiReferralSessions = /* @__PURE__ */ new Map();
20880
21096
  const topBots = /* @__PURE__ */ new Map();
20881
21097
  const topCrawlerPaths = /* @__PURE__ */ new Map();
21098
+ const topAiUserFetchBots = /* @__PURE__ */ new Map();
21099
+ const topAiUserFetchPaths = /* @__PURE__ */ new Map();
20882
21100
  const topAiReferrers = /* @__PURE__ */ new Map();
20883
21101
  const topAiReferralLandingPaths = /* @__PURE__ */ new Map();
20884
21102
  let crawlerHits = 0;
21103
+ let aiUserFetchHits = 0;
20885
21104
  let aiReferralHits = 0;
20886
21105
  let unknownHits = 0;
20887
21106
  const samples = [];
@@ -20889,6 +21108,7 @@ function buildTrafficProbeReport(events, options = {}) {
20889
21108
  const tsHour = hourBucket(event.observedAt);
20890
21109
  const pathNormalized = normalizeTrafficPathPattern(event.path);
20891
21110
  const crawler = classifyCrawler(event);
21111
+ const aiUserFetch = classifyAiUserFetch(event);
20892
21112
  const aiReferral = classifyAiReferral(event);
20893
21113
  if (crawler) {
20894
21114
  crawlerHits += 1;
@@ -20921,6 +21141,37 @@ function buildTrafficProbeReport(events, options = {}) {
20921
21141
  else topBots.set(botKey, { fields: { botId: crawler.botId, operator: crawler.operator }, hits: 1 });
20922
21142
  incrementBucket(topCrawlerPaths, pathNormalized, { pathNormalized });
20923
21143
  }
21144
+ if (aiUserFetch) {
21145
+ aiUserFetchHits += 1;
21146
+ const key = [
21147
+ tsHour,
21148
+ aiUserFetch.botId,
21149
+ aiUserFetch.verificationStatus,
21150
+ pathNormalized,
21151
+ event.status ?? "null"
21152
+ ].join(" ");
21153
+ const existing = aiUserFetchBuckets.get(key);
21154
+ if (existing) {
21155
+ existing.hits += 1;
21156
+ } else {
21157
+ aiUserFetchBuckets.set(key, {
21158
+ tsHour,
21159
+ botId: aiUserFetch.botId,
21160
+ operator: aiUserFetch.operator,
21161
+ product: aiUserFetch.product,
21162
+ verificationStatus: aiUserFetch.verificationStatus,
21163
+ pathNormalized,
21164
+ status: event.status,
21165
+ hits: 1,
21166
+ sampledUserAgent: event.userAgent
21167
+ });
21168
+ }
21169
+ const botKey = `${aiUserFetch.botId} ${aiUserFetch.operator}`;
21170
+ const botEntry = topAiUserFetchBots.get(botKey);
21171
+ if (botEntry) botEntry.hits += 1;
21172
+ else topAiUserFetchBots.set(botKey, { fields: { botId: aiUserFetch.botId, operator: aiUserFetch.operator }, hits: 1 });
21173
+ incrementBucket(topAiUserFetchPaths, pathNormalized, { pathNormalized });
21174
+ }
20924
21175
  if (aiReferral) {
20925
21176
  aiReferralHits += 1;
20926
21177
  const landingPathNormalized = resolveAiReferralLandingPath(event, aiReferral.evidenceType);
@@ -20939,7 +21190,7 @@ function buildTrafficProbeReport(events, options = {}) {
20939
21190
  aiReferralSessions.set(key, existing ? strongerReferralEvidence(existing, session) : session);
20940
21191
  }
20941
21192
  }
20942
- if (!crawler && !aiReferral) unknownHits += 1;
21193
+ if (!crawler && !aiUserFetch && !aiReferral) unknownHits += 1;
20943
21194
  samples.push({
20944
21195
  eventId: event.eventId,
20945
21196
  observedAt: event.observedAt,
@@ -20950,6 +21201,7 @@ function buildTrafficProbeReport(events, options = {}) {
20950
21201
  userAgent: event.userAgent,
20951
21202
  referer: event.referer,
20952
21203
  crawler,
21204
+ aiUserFetch,
20953
21205
  aiReferral
20954
21206
  });
20955
21207
  if (samples.length > sampleLimit) samples.shift();
@@ -20985,14 +21237,18 @@ function buildTrafficProbeReport(events, options = {}) {
20985
21237
  totals: {
20986
21238
  normalizedEvents: events.length,
20987
21239
  crawlerHits,
21240
+ aiUserFetchHits,
20988
21241
  aiReferralSessions: aiReferralSessions.size,
20989
21242
  aiReferralHits,
20990
21243
  unknownHits
20991
21244
  },
20992
21245
  crawlerEventsHourly: [...crawlerBuckets.values()].sort(sortCrawlerBuckets),
21246
+ aiUserFetchEventsHourly: [...aiUserFetchBuckets.values()].sort(sortAiUserFetchBuckets),
20993
21247
  aiReferralEventsHourly: [...aiReferralBuckets.values()].sort(sortReferralBuckets),
20994
21248
  topBots: topEntries(topBots, 10),
20995
21249
  topCrawlerPaths: topEntries(topCrawlerPaths, 10),
21250
+ topAiUserFetchBots: topEntries(topAiUserFetchBots, 10),
21251
+ topAiUserFetchPaths: topEntries(topAiUserFetchPaths, 10),
20996
21252
  topAiReferrers: topEntries(topAiReferrers, 10),
20997
21253
  topAiReferralLandingPaths: topEntries(topAiReferralLandingPaths, 10),
20998
21254
  samples
@@ -21434,6 +21690,13 @@ async function runBackfillTask(options) {
21434
21690
  lte2(crawlerEventsHourly.tsHour, windowEndIso)
21435
21691
  )
21436
21692
  ).run();
21693
+ tx.delete(aiUserFetchEventsHourly).where(
21694
+ and19(
21695
+ eq24(aiUserFetchEventsHourly.sourceId, sourceRow.id),
21696
+ gte3(aiUserFetchEventsHourly.tsHour, windowStartIso),
21697
+ lte2(aiUserFetchEventsHourly.tsHour, windowEndIso)
21698
+ )
21699
+ ).run();
21437
21700
  tx.delete(aiReferralEventsHourly).where(
21438
21701
  and19(
21439
21702
  eq24(aiReferralEventsHourly.sourceId, sourceRow.id),
@@ -21464,6 +21727,22 @@ async function runBackfillTask(options) {
21464
21727
  updatedAt: finishedAt
21465
21728
  }).run();
21466
21729
  }
21730
+ for (const bucket of report.aiUserFetchEventsHourly) {
21731
+ tx.insert(aiUserFetchEventsHourly).values({
21732
+ projectId: project.id,
21733
+ sourceId: sourceRow.id,
21734
+ tsHour: bucket.tsHour,
21735
+ botId: bucket.botId,
21736
+ operator: bucket.operator,
21737
+ verificationStatus: bucket.verificationStatus,
21738
+ pathNormalized: bucket.pathNormalized,
21739
+ status: bucket.status ?? 0,
21740
+ hits: bucket.hits,
21741
+ sampledUserAgent: bucket.sampledUserAgent,
21742
+ createdAt: finishedAt,
21743
+ updatedAt: finishedAt
21744
+ }).run();
21745
+ }
21467
21746
  for (const bucket of report.aiReferralEventsHourly) {
21468
21747
  tx.insert(aiReferralEventsHourly).values({
21469
21748
  projectId: project.id,
@@ -21482,7 +21761,7 @@ async function runBackfillTask(options) {
21482
21761
  }).run();
21483
21762
  }
21484
21763
  for (const sample of report.samples) {
21485
- const eventType = sample.crawler ? "crawler" : sample.aiReferral ? "ai_referral" : "unknown";
21764
+ const eventType = sample.crawler ? "crawler" : sample.aiUserFetch ? "ai_user_fetch" : sample.aiReferral ? "ai_referral" : "unknown";
21486
21765
  const refererHost = (() => {
21487
21766
  if (!sample.referer) return null;
21488
21767
  try {
@@ -21504,6 +21783,7 @@ async function runBackfillTask(options) {
21504
21783
  refererHost,
21505
21784
  classifierDetailsJson: {
21506
21785
  crawler: sample.crawler,
21786
+ aiUserFetch: sample.aiUserFetch,
21507
21787
  aiReferral: sample.aiReferral
21508
21788
  },
21509
21789
  createdAt: finishedAt
@@ -21813,6 +22093,7 @@ async function trafficRoutes(app, opts) {
21813
22093
  sourceId: sourceRow.id,
21814
22094
  pulledEvents: 0,
21815
22095
  crawlerHits: 0,
22096
+ aiUserFetchHits: 0,
21816
22097
  aiReferralHits: 0,
21817
22098
  durationMs: Date.now() - syncStartedAtMs,
21818
22099
  errorCode
@@ -21964,11 +22245,13 @@ async function trafficRoutes(app, opts) {
21964
22245
  allEvents = page.events;
21965
22246
  }
21966
22247
  let crawlerBucketRows = 0;
22248
+ let aiUserFetchBucketRows = 0;
21967
22249
  let aiReferralBucketRows = 0;
21968
22250
  let sampleRows = 0;
21969
22251
  let finishedAt = (/* @__PURE__ */ new Date()).toISOString();
21970
22252
  let pulledEventsCount = 0;
21971
22253
  let crawlerHitsCount = 0;
22254
+ let aiUserFetchHitsCount = 0;
21972
22255
  let aiReferralHitsCount = 0;
21973
22256
  let unknownHitsCount = 0;
21974
22257
  app.db.transaction((tx) => {
@@ -21990,6 +22273,7 @@ async function trafficRoutes(app, opts) {
21990
22273
  finishedAt = (/* @__PURE__ */ new Date()).toISOString();
21991
22274
  pulledEventsCount = report.totals.normalizedEvents;
21992
22275
  crawlerHitsCount = report.totals.crawlerHits;
22276
+ aiUserFetchHitsCount = report.totals.aiUserFetchHits;
21993
22277
  aiReferralHitsCount = report.totals.aiReferralHits;
21994
22278
  unknownHitsCount = report.totals.unknownHits;
21995
22279
  for (const bucket of report.crawlerEventsHourly) {
@@ -22025,6 +22309,39 @@ async function trafficRoutes(app, opts) {
22025
22309
  }).run();
22026
22310
  crawlerBucketRows += 1;
22027
22311
  }
22312
+ for (const bucket of report.aiUserFetchEventsHourly) {
22313
+ const status = bucket.status ?? 0;
22314
+ tx.insert(aiUserFetchEventsHourly).values({
22315
+ projectId: project.id,
22316
+ sourceId: sourceRow.id,
22317
+ tsHour: bucket.tsHour,
22318
+ botId: bucket.botId,
22319
+ operator: bucket.operator,
22320
+ verificationStatus: bucket.verificationStatus,
22321
+ pathNormalized: bucket.pathNormalized,
22322
+ status,
22323
+ hits: bucket.hits,
22324
+ sampledUserAgent: bucket.sampledUserAgent,
22325
+ createdAt: finishedAt,
22326
+ updatedAt: finishedAt
22327
+ }).onConflictDoUpdate({
22328
+ target: [
22329
+ aiUserFetchEventsHourly.projectId,
22330
+ aiUserFetchEventsHourly.sourceId,
22331
+ aiUserFetchEventsHourly.tsHour,
22332
+ aiUserFetchEventsHourly.botId,
22333
+ aiUserFetchEventsHourly.verificationStatus,
22334
+ aiUserFetchEventsHourly.pathNormalized,
22335
+ aiUserFetchEventsHourly.status
22336
+ ],
22337
+ set: {
22338
+ hits: sql10`${aiUserFetchEventsHourly.hits} + ${bucket.hits}`,
22339
+ sampledUserAgent: bucket.sampledUserAgent,
22340
+ updatedAt: finishedAt
22341
+ }
22342
+ }).run();
22343
+ aiUserFetchBucketRows += 1;
22344
+ }
22028
22345
  for (const bucket of report.aiReferralEventsHourly) {
22029
22346
  const status = bucket.status ?? 0;
22030
22347
  tx.insert(aiReferralEventsHourly).values({
@@ -22060,7 +22377,7 @@ async function trafficRoutes(app, opts) {
22060
22377
  aiReferralBucketRows += 1;
22061
22378
  }
22062
22379
  for (const sample of report.samples) {
22063
- const eventType = sample.crawler ? "crawler" : sample.aiReferral ? "ai_referral" : "unknown";
22380
+ const eventType = sample.crawler ? "crawler" : sample.aiUserFetch ? "ai_user_fetch" : sample.aiReferral ? "ai_referral" : "unknown";
22064
22381
  const refererHost = (() => {
22065
22382
  if (!sample.referer) return null;
22066
22383
  try {
@@ -22082,6 +22399,7 @@ async function trafficRoutes(app, opts) {
22082
22399
  refererHost,
22083
22400
  classifierDetailsJson: {
22084
22401
  crawler: sample.crawler,
22402
+ aiUserFetch: sample.aiUserFetch,
22085
22403
  aiReferral: sample.aiReferral
22086
22404
  },
22087
22405
  createdAt: finishedAt
@@ -22119,6 +22437,7 @@ async function trafficRoutes(app, opts) {
22119
22437
  sourceId: sourceRow.id,
22120
22438
  pulledEvents: pulledEventsCount,
22121
22439
  crawlerHits: crawlerHitsCount,
22440
+ aiUserFetchHits: aiUserFetchHitsCount,
22122
22441
  aiReferralHits: aiReferralHitsCount,
22123
22442
  durationMs: Date.now() - syncStartedAtMs
22124
22443
  });
@@ -22130,9 +22449,11 @@ async function trafficRoutes(app, opts) {
22130
22449
  syncedAt: finishedAt,
22131
22450
  pulledEvents: pulledEventsCount,
22132
22451
  crawlerHits: crawlerHitsCount,
22452
+ aiUserFetchHits: aiUserFetchHitsCount,
22133
22453
  aiReferralHits: aiReferralHitsCount,
22134
22454
  unknownHits: unknownHitsCount,
22135
22455
  crawlerBucketRows,
22456
+ aiUserFetchBucketRows,
22136
22457
  aiReferralBucketRows,
22137
22458
  sampleRows,
22138
22459
  windowStart: windowStart.toISOString(),
@@ -22311,6 +22632,12 @@ async function trafficRoutes(app, opts) {
22311
22632
  gte3(crawlerEventsHourly.tsHour, since)
22312
22633
  )
22313
22634
  ).get();
22635
+ const aiUserFetchTotals = app.db.select({ total: sql10`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)` }).from(aiUserFetchEventsHourly).where(
22636
+ and19(
22637
+ eq24(aiUserFetchEventsHourly.sourceId, row.id),
22638
+ gte3(aiUserFetchEventsHourly.tsHour, since)
22639
+ )
22640
+ ).get();
22314
22641
  const aiTotals = app.db.select({ total: sql10`COALESCE(SUM(${aiReferralEventsHourly.sessionsOrHits}), 0)` }).from(aiReferralEventsHourly).where(
22315
22642
  and19(
22316
22643
  eq24(aiReferralEventsHourly.sourceId, row.id),
@@ -22334,6 +22661,7 @@ async function trafficRoutes(app, opts) {
22334
22661
  ...rowToDto(row),
22335
22662
  totals24h: {
22336
22663
  crawlerHits: Number(crawlerTotals?.total ?? 0),
22664
+ aiUserFetchHits: Number(aiUserFetchTotals?.total ?? 0),
22337
22665
  aiReferralHits: Number(aiTotals?.total ?? 0),
22338
22666
  sampleCount: Number(sampleTotals?.total ?? 0)
22339
22667
  },
@@ -22393,10 +22721,12 @@ async function trafficRoutes(app, opts) {
22393
22721
  const kindParam = request.query?.kind;
22394
22722
  let kind = "all";
22395
22723
  if (kindParam !== void 0) {
22396
- if (kindParam === "all" || kindParam === TrafficEventKinds.crawler || kindParam === TrafficEventKinds["ai-referral"]) {
22724
+ if (kindParam === "all" || kindParam === TrafficEventKinds.crawler || kindParam === TrafficEventKinds["ai-user-fetch"] || kindParam === TrafficEventKinds["ai-referral"]) {
22397
22725
  kind = kindParam;
22398
22726
  } else {
22399
- throw validationError(`"kind" must be one of: all, ${TrafficEventKinds.crawler}, ${TrafficEventKinds["ai-referral"]}`);
22727
+ throw validationError(
22728
+ `"kind" must be one of: all, ${TrafficEventKinds.crawler}, ${TrafficEventKinds["ai-user-fetch"]}, ${TrafficEventKinds["ai-referral"]}`
22729
+ );
22400
22730
  }
22401
22731
  }
22402
22732
  const limitParam = request.query?.limit;
@@ -22410,6 +22740,7 @@ async function trafficRoutes(app, opts) {
22410
22740
  const untilIso = until.toISOString();
22411
22741
  const events = [];
22412
22742
  let crawlerTotal = 0;
22743
+ let aiUserFetchTotal = 0;
22413
22744
  let aiReferralTotal = 0;
22414
22745
  if (kind === "all" || kind === TrafficEventKinds.crawler) {
22415
22746
  const crawlerFilters = [
@@ -22436,6 +22767,31 @@ async function trafficRoutes(app, opts) {
22436
22767
  });
22437
22768
  }
22438
22769
  }
22770
+ if (kind === "all" || kind === TrafficEventKinds["ai-user-fetch"]) {
22771
+ const userFetchFilters = [
22772
+ eq24(aiUserFetchEventsHourly.projectId, project.id),
22773
+ gte3(aiUserFetchEventsHourly.tsHour, sinceIso),
22774
+ lte2(aiUserFetchEventsHourly.tsHour, untilIso)
22775
+ ];
22776
+ if (sourceIdParam) userFetchFilters.push(eq24(aiUserFetchEventsHourly.sourceId, sourceIdParam));
22777
+ const userFetchWhere = and19(...userFetchFilters);
22778
+ const total = app.db.select({ total: sql10`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)` }).from(aiUserFetchEventsHourly).where(userFetchWhere).get();
22779
+ aiUserFetchTotal = Number(total?.total ?? 0);
22780
+ const rows = app.db.select().from(aiUserFetchEventsHourly).where(userFetchWhere).orderBy(desc13(aiUserFetchEventsHourly.tsHour)).limit(limit).all();
22781
+ for (const r of rows) {
22782
+ events.push({
22783
+ kind: TrafficEventKinds["ai-user-fetch"],
22784
+ sourceId: r.sourceId,
22785
+ tsHour: r.tsHour,
22786
+ botId: r.botId,
22787
+ operator: r.operator,
22788
+ verificationStatus: r.verificationStatus,
22789
+ pathNormalized: r.pathNormalized,
22790
+ status: r.status,
22791
+ hits: r.hits
22792
+ });
22793
+ }
22794
+ }
22439
22795
  if (kind === "all" || kind === TrafficEventKinds["ai-referral"]) {
22440
22796
  const aiFilters = [
22441
22797
  eq24(aiReferralEventsHourly.projectId, project.id),
@@ -22469,6 +22825,7 @@ async function trafficRoutes(app, opts) {
22469
22825
  windowEnd: untilIso,
22470
22826
  totals: {
22471
22827
  crawlerHits: crawlerTotal,
22828
+ aiUserFetchHits: aiUserFetchTotal,
22472
22829
  aiReferralHits: aiReferralTotal
22473
22830
  },
22474
22831
  events: trimmed
@@ -29445,7 +29802,7 @@ function readStoredGroundingSources(rawResponse) {
29445
29802
  return result;
29446
29803
  }
29447
29804
  async function backfillInsightsCommand(project, opts) {
29448
- const { IntelligenceService: IntelligenceService2 } = await import("./intelligence-service-2XL2M7QP.js");
29805
+ const { IntelligenceService: IntelligenceService2 } = await import("./intelligence-service-OCREQUCQ.js");
29449
29806
  const config = loadConfig();
29450
29807
  const db = createClient(config.database);
29451
29808
  migrate(db);
@@ -29860,42 +30217,74 @@ async function backfillTrafficClassificationCommand(opts) {
29860
30217
  providerResource: { type: "cloud_run_revision", labels: {} },
29861
30218
  providerLabels: {}
29862
30219
  };
29863
- const classified = classifyCrawler(probe);
30220
+ const userFetch = classifyAiUserFetch(probe);
30221
+ const classified = userFetch ?? classifyCrawler(probe);
29864
30222
  if (!classified) continue;
29865
30223
  result.reclassified++;
29866
30224
  result.byBot[classified.botId] = (result.byBot[classified.botId] ?? 0) + 1;
29867
30225
  if (isDryRun) continue;
29868
- db.update(rawEventSamples).set({ eventType: TrafficEventKinds.crawler }).where(eq35(rawEventSamples.id, snap.id)).run();
30226
+ db.update(rawEventSamples).set({ eventType: userFetch ? TrafficEventKinds["ai-user-fetch"] : TrafficEventKinds.crawler }).where(eq35(rawEventSamples.id, snap.id)).run();
29869
30227
  const tsHour = new Date(snap.ts);
29870
30228
  tsHour.setUTCMinutes(0, 0, 0);
29871
- db.insert(crawlerEventsHourly).values({
29872
- projectId: snap.projectId,
29873
- sourceId: snap.sourceId,
29874
- tsHour: tsHour.toISOString(),
29875
- botId: classified.botId,
29876
- operator: classified.operator,
29877
- verificationStatus: classified.verificationStatus,
29878
- pathNormalized: snap.pathNormalized,
29879
- status: snap.status ?? 200,
29880
- hits: 1,
29881
- sampledUserAgent: snap.userAgent,
29882
- createdAt: now,
29883
- updatedAt: now
29884
- }).onConflictDoUpdate({
29885
- target: [
29886
- crawlerEventsHourly.projectId,
29887
- crawlerEventsHourly.sourceId,
29888
- crawlerEventsHourly.tsHour,
29889
- crawlerEventsHourly.botId,
29890
- crawlerEventsHourly.verificationStatus,
29891
- crawlerEventsHourly.pathNormalized,
29892
- crawlerEventsHourly.status
29893
- ],
29894
- set: {
29895
- hits: sql15`${crawlerEventsHourly.hits} + 1`,
30229
+ if (userFetch) {
30230
+ db.insert(aiUserFetchEventsHourly).values({
30231
+ projectId: snap.projectId,
30232
+ sourceId: snap.sourceId,
30233
+ tsHour: tsHour.toISOString(),
30234
+ botId: userFetch.botId,
30235
+ operator: userFetch.operator,
30236
+ verificationStatus: userFetch.verificationStatus,
30237
+ pathNormalized: snap.pathNormalized,
30238
+ status: snap.status ?? 200,
30239
+ hits: 1,
30240
+ sampledUserAgent: snap.userAgent,
30241
+ createdAt: now,
29896
30242
  updatedAt: now
29897
- }
29898
- }).run();
30243
+ }).onConflictDoUpdate({
30244
+ target: [
30245
+ aiUserFetchEventsHourly.projectId,
30246
+ aiUserFetchEventsHourly.sourceId,
30247
+ aiUserFetchEventsHourly.tsHour,
30248
+ aiUserFetchEventsHourly.botId,
30249
+ aiUserFetchEventsHourly.verificationStatus,
30250
+ aiUserFetchEventsHourly.pathNormalized,
30251
+ aiUserFetchEventsHourly.status
30252
+ ],
30253
+ set: {
30254
+ hits: sql15`${aiUserFetchEventsHourly.hits} + 1`,
30255
+ updatedAt: now
30256
+ }
30257
+ }).run();
30258
+ } else {
30259
+ db.insert(crawlerEventsHourly).values({
30260
+ projectId: snap.projectId,
30261
+ sourceId: snap.sourceId,
30262
+ tsHour: tsHour.toISOString(),
30263
+ botId: classified.botId,
30264
+ operator: classified.operator,
30265
+ verificationStatus: classified.verificationStatus,
30266
+ pathNormalized: snap.pathNormalized,
30267
+ status: snap.status ?? 200,
30268
+ hits: 1,
30269
+ sampledUserAgent: snap.userAgent,
30270
+ createdAt: now,
30271
+ updatedAt: now
30272
+ }).onConflictDoUpdate({
30273
+ target: [
30274
+ crawlerEventsHourly.projectId,
30275
+ crawlerEventsHourly.sourceId,
30276
+ crawlerEventsHourly.tsHour,
30277
+ crawlerEventsHourly.botId,
30278
+ crawlerEventsHourly.verificationStatus,
30279
+ crawlerEventsHourly.pathNormalized,
30280
+ crawlerEventsHourly.status
30281
+ ],
30282
+ set: {
30283
+ hits: sql15`${crawlerEventsHourly.hits} + 1`,
30284
+ updatedAt: now
30285
+ }
30286
+ }).run();
30287
+ }
29899
30288
  }
29900
30289
  if (!isDryRun) {
29901
30290
  const afterRow = db.select({ n: sql15`count(*)` }).from(rawEventSamples).where(and28(