@ainyc/canonry 4.51.4 → 4.53.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/assets/{BacklinksPage-9TlM08Wf.js → BacklinksPage-DELb5ok3.js} +1 -1
- package/assets/assets/ProjectPage-CM_uQa2L.js +6 -0
- package/assets/assets/{RunRow-D7qdWWRl.js → RunRow-aqJEr7XJ.js} +1 -1
- package/assets/assets/{RunsPage-CvewepfU.js → RunsPage-Dhuj1w72.js} +1 -1
- package/assets/assets/{SettingsPage-C7BvAhiB.js → SettingsPage-B2_vxr4y.js} +1 -1
- package/assets/assets/{TrafficPage-DC3NhFOh.js → TrafficPage-BKaiZRIH.js} +1 -1
- package/assets/assets/TrafficSourceDetailPage-DXIQ4g9S.js +1 -0
- package/assets/assets/{arrow-left-Agb02DMK.js → arrow-left-CYjzP3M3.js} +1 -1
- package/assets/assets/{index-DTCZ93Ne.js → index-BStwmAg6.js} +55 -55
- package/assets/assets/{index-DeGyEwik.css → index-Bm3JQsW0.css} +1 -1
- package/assets/assets/{server-traffic-C-0Ndjpw.js → server-traffic-D_1gSi-b.js} +1 -1
- package/assets/assets/{trash-2-lkrXVRRm.js → trash-2-8JiADnUJ.js} +1 -1
- package/assets/index.html +2 -2
- package/dist/{chunk-HMZKIOLG.js → chunk-J7MX3YOH.js} +1 -1
- package/dist/{chunk-QZ5XSM6C.js → chunk-JHAHNKSN.js} +103 -1
- package/dist/{chunk-WBO5S3IX.js → chunk-KVE7RLBI.js} +452 -63
- package/dist/{chunk-FYGBW3SM.js → chunk-VZPDBHBW.js} +29 -1
- package/dist/cli.js +40 -27
- package/dist/index.js +4 -4
- package/dist/{intelligence-service-2XL2M7QP.js → intelligence-service-OCREQUCQ.js} +2 -2
- package/dist/mcp.js +2 -2
- package/package.json +10 -10
- package/assets/assets/ProjectPage-CD591qDz.js +0 -6
- package/assets/assets/TrafficSourceDetailPage-BvtTA6rs.js +0 -1
|
@@ -6,7 +6,7 @@ import {
|
|
|
6
6
|
loadConfig,
|
|
7
7
|
loadConfigRaw,
|
|
8
8
|
saveConfigPatch
|
|
9
|
-
} from "./chunk-HMZKIOLG.js";
|
|
9
|
+
} from "./chunk-J7MX3YOH.js";
|
|
10
10
|
import {
|
|
11
11
|
DEFAULT_RUN_HISTORY_LIMIT,
|
|
12
12
|
IntelligenceService,
|
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
agentMemory,
|
|
15
15
|
agentSessions,
|
|
16
16
|
aiReferralEventsHourly,
|
|
17
|
+
aiUserFetchEventsHourly,
|
|
17
18
|
apiKeys,
|
|
18
19
|
auditLog,
|
|
19
20
|
backlinkDomains,
|
|
@@ -83,7 +84,7 @@ import {
|
|
|
83
84
|
smoothedRunDelta,
|
|
84
85
|
trafficSources,
|
|
85
86
|
usageCounters
|
|
86
|
-
} from "./chunk-QZ5XSM6C.js";
|
|
87
|
+
} from "./chunk-JHAHNKSN.js";
|
|
87
88
|
import {
|
|
88
89
|
AGENT_MEMORY_VALUE_MAX_BYTES,
|
|
89
90
|
AGENT_PROVIDER_IDS,
|
|
@@ -284,7 +285,7 @@ import {
|
|
|
284
285
|
wordpressSchemaDeployResultDtoSchema,
|
|
285
286
|
wordpressSchemaStatusResultDtoSchema,
|
|
286
287
|
wordpressStatusDtoSchema
|
|
287
|
-
} from "./chunk-FYGBW3SM.js";
|
|
288
|
+
} from "./chunk-VZPDBHBW.js";
|
|
288
289
|
|
|
289
290
|
// src/telemetry.ts
|
|
290
291
|
import crypto from "crypto";
|
|
@@ -5455,11 +5456,14 @@ function renderServerActivity(report, audience) {
|
|
|
5455
5456
|
const crawlerTrustSummary = `${formatNumber(sa.verifiedCrawlerHits.current)} verified \xB7 ${formatNumber(sa.unverifiedCrawlerHits.current)} unverified`;
|
|
5456
5457
|
const crawlerDelta = formatDelta(crawlerRequests, "requests");
|
|
5457
5458
|
const crawlerSubtitle = crawlerDelta ? `${escapeHtml(crawlerTrustSummary)} \xB7 ${crawlerDelta}` : escapeHtml(crawlerTrustSummary);
|
|
5458
|
-
const
|
|
5459
|
+
const userFetchDelta = formatDelta(sa.aiUserFetchHits, "requests");
|
|
5460
|
+
const userFetchSubtitle = userFetchDelta || escapeHtml("ChatGPT-User, Perplexity-User, MistralAI-User");
|
|
5461
|
+
const clientOperators = sa.byOperator.filter((o) => o.verifiedHits > 0 || o.unverifiedHits > 0 || o.userFetchHits > 0 || o.referralArrivals > 0).slice(0, 5);
|
|
5459
5462
|
const clientOperatorRows = clientOperators.map((o) => `
|
|
5460
5463
|
<tr>
|
|
5461
5464
|
<td>${escapeHtml(o.operator)}</td>
|
|
5462
5465
|
<td class="numeric">${formatNumber(o.verifiedHits + o.unverifiedHits)}</td>
|
|
5466
|
+
<td class="numeric">${formatNumber(o.userFetchHits)}</td>
|
|
5463
5467
|
<td class="numeric">${formatNumber(o.referralArrivals)}</td>
|
|
5464
5468
|
</tr>`).join("");
|
|
5465
5469
|
return section(
|
|
@@ -5470,6 +5474,11 @@ function renderServerActivity(report, audience) {
|
|
|
5470
5474
|
<div class="value">${formatNumber(crawlerRequests.current)}</div>
|
|
5471
5475
|
<div class="subtitle">${crawlerSubtitle}</div>
|
|
5472
5476
|
</div>
|
|
5477
|
+
<div class="metric">
|
|
5478
|
+
<div class="label">AI user-fetch requests</div>
|
|
5479
|
+
<div class="value">${formatNumber(sa.aiUserFetchHits.current)}</div>
|
|
5480
|
+
<div class="subtitle">${userFetchSubtitle}</div>
|
|
5481
|
+
</div>
|
|
5473
5482
|
<div class="metric">
|
|
5474
5483
|
<div class="label">AI referral sessions</div>
|
|
5475
5484
|
<div class="value">${formatNumber(sa.referralArrivals.current)}</div>
|
|
@@ -5478,10 +5487,10 @@ function renderServerActivity(report, audience) {
|
|
|
5478
5487
|
</div>
|
|
5479
5488
|
${clientOperatorRows ? `<div class="chart-card"><h3>By AI tool</h3>
|
|
5480
5489
|
<table class="report-table">
|
|
5481
|
-
<thead><tr><th>AI tool</th><th class="numeric">Bot requests (7d)</th><th class="numeric">Referral sessions</th></tr></thead>
|
|
5490
|
+
<thead><tr><th>AI tool</th><th class="numeric">Bot requests (7d)</th><th class="numeric">User fetches (7d)</th><th class="numeric">Referral sessions</th></tr></thead>
|
|
5482
5491
|
<tbody>${clientOperatorRows}</tbody>
|
|
5483
5492
|
</table>
|
|
5484
|
-
<p class="meta">Verified requests are reverse-DNS confirmed
|
|
5493
|
+
<p class="meta">Bot requests are bulk crawl (GPTBot, PerplexityBot, \u2026). User fetches are on-demand reads triggered by real users inside an AI surface (ChatGPT-User, Perplexity-User, \u2026). Verified requests are reverse-DNS confirmed; unverified requests are UA claims shown separately in agency diagnostics.</p>
|
|
5485
5494
|
</div>` : ""}`
|
|
5486
5495
|
);
|
|
5487
5496
|
}
|
|
@@ -5493,6 +5502,7 @@ function renderServerActivity(report, audience) {
|
|
|
5493
5502
|
<td>${escapeHtml(o.operator)}</td>
|
|
5494
5503
|
<td class="numeric">${formatNumber(o.verifiedHits)}</td>
|
|
5495
5504
|
<td class="numeric meta">${formatNumber(o.unverifiedHits)}</td>
|
|
5505
|
+
<td class="numeric">${formatNumber(o.userFetchHits)}</td>
|
|
5496
5506
|
<td class="numeric">${formatNumber(o.referralArrivals)}</td>
|
|
5497
5507
|
<td class="numeric ${toneClass}">${deltaText}</td>
|
|
5498
5508
|
</tr>`;
|
|
@@ -5533,6 +5543,11 @@ function renderServerActivity(report, audience) {
|
|
|
5533
5543
|
<div class="value">${formatNumber(sa.unverifiedCrawlerHits.current)}</div>
|
|
5534
5544
|
<div class="subtitle">${formatDelta(sa.unverifiedCrawlerHits, "hits")}</div>
|
|
5535
5545
|
</div>
|
|
5546
|
+
<div class="metric">
|
|
5547
|
+
<div class="label">AI user-fetch hits (7d)</div>
|
|
5548
|
+
<div class="value">${formatNumber(sa.aiUserFetchHits.current)}</div>
|
|
5549
|
+
<div class="subtitle">${formatDelta(sa.aiUserFetchHits, "hits")}</div>
|
|
5550
|
+
</div>
|
|
5536
5551
|
<div class="metric">
|
|
5537
5552
|
<div class="label">AI-referral sessions (7d)</div>
|
|
5538
5553
|
<div class="value">${formatNumber(sa.referralArrivals.current)}</div>
|
|
@@ -5541,9 +5556,9 @@ function renderServerActivity(report, audience) {
|
|
|
5541
5556
|
</div>
|
|
5542
5557
|
${trendChart}
|
|
5543
5558
|
${operatorRows ? `<div class="chart-card"><h3>Per AI operator</h3>
|
|
5544
|
-
<p class="meta">Verified means rDNS-confirmed. Unverified bots claim the user-agent but couldn't be verified \u2014 could be the real bot or an imitator.</p>
|
|
5559
|
+
<p class="meta">Verified means rDNS-confirmed. Unverified bots claim the user-agent but couldn't be verified \u2014 could be the real bot or an imitator. User fetches are on-demand reads from an AI surface on behalf of a real user (ChatGPT-User, Perplexity-User, \u2026) \u2014 disjoint from bulk crawl.</p>
|
|
5545
5560
|
<table class="report-table">
|
|
5546
|
-
<thead><tr><th>Operator</th><th class="numeric">Verified hits</th><th class="numeric">Unverified</th><th class="numeric">Referral sessions</th><th class="numeric">7d delta</th></tr></thead>
|
|
5561
|
+
<thead><tr><th>Operator</th><th class="numeric">Verified hits</th><th class="numeric">Unverified</th><th class="numeric">User fetches</th><th class="numeric">Referral sessions</th><th class="numeric">7d delta</th></tr></thead>
|
|
5547
5562
|
<tbody>${operatorRows}</tbody>
|
|
5548
5563
|
</table>
|
|
5549
5564
|
</div>` : ""}
|
|
@@ -6385,10 +6400,21 @@ function buildServerActivity(db, projectId) {
|
|
|
6385
6400
|
)
|
|
6386
6401
|
).get()?.total ?? 0
|
|
6387
6402
|
);
|
|
6403
|
+
const sumUserFetches = (windowStartIso, windowEndIso, exclusiveEnd = false) => Number(
|
|
6404
|
+
db.select({ total: sql5`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)` }).from(aiUserFetchEventsHourly).where(
|
|
6405
|
+
and9(
|
|
6406
|
+
eq14(aiUserFetchEventsHourly.projectId, projectId),
|
|
6407
|
+
gte2(aiUserFetchEventsHourly.tsHour, windowStartIso),
|
|
6408
|
+
exclusiveEnd ? lt(aiUserFetchEventsHourly.tsHour, windowEndIso) : lte(aiUserFetchEventsHourly.tsHour, windowEndIso)
|
|
6409
|
+
)
|
|
6410
|
+
).get()?.total ?? 0
|
|
6411
|
+
);
|
|
6388
6412
|
const verifiedCurrent = sumVerifiedCrawlers(headlineStart, headlineEnd);
|
|
6389
6413
|
const verifiedPrior = sumVerifiedCrawlers(priorStart, headlineStart, true);
|
|
6390
6414
|
const unverifiedCurrent = sumUnverifiedCrawlers(headlineStart, headlineEnd);
|
|
6391
6415
|
const unverifiedPrior = sumUnverifiedCrawlers(priorStart, headlineStart, true);
|
|
6416
|
+
const userFetchCurrent = sumUserFetches(headlineStart, headlineEnd);
|
|
6417
|
+
const userFetchPrior = sumUserFetches(priorStart, headlineStart, true);
|
|
6392
6418
|
const referralCurrent = sumReferrals(headlineStart, headlineEnd);
|
|
6393
6419
|
const referralPrior = sumReferrals(priorStart, headlineStart, true);
|
|
6394
6420
|
const crawlerByOperatorRows = db.select({
|
|
@@ -6424,11 +6450,21 @@ function buildServerActivity(db, projectId) {
|
|
|
6424
6450
|
lte(aiReferralEventsHourly.tsHour, headlineEnd)
|
|
6425
6451
|
)
|
|
6426
6452
|
).groupBy(aiReferralEventsHourly.operator).all();
|
|
6453
|
+
const userFetchByOperatorRows = db.select({
|
|
6454
|
+
operator: aiUserFetchEventsHourly.operator,
|
|
6455
|
+
hits: sql5`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)`
|
|
6456
|
+
}).from(aiUserFetchEventsHourly).where(
|
|
6457
|
+
and9(
|
|
6458
|
+
eq14(aiUserFetchEventsHourly.projectId, projectId),
|
|
6459
|
+
gte2(aiUserFetchEventsHourly.tsHour, headlineStart),
|
|
6460
|
+
lte(aiUserFetchEventsHourly.tsHour, headlineEnd)
|
|
6461
|
+
)
|
|
6462
|
+
).groupBy(aiUserFetchEventsHourly.operator).all();
|
|
6427
6463
|
const operatorAgg = /* @__PURE__ */ new Map();
|
|
6428
6464
|
const ensureOp = (op) => {
|
|
6429
6465
|
let entry = operatorAgg.get(op);
|
|
6430
6466
|
if (!entry) {
|
|
6431
|
-
entry = { verified: 0, unverified: 0, referrals: 0, prior: 0 };
|
|
6467
|
+
entry = { verified: 0, unverified: 0, userFetch: 0, referrals: 0, prior: 0 };
|
|
6432
6468
|
operatorAgg.set(op, entry);
|
|
6433
6469
|
}
|
|
6434
6470
|
return entry;
|
|
@@ -6441,6 +6477,9 @@ function buildServerActivity(db, projectId) {
|
|
|
6441
6477
|
for (const r of crawlerByOperatorPriorRows) {
|
|
6442
6478
|
ensureOp(r.operator).prior += Number(r.hits);
|
|
6443
6479
|
}
|
|
6480
|
+
for (const r of userFetchByOperatorRows) {
|
|
6481
|
+
ensureOp(r.operator).userFetch += Number(r.hits);
|
|
6482
|
+
}
|
|
6444
6483
|
for (const r of referralByOperatorRows) {
|
|
6445
6484
|
ensureOp(r.operator).referrals += Number(r.hits);
|
|
6446
6485
|
}
|
|
@@ -6448,10 +6487,11 @@ function buildServerActivity(db, projectId) {
|
|
|
6448
6487
|
operator,
|
|
6449
6488
|
verifiedHits: v.verified,
|
|
6450
6489
|
unverifiedHits: v.unverified,
|
|
6490
|
+
userFetchHits: v.userFetch,
|
|
6451
6491
|
referralArrivals: v.referrals,
|
|
6452
6492
|
deltaPct: deltaPercent(v.verified, v.prior)
|
|
6453
6493
|
})).sort(
|
|
6454
|
-
(a, b) => b.verifiedHits - a.verifiedHits || b.unverifiedHits - a.unverifiedHits || b.referralArrivals - a.referralArrivals
|
|
6494
|
+
(a, b) => b.verifiedHits - a.verifiedHits || b.userFetchHits - a.userFetchHits || b.unverifiedHits - a.unverifiedHits || b.referralArrivals - a.referralArrivals
|
|
6455
6495
|
);
|
|
6456
6496
|
const topPathsRows = db.select({
|
|
6457
6497
|
path: crawlerEventsHourly.pathNormalized,
|
|
@@ -6526,14 +6566,30 @@ function buildServerActivity(db, projectId) {
|
|
|
6526
6566
|
lte(aiReferralEventsHourly.tsHour, headlineEnd)
|
|
6527
6567
|
)
|
|
6528
6568
|
).groupBy(sql5`SUBSTR(${aiReferralEventsHourly.tsHour}, 1, 10)`).all();
|
|
6569
|
+
const userFetchTrendRows = db.select({
|
|
6570
|
+
date: sql5`SUBSTR(${aiUserFetchEventsHourly.tsHour}, 1, 10)`,
|
|
6571
|
+
hits: sql5`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)`
|
|
6572
|
+
}).from(aiUserFetchEventsHourly).where(
|
|
6573
|
+
and9(
|
|
6574
|
+
eq14(aiUserFetchEventsHourly.projectId, projectId),
|
|
6575
|
+
gte2(aiUserFetchEventsHourly.tsHour, trendStart),
|
|
6576
|
+
lte(aiUserFetchEventsHourly.tsHour, headlineEnd)
|
|
6577
|
+
)
|
|
6578
|
+
).groupBy(sql5`SUBSTR(${aiUserFetchEventsHourly.tsHour}, 1, 10)`).all();
|
|
6579
|
+
const emptyTrendEntry = () => ({ verifiedCrawlerHits: 0, userFetchHits: 0, referralArrivals: 0 });
|
|
6529
6580
|
const dailyTrendMap = /* @__PURE__ */ new Map();
|
|
6530
6581
|
for (const r of crawlerTrendRows) {
|
|
6531
|
-
const e = dailyTrendMap.get(r.date) ?? { verifiedCrawlerHits: 0, referralArrivals: 0 };
|
|
6582
|
+
const e = dailyTrendMap.get(r.date) ?? emptyTrendEntry();
|
|
6532
6583
|
e.verifiedCrawlerHits += Number(r.hits);
|
|
6533
6584
|
dailyTrendMap.set(r.date, e);
|
|
6534
6585
|
}
|
|
6586
|
+
for (const r of userFetchTrendRows) {
|
|
6587
|
+
const e = dailyTrendMap.get(r.date) ?? emptyTrendEntry();
|
|
6588
|
+
e.userFetchHits += Number(r.hits);
|
|
6589
|
+
dailyTrendMap.set(r.date, e);
|
|
6590
|
+
}
|
|
6535
6591
|
for (const r of referralTrendRows) {
|
|
6536
|
-
const e = dailyTrendMap.get(r.date) ?? { verifiedCrawlerHits: 0, referralArrivals: 0 };
|
|
6592
|
+
const e = dailyTrendMap.get(r.date) ?? emptyTrendEntry();
|
|
6537
6593
|
e.referralArrivals += Number(r.hits);
|
|
6538
6594
|
dailyTrendMap.set(r.date, e);
|
|
6539
6595
|
}
|
|
@@ -6541,7 +6597,7 @@ function buildServerActivity(db, projectId) {
|
|
|
6541
6597
|
return {
|
|
6542
6598
|
windowStart: headlineStart,
|
|
6543
6599
|
windowEnd: headlineEnd,
|
|
6544
|
-
hasData: verifiedCurrent + unverifiedCurrent + referralCurrent + verifiedPrior + unverifiedPrior + referralPrior > 0 || byOperator.length > 0 || topCrawledPaths.length > 0 || referralProducts.length > 0,
|
|
6600
|
+
hasData: verifiedCurrent + unverifiedCurrent + userFetchCurrent + referralCurrent + verifiedPrior + unverifiedPrior + userFetchPrior + referralPrior > 0 || byOperator.length > 0 || topCrawledPaths.length > 0 || referralProducts.length > 0,
|
|
6545
6601
|
verifiedCrawlerHits: {
|
|
6546
6602
|
current: verifiedCurrent,
|
|
6547
6603
|
prior: verifiedPrior,
|
|
@@ -6552,6 +6608,11 @@ function buildServerActivity(db, projectId) {
|
|
|
6552
6608
|
prior: unverifiedPrior,
|
|
6553
6609
|
deltaPct: deltaPercent(unverifiedCurrent, unverifiedPrior)
|
|
6554
6610
|
},
|
|
6611
|
+
aiUserFetchHits: {
|
|
6612
|
+
current: userFetchCurrent,
|
|
6613
|
+
prior: userFetchPrior,
|
|
6614
|
+
deltaPct: deltaPercent(userFetchCurrent, userFetchPrior)
|
|
6615
|
+
},
|
|
6555
6616
|
referralArrivals: {
|
|
6556
6617
|
current: referralCurrent,
|
|
6557
6618
|
prior: referralPrior,
|
|
@@ -11458,14 +11519,14 @@ var routeCatalog = [
|
|
|
11458
11519
|
{
|
|
11459
11520
|
method: "get",
|
|
11460
11521
|
path: "/api/v1/projects/{name}/traffic/events",
|
|
11461
|
-
summary: "List rolled-up crawler hits and AI-referral sessions within a window",
|
|
11462
|
-
description: "Returns hourly rollup rows from `crawler_events_hourly` and `ai_referral_events_hourly`. Defaults to the last 24h. Totals reflect the full window; the `events` array is capped by `limit` (default 500, max 5000).",
|
|
11522
|
+
summary: "List rolled-up crawler hits, AI user-fetch hits, and AI-referral sessions within a window",
|
|
11523
|
+
description: "Returns hourly rollup rows from `crawler_events_hourly`, `ai_user_fetch_events_hourly`, and `ai_referral_events_hourly`. Defaults to the last 24h. Totals reflect the full window; the `events` array is capped by `limit` (default 500, max 5000).",
|
|
11463
11524
|
tags: ["traffic"],
|
|
11464
11525
|
parameters: [
|
|
11465
11526
|
nameParameter,
|
|
11466
11527
|
{ name: "since", in: "query", description: "ISO-8601 window start (defaults to 24h ago).", schema: stringSchema },
|
|
11467
11528
|
{ name: "until", in: "query", description: "ISO-8601 window end (defaults to now).", schema: stringSchema },
|
|
11468
|
-
{ name: "kind", in: "query", description: 'Filter to "crawler", "ai-referral", or "all" (default).', schema: stringSchema },
|
|
11529
|
+
{ name: "kind", in: "query", description: 'Filter to "crawler", "ai-user-fetch", "ai-referral", or "all" (default).', schema: stringSchema },
|
|
11469
11530
|
{ name: "limit", in: "query", description: "Max rows per kind in the events array (default 500, max 5000).", schema: stringSchema },
|
|
11470
11531
|
{ name: "sourceId", in: "query", description: "Restrict to a single traffic source.", schema: stringSchema }
|
|
11471
11532
|
],
|
|
@@ -19168,6 +19229,74 @@ var chatgpt_user_default = {
|
|
|
19168
19229
|
]
|
|
19169
19230
|
};
|
|
19170
19231
|
|
|
19232
|
+
// ../integration-traffic/src/ip-ranges/google-user-triggered-agents.json
|
|
19233
|
+
var google_user_triggered_agents_default = {
|
|
19234
|
+
_source: "https://developers.google.com/static/crawling/ipranges/user-triggered-agents.json",
|
|
19235
|
+
creationTime: "2026-05-19T14:46:15.000000",
|
|
19236
|
+
prefixes: [
|
|
19237
|
+
{
|
|
19238
|
+
ipv6Prefix: "2001:4860:c::/124"
|
|
19239
|
+
},
|
|
19240
|
+
{
|
|
19241
|
+
ipv6Prefix: "2001:4860:c::10/124"
|
|
19242
|
+
},
|
|
19243
|
+
{
|
|
19244
|
+
ipv6Prefix: "2001:4860:c::20/124"
|
|
19245
|
+
},
|
|
19246
|
+
{
|
|
19247
|
+
ipv6Prefix: "2001:4860:c::30/124"
|
|
19248
|
+
},
|
|
19249
|
+
{
|
|
19250
|
+
ipv6Prefix: "2001:4860:c::40/124"
|
|
19251
|
+
},
|
|
19252
|
+
{
|
|
19253
|
+
ipv6Prefix: "2001:4860:c::50/124"
|
|
19254
|
+
},
|
|
19255
|
+
{
|
|
19256
|
+
ipv6Prefix: "2001:4860:c::60/124"
|
|
19257
|
+
},
|
|
19258
|
+
{
|
|
19259
|
+
ipv6Prefix: "2001:4860:c::70/124"
|
|
19260
|
+
},
|
|
19261
|
+
{
|
|
19262
|
+
ipv4Prefix: "136.121.16.0/24"
|
|
19263
|
+
},
|
|
19264
|
+
{
|
|
19265
|
+
ipv4Prefix: "136.121.24.0/21"
|
|
19266
|
+
},
|
|
19267
|
+
{
|
|
19268
|
+
ipv4Prefix: "136.121.40.0/21"
|
|
19269
|
+
},
|
|
19270
|
+
{
|
|
19271
|
+
ipv4Prefix: "136.122.0.0/16"
|
|
19272
|
+
},
|
|
19273
|
+
{
|
|
19274
|
+
ipv4Prefix: "74.125.232.0/28"
|
|
19275
|
+
},
|
|
19276
|
+
{
|
|
19277
|
+
ipv4Prefix: "74.125.232.112/28"
|
|
19278
|
+
},
|
|
19279
|
+
{
|
|
19280
|
+
ipv4Prefix: "74.125.232.16/28"
|
|
19281
|
+
},
|
|
19282
|
+
{
|
|
19283
|
+
ipv4Prefix: "74.125.232.32/28"
|
|
19284
|
+
},
|
|
19285
|
+
{
|
|
19286
|
+
ipv4Prefix: "74.125.232.48/28"
|
|
19287
|
+
},
|
|
19288
|
+
{
|
|
19289
|
+
ipv4Prefix: "74.125.232.64/28"
|
|
19290
|
+
},
|
|
19291
|
+
{
|
|
19292
|
+
ipv4Prefix: "74.125.232.80/28"
|
|
19293
|
+
},
|
|
19294
|
+
{
|
|
19295
|
+
ipv4Prefix: "74.125.232.96/28"
|
|
19296
|
+
}
|
|
19297
|
+
]
|
|
19298
|
+
};
|
|
19299
|
+
|
|
19171
19300
|
// ../integration-traffic/src/ip-ranges/googlebot.json
|
|
19172
19301
|
var googlebot_default = {
|
|
19173
19302
|
_source: "https://developers.google.com/static/search/apis/ipranges/googlebot.json",
|
|
@@ -20366,6 +20495,12 @@ var RULE_ID_TO_RANGES = {
|
|
|
20366
20495
|
// (also covers Copilot grounding — Microsoft routes Copilot's
|
|
20367
20496
|
// web fetches through bingbot infrastructure)
|
|
20368
20497
|
"bingbot": bingbot_default,
|
|
20498
|
+
// Google-Agent — Google's agentic user-triggered fetcher (Project
|
|
20499
|
+
// Mariner et al.). Verified against Google's user-triggered-agents
|
|
20500
|
+
// list, which covers every Google user-triggered fetcher collectively
|
|
20501
|
+
// (Google publishes no per-fetcher split).
|
|
20502
|
+
// src: https://developers.google.com/static/crawling/ipranges/user-triggered-agents.json
|
|
20503
|
+
"google-agent": google_user_triggered_agents_default,
|
|
20369
20504
|
// Perplexity — split between crawler and user-on-behalf fetcher,
|
|
20370
20505
|
// same shape as OpenAI's split.
|
|
20371
20506
|
// src: https://www.perplexity.ai/perplexitybot.json
|
|
@@ -20377,10 +20512,12 @@ var RULE_ID_TO_RANGES = {
|
|
|
20377
20512
|
// PBC at ARIN (the authoritative allocation record). Maintained by
|
|
20378
20513
|
// hand; refresh by re-querying the ARIN entity below. The crawler
|
|
20379
20514
|
// block is AWS-ANTHROPIC 216.73.216.0/22 — empirical Cloud Run
|
|
20380
|
-
// logs show all real ClaudeBot traffic comes from there.
|
|
20381
|
-
// set is shared across every Claude-* UA the classifier emits
|
|
20515
|
+
// logs show all real ClaudeBot traffic comes from there. The same
|
|
20516
|
+
// raw set is shared across every Claude-* UA the classifier emits:
|
|
20517
|
+
// both the training crawler and the per-user fetcher map here.
|
|
20382
20518
|
// src: https://rdap.arin.net/registry/entity/AP-2440
|
|
20383
|
-
"anthropic-claudebot": anthropic_default
|
|
20519
|
+
"anthropic-claudebot": anthropic_default,
|
|
20520
|
+
"claude-user": anthropic_default
|
|
20384
20521
|
};
|
|
20385
20522
|
var CACHE = (() => {
|
|
20386
20523
|
const cache = /* @__PURE__ */ new Map();
|
|
@@ -20492,9 +20629,11 @@ var DEFAULT_AI_CRAWLER_RULES = [
|
|
|
20492
20629
|
// Anthropic ships several Claude-* crawlers (ClaudeBot for training,
|
|
20493
20630
|
// Claude-Web for chat fetches, Claude-SearchBot for search). The
|
|
20494
20631
|
// `Claude-` prefix + `Bot/` suffix is the stable shape — pattern is
|
|
20495
|
-
// permissive enough to catch new Claude-* variants as Anthropic
|
|
20632
|
+
// permissive enough to catch new Claude-*Bot variants as Anthropic
|
|
20496
20633
|
// adds them, without matching unrelated UAs that happen to mention
|
|
20497
|
-
// "claude".
|
|
20634
|
+
// "claude". The per-user fetcher `Claude-User` has no `Bot/` suffix
|
|
20635
|
+
// and is intentionally NOT matched here — it routes through the
|
|
20636
|
+
// separate `claude-user` rule below (purpose: 'user-agent').
|
|
20498
20637
|
userAgentPatterns: [
|
|
20499
20638
|
/ClaudeBot\//i,
|
|
20500
20639
|
/Claude-Web\//i,
|
|
@@ -20503,6 +20642,21 @@ var DEFAULT_AI_CRAWLER_RULES = [
|
|
|
20503
20642
|
/anthropic-ai/i
|
|
20504
20643
|
]
|
|
20505
20644
|
},
|
|
20645
|
+
{
|
|
20646
|
+
// Anthropic's on-behalf-of-user fetcher: Claude fetches a URL when
|
|
20647
|
+
// a person asks about it mid-conversation (citation click, "read
|
|
20648
|
+
// this page" prompt). Distinct from ClaudeBot (training crawl) —
|
|
20649
|
+
// same operator, opposite operational signal, mirroring OpenAI's
|
|
20650
|
+
// GPTBot vs. ChatGPT-User split. The `anthropic-claudebot` rule
|
|
20651
|
+
// above does not match `Claude-User/` (its `Claude-[A-Z]+Bot/`
|
|
20652
|
+
// pattern needs a `Bot/` suffix), so this is the only rule that
|
|
20653
|
+
// routes it — into the user-fetch bucket, not bulk crawl.
|
|
20654
|
+
id: "claude-user",
|
|
20655
|
+
operator: "Anthropic",
|
|
20656
|
+
product: "Claude-User",
|
|
20657
|
+
purpose: "user-agent",
|
|
20658
|
+
userAgentPatterns: [/Claude-User\//i]
|
|
20659
|
+
},
|
|
20506
20660
|
{
|
|
20507
20661
|
id: "perplexity-bot",
|
|
20508
20662
|
operator: "Perplexity",
|
|
@@ -20528,6 +20682,21 @@ var DEFAULT_AI_CRAWLER_RULES = [
|
|
|
20528
20682
|
purpose: "training-control",
|
|
20529
20683
|
userAgentPatterns: [/Google-Extended/i]
|
|
20530
20684
|
},
|
|
20685
|
+
{
|
|
20686
|
+
// Google-Agent: agents on Google infrastructure that navigate the
|
|
20687
|
+
// web and act "upon user request" (e.g. Project Mariner) — a
|
|
20688
|
+
// user-driven fetch, routed to the user-fetch bucket. Google ships
|
|
20689
|
+
// no distinct Gemini fetch UA (`Google-Extended` above is a
|
|
20690
|
+
// robots.txt control token, not a request UA), so this is the
|
|
20691
|
+
// closest Google equivalent to ChatGPT-User. The UA is browser-like
|
|
20692
|
+
// with a `compatible; Google-Agent;` token. IP ranges:
|
|
20693
|
+
// user-triggered-agents.json.
|
|
20694
|
+
id: "google-agent",
|
|
20695
|
+
operator: "Google",
|
|
20696
|
+
product: "Google-Agent",
|
|
20697
|
+
purpose: "user-agent",
|
|
20698
|
+
userAgentPatterns: [/Google-Agent/i]
|
|
20699
|
+
},
|
|
20531
20700
|
{
|
|
20532
20701
|
id: "bytespider",
|
|
20533
20702
|
operator: "ByteDance",
|
|
@@ -20582,16 +20751,23 @@ var DEFAULT_AI_CRAWLER_RULES = [
|
|
|
20582
20751
|
userAgentPatterns: [/Diffbot/i]
|
|
20583
20752
|
},
|
|
20584
20753
|
{
|
|
20585
|
-
|
|
20754
|
+
// Per-user, on-demand fetches initiated by a Mistral user (citation
|
|
20755
|
+
// click, "read this URL" prompt). Separate from MistralBot (crawl)
|
|
20756
|
+
// so the dashboard's user-fetch vs. bulk-crawl split stays honest.
|
|
20757
|
+
id: "mistral-ai-user",
|
|
20586
20758
|
operator: "Mistral AI",
|
|
20587
20759
|
product: "MistralAI-User",
|
|
20760
|
+
purpose: "user-agent",
|
|
20761
|
+
userAgentPatterns: [/MistralAI-User\//i]
|
|
20762
|
+
},
|
|
20763
|
+
{
|
|
20764
|
+
// Mistral's general crawler. Distinct from MistralAI-User (per-user
|
|
20765
|
+
// fetch) — same operator, different operational signal.
|
|
20766
|
+
id: "mistral-bot",
|
|
20767
|
+
operator: "Mistral AI",
|
|
20768
|
+
product: "MistralBot",
|
|
20588
20769
|
purpose: "crawl",
|
|
20589
|
-
|
|
20590
|
-
// fetches) and `MistralBot/*` (general crawler). Earlier rule only
|
|
20591
|
-
// matched `MistralAI` and missed the bot — caught on 2026-05-18
|
|
20592
|
-
// when canonry.ai/canonry-landing's classification chart went flat
|
|
20593
|
-
// and the bot UA was sitting in the `unknown` bucket.
|
|
20594
|
-
userAgentPatterns: [/MistralAI/i, /MistralBot/i]
|
|
20770
|
+
userAgentPatterns: [/MistralBot\//i]
|
|
20595
20771
|
},
|
|
20596
20772
|
{
|
|
20597
20773
|
id: "deepseek",
|
|
@@ -20600,6 +20776,21 @@ var DEFAULT_AI_CRAWLER_RULES = [
|
|
|
20600
20776
|
purpose: "training",
|
|
20601
20777
|
userAgentPatterns: [/DeepSeekBot/i]
|
|
20602
20778
|
},
|
|
20779
|
+
{
|
|
20780
|
+
id: "xai-grok-bot",
|
|
20781
|
+
operator: "xAI",
|
|
20782
|
+
product: "xAI-Bot",
|
|
20783
|
+
purpose: "crawl",
|
|
20784
|
+
// xAI documents its crawler at https://x.ai/bots/ as `xAI-Bot/<version>`.
|
|
20785
|
+
// Operators have also observed `Grok-Bot/...` in production logs. xAI
|
|
20786
|
+
// has been less consistent than OpenAI/Anthropic about publishing every
|
|
20787
|
+
// UA variant they ship, so the pattern is intentionally permissive
|
|
20788
|
+
// across the xAI/Grok family — better to over-match the operator than
|
|
20789
|
+
// leave real hits in the `unknown` bucket. A separate `purpose:
|
|
20790
|
+
// 'user-agent'` Grok rule can be added later if xAI ships a citation
|
|
20791
|
+
// user-fetcher UA (the way OpenAI ships ChatGPT-User alongside GPTBot).
|
|
20792
|
+
userAgentPatterns: [/xAI-Bot\//i, /Grok-Bot\//i, /GrokBot\//i]
|
|
20793
|
+
},
|
|
20603
20794
|
// Classic search-engine crawlers. Not strictly "AI" by training origin,
|
|
20604
20795
|
// but the same audience: machine traffic indexing the site for query
|
|
20605
20796
|
// surfaces. Operators tracking AI visibility want this signal too —
|
|
@@ -20662,12 +20853,14 @@ var DEFAULT_AI_REFERRER_RULES = [
|
|
|
20662
20853
|
{ domain: AI_ENGINE_DOMAINS.claude, operator: "Anthropic", product: "Claude" },
|
|
20663
20854
|
{ domain: AI_ENGINE_DOMAINS.gemini, operator: "Google", product: "Gemini" },
|
|
20664
20855
|
{ domain: AI_ENGINE_DOMAINS.copilotMicrosoft, operator: "Microsoft", product: "Copilot" },
|
|
20856
|
+
{ domain: AI_ENGINE_DOMAINS.grok, operator: "xAI", product: "Grok" },
|
|
20665
20857
|
{ domain: AI_ENGINE_DOMAINS.phind, operator: "Phind", product: "Phind" },
|
|
20666
20858
|
{ domain: AI_ENGINE_DOMAINS.you, operator: "You.com", product: "You.com" },
|
|
20667
20859
|
{ domain: AI_ENGINE_DOMAINS.metaAi, operator: "Meta", product: "Meta AI" }
|
|
20668
20860
|
];
|
|
20669
20861
|
|
|
20670
20862
|
// ../integration-traffic/src/classifier.ts
|
|
20863
|
+
var USER_FETCH_PURPOSE = "user-agent";
|
|
20671
20864
|
function normalizeHost(host) {
|
|
20672
20865
|
return host.trim().toLowerCase().replace(/^www\./, "");
|
|
20673
20866
|
}
|
|
@@ -20708,6 +20901,7 @@ function classifyCrawler(event) {
|
|
|
20708
20901
|
const userAgent = event.userAgent?.trim();
|
|
20709
20902
|
if (!userAgent) return null;
|
|
20710
20903
|
for (const rule of DEFAULT_AI_CRAWLER_RULES) {
|
|
20904
|
+
if (rule.purpose === USER_FETCH_PURPOSE) continue;
|
|
20711
20905
|
if (rule.userAgentPatterns.some((pattern) => pattern.test(userAgent))) {
|
|
20712
20906
|
const verified = verifyIpForRule(event.remoteIp, rule.id);
|
|
20713
20907
|
return {
|
|
@@ -20722,6 +20916,24 @@ function classifyCrawler(event) {
|
|
|
20722
20916
|
}
|
|
20723
20917
|
return null;
|
|
20724
20918
|
}
|
|
20919
|
+
function classifyAiUserFetch(event) {
|
|
20920
|
+
const userAgent = event.userAgent?.trim();
|
|
20921
|
+
if (!userAgent) return null;
|
|
20922
|
+
for (const rule of DEFAULT_AI_CRAWLER_RULES) {
|
|
20923
|
+
if (rule.purpose !== USER_FETCH_PURPOSE) continue;
|
|
20924
|
+
if (rule.userAgentPatterns.some((pattern) => pattern.test(userAgent))) {
|
|
20925
|
+
const verified = verifyIpForRule(event.remoteIp, rule.id);
|
|
20926
|
+
return {
|
|
20927
|
+
botId: rule.id,
|
|
20928
|
+
operator: rule.operator,
|
|
20929
|
+
product: rule.product,
|
|
20930
|
+
verificationStatus: verified ? "verified" : "claimed_unverified",
|
|
20931
|
+
matchedUserAgent: userAgent
|
|
20932
|
+
};
|
|
20933
|
+
}
|
|
20934
|
+
}
|
|
20935
|
+
return null;
|
|
20936
|
+
}
|
|
20725
20937
|
function classifyAiReferral(event) {
|
|
20726
20938
|
const refererHost = hostFromUrl(event.referer);
|
|
20727
20939
|
if (refererHost) {
|
|
@@ -20864,6 +21076,9 @@ function strongerReferralEvidence(current, next) {
|
|
|
20864
21076
|
function sortCrawlerBuckets(a, b) {
|
|
20865
21077
|
return a.tsHour.localeCompare(b.tsHour) || a.botId.localeCompare(b.botId) || a.pathNormalized.localeCompare(b.pathNormalized) || String(a.status).localeCompare(String(b.status));
|
|
20866
21078
|
}
|
|
21079
|
+
function sortAiUserFetchBuckets(a, b) {
|
|
21080
|
+
return a.tsHour.localeCompare(b.tsHour) || a.botId.localeCompare(b.botId) || a.pathNormalized.localeCompare(b.pathNormalized) || String(a.status).localeCompare(String(b.status));
|
|
21081
|
+
}
|
|
20867
21082
|
function sortReferralBuckets(a, b) {
|
|
20868
21083
|
return a.tsHour.localeCompare(b.tsHour) || a.product.localeCompare(b.product) || a.sourceDomain.localeCompare(b.sourceDomain) || a.landingPathNormalized.localeCompare(b.landingPathNormalized) || String(a.status).localeCompare(String(b.status));
|
|
20869
21084
|
}
|
|
@@ -20875,13 +21090,17 @@ function buildTrafficProbeReport(events, options = {}) {
|
|
|
20875
21090
|
const configuredSessionWindowMs = options.aiReferralSessionWindowMs ?? DEFAULT_AI_REFERRAL_SESSION_WINDOW_MS;
|
|
20876
21091
|
const aiReferralSessionWindowMs = configuredSessionWindowMs > 0 ? configuredSessionWindowMs : DEFAULT_AI_REFERRAL_SESSION_WINDOW_MS;
|
|
20877
21092
|
const crawlerBuckets = /* @__PURE__ */ new Map();
|
|
21093
|
+
const aiUserFetchBuckets = /* @__PURE__ */ new Map();
|
|
20878
21094
|
const aiReferralBuckets = /* @__PURE__ */ new Map();
|
|
20879
21095
|
const aiReferralSessions = /* @__PURE__ */ new Map();
|
|
20880
21096
|
const topBots = /* @__PURE__ */ new Map();
|
|
20881
21097
|
const topCrawlerPaths = /* @__PURE__ */ new Map();
|
|
21098
|
+
const topAiUserFetchBots = /* @__PURE__ */ new Map();
|
|
21099
|
+
const topAiUserFetchPaths = /* @__PURE__ */ new Map();
|
|
20882
21100
|
const topAiReferrers = /* @__PURE__ */ new Map();
|
|
20883
21101
|
const topAiReferralLandingPaths = /* @__PURE__ */ new Map();
|
|
20884
21102
|
let crawlerHits = 0;
|
|
21103
|
+
let aiUserFetchHits = 0;
|
|
20885
21104
|
let aiReferralHits = 0;
|
|
20886
21105
|
let unknownHits = 0;
|
|
20887
21106
|
const samples = [];
|
|
@@ -20889,6 +21108,7 @@ function buildTrafficProbeReport(events, options = {}) {
|
|
|
20889
21108
|
const tsHour = hourBucket(event.observedAt);
|
|
20890
21109
|
const pathNormalized = normalizeTrafficPathPattern(event.path);
|
|
20891
21110
|
const crawler = classifyCrawler(event);
|
|
21111
|
+
const aiUserFetch = classifyAiUserFetch(event);
|
|
20892
21112
|
const aiReferral = classifyAiReferral(event);
|
|
20893
21113
|
if (crawler) {
|
|
20894
21114
|
crawlerHits += 1;
|
|
@@ -20921,6 +21141,37 @@ function buildTrafficProbeReport(events, options = {}) {
|
|
|
20921
21141
|
else topBots.set(botKey, { fields: { botId: crawler.botId, operator: crawler.operator }, hits: 1 });
|
|
20922
21142
|
incrementBucket(topCrawlerPaths, pathNormalized, { pathNormalized });
|
|
20923
21143
|
}
|
|
21144
|
+
if (aiUserFetch) {
|
|
21145
|
+
aiUserFetchHits += 1;
|
|
21146
|
+
const key = [
|
|
21147
|
+
tsHour,
|
|
21148
|
+
aiUserFetch.botId,
|
|
21149
|
+
aiUserFetch.verificationStatus,
|
|
21150
|
+
pathNormalized,
|
|
21151
|
+
event.status ?? "null"
|
|
21152
|
+
].join(" ");
|
|
21153
|
+
const existing = aiUserFetchBuckets.get(key);
|
|
21154
|
+
if (existing) {
|
|
21155
|
+
existing.hits += 1;
|
|
21156
|
+
} else {
|
|
21157
|
+
aiUserFetchBuckets.set(key, {
|
|
21158
|
+
tsHour,
|
|
21159
|
+
botId: aiUserFetch.botId,
|
|
21160
|
+
operator: aiUserFetch.operator,
|
|
21161
|
+
product: aiUserFetch.product,
|
|
21162
|
+
verificationStatus: aiUserFetch.verificationStatus,
|
|
21163
|
+
pathNormalized,
|
|
21164
|
+
status: event.status,
|
|
21165
|
+
hits: 1,
|
|
21166
|
+
sampledUserAgent: event.userAgent
|
|
21167
|
+
});
|
|
21168
|
+
}
|
|
21169
|
+
const botKey = `${aiUserFetch.botId} ${aiUserFetch.operator}`;
|
|
21170
|
+
const botEntry = topAiUserFetchBots.get(botKey);
|
|
21171
|
+
if (botEntry) botEntry.hits += 1;
|
|
21172
|
+
else topAiUserFetchBots.set(botKey, { fields: { botId: aiUserFetch.botId, operator: aiUserFetch.operator }, hits: 1 });
|
|
21173
|
+
incrementBucket(topAiUserFetchPaths, pathNormalized, { pathNormalized });
|
|
21174
|
+
}
|
|
20924
21175
|
if (aiReferral) {
|
|
20925
21176
|
aiReferralHits += 1;
|
|
20926
21177
|
const landingPathNormalized = resolveAiReferralLandingPath(event, aiReferral.evidenceType);
|
|
@@ -20939,7 +21190,7 @@ function buildTrafficProbeReport(events, options = {}) {
|
|
|
20939
21190
|
aiReferralSessions.set(key, existing ? strongerReferralEvidence(existing, session) : session);
|
|
20940
21191
|
}
|
|
20941
21192
|
}
|
|
20942
|
-
if (!crawler && !aiReferral) unknownHits += 1;
|
|
21193
|
+
if (!crawler && !aiUserFetch && !aiReferral) unknownHits += 1;
|
|
20943
21194
|
samples.push({
|
|
20944
21195
|
eventId: event.eventId,
|
|
20945
21196
|
observedAt: event.observedAt,
|
|
@@ -20950,6 +21201,7 @@ function buildTrafficProbeReport(events, options = {}) {
|
|
|
20950
21201
|
userAgent: event.userAgent,
|
|
20951
21202
|
referer: event.referer,
|
|
20952
21203
|
crawler,
|
|
21204
|
+
aiUserFetch,
|
|
20953
21205
|
aiReferral
|
|
20954
21206
|
});
|
|
20955
21207
|
if (samples.length > sampleLimit) samples.shift();
|
|
@@ -20985,14 +21237,18 @@ function buildTrafficProbeReport(events, options = {}) {
|
|
|
20985
21237
|
totals: {
|
|
20986
21238
|
normalizedEvents: events.length,
|
|
20987
21239
|
crawlerHits,
|
|
21240
|
+
aiUserFetchHits,
|
|
20988
21241
|
aiReferralSessions: aiReferralSessions.size,
|
|
20989
21242
|
aiReferralHits,
|
|
20990
21243
|
unknownHits
|
|
20991
21244
|
},
|
|
20992
21245
|
crawlerEventsHourly: [...crawlerBuckets.values()].sort(sortCrawlerBuckets),
|
|
21246
|
+
aiUserFetchEventsHourly: [...aiUserFetchBuckets.values()].sort(sortAiUserFetchBuckets),
|
|
20993
21247
|
aiReferralEventsHourly: [...aiReferralBuckets.values()].sort(sortReferralBuckets),
|
|
20994
21248
|
topBots: topEntries(topBots, 10),
|
|
20995
21249
|
topCrawlerPaths: topEntries(topCrawlerPaths, 10),
|
|
21250
|
+
topAiUserFetchBots: topEntries(topAiUserFetchBots, 10),
|
|
21251
|
+
topAiUserFetchPaths: topEntries(topAiUserFetchPaths, 10),
|
|
20996
21252
|
topAiReferrers: topEntries(topAiReferrers, 10),
|
|
20997
21253
|
topAiReferralLandingPaths: topEntries(topAiReferralLandingPaths, 10),
|
|
20998
21254
|
samples
|
|
@@ -21434,6 +21690,13 @@ async function runBackfillTask(options) {
|
|
|
21434
21690
|
lte2(crawlerEventsHourly.tsHour, windowEndIso)
|
|
21435
21691
|
)
|
|
21436
21692
|
).run();
|
|
21693
|
+
tx.delete(aiUserFetchEventsHourly).where(
|
|
21694
|
+
and19(
|
|
21695
|
+
eq24(aiUserFetchEventsHourly.sourceId, sourceRow.id),
|
|
21696
|
+
gte3(aiUserFetchEventsHourly.tsHour, windowStartIso),
|
|
21697
|
+
lte2(aiUserFetchEventsHourly.tsHour, windowEndIso)
|
|
21698
|
+
)
|
|
21699
|
+
).run();
|
|
21437
21700
|
tx.delete(aiReferralEventsHourly).where(
|
|
21438
21701
|
and19(
|
|
21439
21702
|
eq24(aiReferralEventsHourly.sourceId, sourceRow.id),
|
|
@@ -21464,6 +21727,22 @@ async function runBackfillTask(options) {
|
|
|
21464
21727
|
updatedAt: finishedAt
|
|
21465
21728
|
}).run();
|
|
21466
21729
|
}
|
|
21730
|
+
for (const bucket of report.aiUserFetchEventsHourly) {
|
|
21731
|
+
tx.insert(aiUserFetchEventsHourly).values({
|
|
21732
|
+
projectId: project.id,
|
|
21733
|
+
sourceId: sourceRow.id,
|
|
21734
|
+
tsHour: bucket.tsHour,
|
|
21735
|
+
botId: bucket.botId,
|
|
21736
|
+
operator: bucket.operator,
|
|
21737
|
+
verificationStatus: bucket.verificationStatus,
|
|
21738
|
+
pathNormalized: bucket.pathNormalized,
|
|
21739
|
+
status: bucket.status ?? 0,
|
|
21740
|
+
hits: bucket.hits,
|
|
21741
|
+
sampledUserAgent: bucket.sampledUserAgent,
|
|
21742
|
+
createdAt: finishedAt,
|
|
21743
|
+
updatedAt: finishedAt
|
|
21744
|
+
}).run();
|
|
21745
|
+
}
|
|
21467
21746
|
for (const bucket of report.aiReferralEventsHourly) {
|
|
21468
21747
|
tx.insert(aiReferralEventsHourly).values({
|
|
21469
21748
|
projectId: project.id,
|
|
@@ -21482,7 +21761,7 @@ async function runBackfillTask(options) {
|
|
|
21482
21761
|
}).run();
|
|
21483
21762
|
}
|
|
21484
21763
|
for (const sample of report.samples) {
|
|
21485
|
-
const eventType = sample.crawler ? "crawler" : sample.aiReferral ? "ai_referral" : "unknown";
|
|
21764
|
+
const eventType = sample.crawler ? "crawler" : sample.aiUserFetch ? "ai_user_fetch" : sample.aiReferral ? "ai_referral" : "unknown";
|
|
21486
21765
|
const refererHost = (() => {
|
|
21487
21766
|
if (!sample.referer) return null;
|
|
21488
21767
|
try {
|
|
@@ -21504,6 +21783,7 @@ async function runBackfillTask(options) {
|
|
|
21504
21783
|
refererHost,
|
|
21505
21784
|
classifierDetailsJson: {
|
|
21506
21785
|
crawler: sample.crawler,
|
|
21786
|
+
aiUserFetch: sample.aiUserFetch,
|
|
21507
21787
|
aiReferral: sample.aiReferral
|
|
21508
21788
|
},
|
|
21509
21789
|
createdAt: finishedAt
|
|
@@ -21813,6 +22093,7 @@ async function trafficRoutes(app, opts) {
|
|
|
21813
22093
|
sourceId: sourceRow.id,
|
|
21814
22094
|
pulledEvents: 0,
|
|
21815
22095
|
crawlerHits: 0,
|
|
22096
|
+
aiUserFetchHits: 0,
|
|
21816
22097
|
aiReferralHits: 0,
|
|
21817
22098
|
durationMs: Date.now() - syncStartedAtMs,
|
|
21818
22099
|
errorCode
|
|
@@ -21964,11 +22245,13 @@ async function trafficRoutes(app, opts) {
|
|
|
21964
22245
|
allEvents = page.events;
|
|
21965
22246
|
}
|
|
21966
22247
|
let crawlerBucketRows = 0;
|
|
22248
|
+
let aiUserFetchBucketRows = 0;
|
|
21967
22249
|
let aiReferralBucketRows = 0;
|
|
21968
22250
|
let sampleRows = 0;
|
|
21969
22251
|
let finishedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
21970
22252
|
let pulledEventsCount = 0;
|
|
21971
22253
|
let crawlerHitsCount = 0;
|
|
22254
|
+
let aiUserFetchHitsCount = 0;
|
|
21972
22255
|
let aiReferralHitsCount = 0;
|
|
21973
22256
|
let unknownHitsCount = 0;
|
|
21974
22257
|
app.db.transaction((tx) => {
|
|
@@ -21990,6 +22273,7 @@ async function trafficRoutes(app, opts) {
|
|
|
21990
22273
|
finishedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
21991
22274
|
pulledEventsCount = report.totals.normalizedEvents;
|
|
21992
22275
|
crawlerHitsCount = report.totals.crawlerHits;
|
|
22276
|
+
aiUserFetchHitsCount = report.totals.aiUserFetchHits;
|
|
21993
22277
|
aiReferralHitsCount = report.totals.aiReferralHits;
|
|
21994
22278
|
unknownHitsCount = report.totals.unknownHits;
|
|
21995
22279
|
for (const bucket of report.crawlerEventsHourly) {
|
|
@@ -22025,6 +22309,39 @@ async function trafficRoutes(app, opts) {
|
|
|
22025
22309
|
}).run();
|
|
22026
22310
|
crawlerBucketRows += 1;
|
|
22027
22311
|
}
|
|
22312
|
+
for (const bucket of report.aiUserFetchEventsHourly) {
|
|
22313
|
+
const status = bucket.status ?? 0;
|
|
22314
|
+
tx.insert(aiUserFetchEventsHourly).values({
|
|
22315
|
+
projectId: project.id,
|
|
22316
|
+
sourceId: sourceRow.id,
|
|
22317
|
+
tsHour: bucket.tsHour,
|
|
22318
|
+
botId: bucket.botId,
|
|
22319
|
+
operator: bucket.operator,
|
|
22320
|
+
verificationStatus: bucket.verificationStatus,
|
|
22321
|
+
pathNormalized: bucket.pathNormalized,
|
|
22322
|
+
status,
|
|
22323
|
+
hits: bucket.hits,
|
|
22324
|
+
sampledUserAgent: bucket.sampledUserAgent,
|
|
22325
|
+
createdAt: finishedAt,
|
|
22326
|
+
updatedAt: finishedAt
|
|
22327
|
+
}).onConflictDoUpdate({
|
|
22328
|
+
target: [
|
|
22329
|
+
aiUserFetchEventsHourly.projectId,
|
|
22330
|
+
aiUserFetchEventsHourly.sourceId,
|
|
22331
|
+
aiUserFetchEventsHourly.tsHour,
|
|
22332
|
+
aiUserFetchEventsHourly.botId,
|
|
22333
|
+
aiUserFetchEventsHourly.verificationStatus,
|
|
22334
|
+
aiUserFetchEventsHourly.pathNormalized,
|
|
22335
|
+
aiUserFetchEventsHourly.status
|
|
22336
|
+
],
|
|
22337
|
+
set: {
|
|
22338
|
+
hits: sql10`${aiUserFetchEventsHourly.hits} + ${bucket.hits}`,
|
|
22339
|
+
sampledUserAgent: bucket.sampledUserAgent,
|
|
22340
|
+
updatedAt: finishedAt
|
|
22341
|
+
}
|
|
22342
|
+
}).run();
|
|
22343
|
+
aiUserFetchBucketRows += 1;
|
|
22344
|
+
}
|
|
22028
22345
|
for (const bucket of report.aiReferralEventsHourly) {
|
|
22029
22346
|
const status = bucket.status ?? 0;
|
|
22030
22347
|
tx.insert(aiReferralEventsHourly).values({
|
|
@@ -22060,7 +22377,7 @@ async function trafficRoutes(app, opts) {
|
|
|
22060
22377
|
aiReferralBucketRows += 1;
|
|
22061
22378
|
}
|
|
22062
22379
|
for (const sample of report.samples) {
|
|
22063
|
-
const eventType = sample.crawler ? "crawler" : sample.aiReferral ? "ai_referral" : "unknown";
|
|
22380
|
+
const eventType = sample.crawler ? "crawler" : sample.aiUserFetch ? "ai_user_fetch" : sample.aiReferral ? "ai_referral" : "unknown";
|
|
22064
22381
|
const refererHost = (() => {
|
|
22065
22382
|
if (!sample.referer) return null;
|
|
22066
22383
|
try {
|
|
@@ -22082,6 +22399,7 @@ async function trafficRoutes(app, opts) {
|
|
|
22082
22399
|
refererHost,
|
|
22083
22400
|
classifierDetailsJson: {
|
|
22084
22401
|
crawler: sample.crawler,
|
|
22402
|
+
aiUserFetch: sample.aiUserFetch,
|
|
22085
22403
|
aiReferral: sample.aiReferral
|
|
22086
22404
|
},
|
|
22087
22405
|
createdAt: finishedAt
|
|
@@ -22119,6 +22437,7 @@ async function trafficRoutes(app, opts) {
|
|
|
22119
22437
|
sourceId: sourceRow.id,
|
|
22120
22438
|
pulledEvents: pulledEventsCount,
|
|
22121
22439
|
crawlerHits: crawlerHitsCount,
|
|
22440
|
+
aiUserFetchHits: aiUserFetchHitsCount,
|
|
22122
22441
|
aiReferralHits: aiReferralHitsCount,
|
|
22123
22442
|
durationMs: Date.now() - syncStartedAtMs
|
|
22124
22443
|
});
|
|
@@ -22130,9 +22449,11 @@ async function trafficRoutes(app, opts) {
|
|
|
22130
22449
|
syncedAt: finishedAt,
|
|
22131
22450
|
pulledEvents: pulledEventsCount,
|
|
22132
22451
|
crawlerHits: crawlerHitsCount,
|
|
22452
|
+
aiUserFetchHits: aiUserFetchHitsCount,
|
|
22133
22453
|
aiReferralHits: aiReferralHitsCount,
|
|
22134
22454
|
unknownHits: unknownHitsCount,
|
|
22135
22455
|
crawlerBucketRows,
|
|
22456
|
+
aiUserFetchBucketRows,
|
|
22136
22457
|
aiReferralBucketRows,
|
|
22137
22458
|
sampleRows,
|
|
22138
22459
|
windowStart: windowStart.toISOString(),
|
|
@@ -22311,6 +22632,12 @@ async function trafficRoutes(app, opts) {
|
|
|
22311
22632
|
gte3(crawlerEventsHourly.tsHour, since)
|
|
22312
22633
|
)
|
|
22313
22634
|
).get();
|
|
22635
|
+
const aiUserFetchTotals = app.db.select({ total: sql10`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)` }).from(aiUserFetchEventsHourly).where(
|
|
22636
|
+
and19(
|
|
22637
|
+
eq24(aiUserFetchEventsHourly.sourceId, row.id),
|
|
22638
|
+
gte3(aiUserFetchEventsHourly.tsHour, since)
|
|
22639
|
+
)
|
|
22640
|
+
).get();
|
|
22314
22641
|
const aiTotals = app.db.select({ total: sql10`COALESCE(SUM(${aiReferralEventsHourly.sessionsOrHits}), 0)` }).from(aiReferralEventsHourly).where(
|
|
22315
22642
|
and19(
|
|
22316
22643
|
eq24(aiReferralEventsHourly.sourceId, row.id),
|
|
@@ -22334,6 +22661,7 @@ async function trafficRoutes(app, opts) {
|
|
|
22334
22661
|
...rowToDto(row),
|
|
22335
22662
|
totals24h: {
|
|
22336
22663
|
crawlerHits: Number(crawlerTotals?.total ?? 0),
|
|
22664
|
+
aiUserFetchHits: Number(aiUserFetchTotals?.total ?? 0),
|
|
22337
22665
|
aiReferralHits: Number(aiTotals?.total ?? 0),
|
|
22338
22666
|
sampleCount: Number(sampleTotals?.total ?? 0)
|
|
22339
22667
|
},
|
|
@@ -22393,10 +22721,12 @@ async function trafficRoutes(app, opts) {
|
|
|
22393
22721
|
const kindParam = request.query?.kind;
|
|
22394
22722
|
let kind = "all";
|
|
22395
22723
|
if (kindParam !== void 0) {
|
|
22396
|
-
if (kindParam === "all" || kindParam === TrafficEventKinds.crawler || kindParam === TrafficEventKinds["ai-referral"]) {
|
|
22724
|
+
if (kindParam === "all" || kindParam === TrafficEventKinds.crawler || kindParam === TrafficEventKinds["ai-user-fetch"] || kindParam === TrafficEventKinds["ai-referral"]) {
|
|
22397
22725
|
kind = kindParam;
|
|
22398
22726
|
} else {
|
|
22399
|
-
throw validationError(
|
|
22727
|
+
throw validationError(
|
|
22728
|
+
`"kind" must be one of: all, ${TrafficEventKinds.crawler}, ${TrafficEventKinds["ai-user-fetch"]}, ${TrafficEventKinds["ai-referral"]}`
|
|
22729
|
+
);
|
|
22400
22730
|
}
|
|
22401
22731
|
}
|
|
22402
22732
|
const limitParam = request.query?.limit;
|
|
@@ -22410,6 +22740,7 @@ async function trafficRoutes(app, opts) {
|
|
|
22410
22740
|
const untilIso = until.toISOString();
|
|
22411
22741
|
const events = [];
|
|
22412
22742
|
let crawlerTotal = 0;
|
|
22743
|
+
let aiUserFetchTotal = 0;
|
|
22413
22744
|
let aiReferralTotal = 0;
|
|
22414
22745
|
if (kind === "all" || kind === TrafficEventKinds.crawler) {
|
|
22415
22746
|
const crawlerFilters = [
|
|
@@ -22436,6 +22767,31 @@ async function trafficRoutes(app, opts) {
|
|
|
22436
22767
|
});
|
|
22437
22768
|
}
|
|
22438
22769
|
}
|
|
22770
|
+
if (kind === "all" || kind === TrafficEventKinds["ai-user-fetch"]) {
|
|
22771
|
+
const userFetchFilters = [
|
|
22772
|
+
eq24(aiUserFetchEventsHourly.projectId, project.id),
|
|
22773
|
+
gte3(aiUserFetchEventsHourly.tsHour, sinceIso),
|
|
22774
|
+
lte2(aiUserFetchEventsHourly.tsHour, untilIso)
|
|
22775
|
+
];
|
|
22776
|
+
if (sourceIdParam) userFetchFilters.push(eq24(aiUserFetchEventsHourly.sourceId, sourceIdParam));
|
|
22777
|
+
const userFetchWhere = and19(...userFetchFilters);
|
|
22778
|
+
const total = app.db.select({ total: sql10`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)` }).from(aiUserFetchEventsHourly).where(userFetchWhere).get();
|
|
22779
|
+
aiUserFetchTotal = Number(total?.total ?? 0);
|
|
22780
|
+
const rows = app.db.select().from(aiUserFetchEventsHourly).where(userFetchWhere).orderBy(desc13(aiUserFetchEventsHourly.tsHour)).limit(limit).all();
|
|
22781
|
+
for (const r of rows) {
|
|
22782
|
+
events.push({
|
|
22783
|
+
kind: TrafficEventKinds["ai-user-fetch"],
|
|
22784
|
+
sourceId: r.sourceId,
|
|
22785
|
+
tsHour: r.tsHour,
|
|
22786
|
+
botId: r.botId,
|
|
22787
|
+
operator: r.operator,
|
|
22788
|
+
verificationStatus: r.verificationStatus,
|
|
22789
|
+
pathNormalized: r.pathNormalized,
|
|
22790
|
+
status: r.status,
|
|
22791
|
+
hits: r.hits
|
|
22792
|
+
});
|
|
22793
|
+
}
|
|
22794
|
+
}
|
|
22439
22795
|
if (kind === "all" || kind === TrafficEventKinds["ai-referral"]) {
|
|
22440
22796
|
const aiFilters = [
|
|
22441
22797
|
eq24(aiReferralEventsHourly.projectId, project.id),
|
|
@@ -22469,6 +22825,7 @@ async function trafficRoutes(app, opts) {
|
|
|
22469
22825
|
windowEnd: untilIso,
|
|
22470
22826
|
totals: {
|
|
22471
22827
|
crawlerHits: crawlerTotal,
|
|
22828
|
+
aiUserFetchHits: aiUserFetchTotal,
|
|
22472
22829
|
aiReferralHits: aiReferralTotal
|
|
22473
22830
|
},
|
|
22474
22831
|
events: trimmed
|
|
@@ -29445,7 +29802,7 @@ function readStoredGroundingSources(rawResponse) {
|
|
|
29445
29802
|
return result;
|
|
29446
29803
|
}
|
|
29447
29804
|
async function backfillInsightsCommand(project, opts) {
|
|
29448
|
-
const { IntelligenceService: IntelligenceService2 } = await import("./intelligence-service-
|
|
29805
|
+
const { IntelligenceService: IntelligenceService2 } = await import("./intelligence-service-OCREQUCQ.js");
|
|
29449
29806
|
const config = loadConfig();
|
|
29450
29807
|
const db = createClient(config.database);
|
|
29451
29808
|
migrate(db);
|
|
@@ -29860,42 +30217,74 @@ async function backfillTrafficClassificationCommand(opts) {
|
|
|
29860
30217
|
providerResource: { type: "cloud_run_revision", labels: {} },
|
|
29861
30218
|
providerLabels: {}
|
|
29862
30219
|
};
|
|
29863
|
-
const
|
|
30220
|
+
const userFetch = classifyAiUserFetch(probe);
|
|
30221
|
+
const classified = userFetch ?? classifyCrawler(probe);
|
|
29864
30222
|
if (!classified) continue;
|
|
29865
30223
|
result.reclassified++;
|
|
29866
30224
|
result.byBot[classified.botId] = (result.byBot[classified.botId] ?? 0) + 1;
|
|
29867
30225
|
if (isDryRun) continue;
|
|
29868
|
-
db.update(rawEventSamples).set({ eventType: TrafficEventKinds.crawler }).where(eq35(rawEventSamples.id, snap.id)).run();
|
|
30226
|
+
db.update(rawEventSamples).set({ eventType: userFetch ? TrafficEventKinds["ai-user-fetch"] : TrafficEventKinds.crawler }).where(eq35(rawEventSamples.id, snap.id)).run();
|
|
29869
30227
|
const tsHour = new Date(snap.ts);
|
|
29870
30228
|
tsHour.setUTCMinutes(0, 0, 0);
|
|
29871
|
-
|
|
29872
|
-
|
|
29873
|
-
|
|
29874
|
-
|
|
29875
|
-
|
|
29876
|
-
|
|
29877
|
-
|
|
29878
|
-
|
|
29879
|
-
|
|
29880
|
-
|
|
29881
|
-
|
|
29882
|
-
|
|
29883
|
-
|
|
29884
|
-
}).onConflictDoUpdate({
|
|
29885
|
-
target: [
|
|
29886
|
-
crawlerEventsHourly.projectId,
|
|
29887
|
-
crawlerEventsHourly.sourceId,
|
|
29888
|
-
crawlerEventsHourly.tsHour,
|
|
29889
|
-
crawlerEventsHourly.botId,
|
|
29890
|
-
crawlerEventsHourly.verificationStatus,
|
|
29891
|
-
crawlerEventsHourly.pathNormalized,
|
|
29892
|
-
crawlerEventsHourly.status
|
|
29893
|
-
],
|
|
29894
|
-
set: {
|
|
29895
|
-
hits: sql15`${crawlerEventsHourly.hits} + 1`,
|
|
30229
|
+
if (userFetch) {
|
|
30230
|
+
db.insert(aiUserFetchEventsHourly).values({
|
|
30231
|
+
projectId: snap.projectId,
|
|
30232
|
+
sourceId: snap.sourceId,
|
|
30233
|
+
tsHour: tsHour.toISOString(),
|
|
30234
|
+
botId: userFetch.botId,
|
|
30235
|
+
operator: userFetch.operator,
|
|
30236
|
+
verificationStatus: userFetch.verificationStatus,
|
|
30237
|
+
pathNormalized: snap.pathNormalized,
|
|
30238
|
+
status: snap.status ?? 200,
|
|
30239
|
+
hits: 1,
|
|
30240
|
+
sampledUserAgent: snap.userAgent,
|
|
30241
|
+
createdAt: now,
|
|
29896
30242
|
updatedAt: now
|
|
29897
|
-
}
|
|
29898
|
-
|
|
30243
|
+
}).onConflictDoUpdate({
|
|
30244
|
+
target: [
|
|
30245
|
+
aiUserFetchEventsHourly.projectId,
|
|
30246
|
+
aiUserFetchEventsHourly.sourceId,
|
|
30247
|
+
aiUserFetchEventsHourly.tsHour,
|
|
30248
|
+
aiUserFetchEventsHourly.botId,
|
|
30249
|
+
aiUserFetchEventsHourly.verificationStatus,
|
|
30250
|
+
aiUserFetchEventsHourly.pathNormalized,
|
|
30251
|
+
aiUserFetchEventsHourly.status
|
|
30252
|
+
],
|
|
30253
|
+
set: {
|
|
30254
|
+
hits: sql15`${aiUserFetchEventsHourly.hits} + 1`,
|
|
30255
|
+
updatedAt: now
|
|
30256
|
+
}
|
|
30257
|
+
}).run();
|
|
30258
|
+
} else {
|
|
30259
|
+
db.insert(crawlerEventsHourly).values({
|
|
30260
|
+
projectId: snap.projectId,
|
|
30261
|
+
sourceId: snap.sourceId,
|
|
30262
|
+
tsHour: tsHour.toISOString(),
|
|
30263
|
+
botId: classified.botId,
|
|
30264
|
+
operator: classified.operator,
|
|
30265
|
+
verificationStatus: classified.verificationStatus,
|
|
30266
|
+
pathNormalized: snap.pathNormalized,
|
|
30267
|
+
status: snap.status ?? 200,
|
|
30268
|
+
hits: 1,
|
|
30269
|
+
sampledUserAgent: snap.userAgent,
|
|
30270
|
+
createdAt: now,
|
|
30271
|
+
updatedAt: now
|
|
30272
|
+
}).onConflictDoUpdate({
|
|
30273
|
+
target: [
|
|
30274
|
+
crawlerEventsHourly.projectId,
|
|
30275
|
+
crawlerEventsHourly.sourceId,
|
|
30276
|
+
crawlerEventsHourly.tsHour,
|
|
30277
|
+
crawlerEventsHourly.botId,
|
|
30278
|
+
crawlerEventsHourly.verificationStatus,
|
|
30279
|
+
crawlerEventsHourly.pathNormalized,
|
|
30280
|
+
crawlerEventsHourly.status
|
|
30281
|
+
],
|
|
30282
|
+
set: {
|
|
30283
|
+
hits: sql15`${crawlerEventsHourly.hits} + 1`,
|
|
30284
|
+
updatedAt: now
|
|
30285
|
+
}
|
|
30286
|
+
}).run();
|
|
30287
|
+
}
|
|
29899
30288
|
}
|
|
29900
30289
|
if (!isDryRun) {
|
|
29901
30290
|
const afterRow = db.select({ n: sql15`count(*)` }).from(rawEventSamples).where(and28(
|