@ainyc/canonry 4.51.4 → 4.54.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/agent-workspace/skills/canonry/references/server-side-traffic.md +8 -5
- package/assets/assets/{BacklinksPage-9TlM08Wf.js → BacklinksPage-BXFT4pLI.js} +1 -1
- package/assets/assets/ProjectPage-DAtd9Vay.js +6 -0
- package/assets/assets/{RunRow-D7qdWWRl.js → RunRow-38dDceGl.js} +1 -1
- package/assets/assets/{RunsPage-CvewepfU.js → RunsPage-AJnFLtaE.js} +1 -1
- package/assets/assets/{SettingsPage-C7BvAhiB.js → SettingsPage-FT9ZAvFH.js} +1 -1
- package/assets/assets/{TrafficPage-DC3NhFOh.js → TrafficPage-B4A3oO8M.js} +1 -1
- package/assets/assets/TrafficSourceDetailPage-8NYU1TA6.js +1 -0
- package/assets/assets/{arrow-left-Agb02DMK.js → arrow-left-DgI0X1Q1.js} +1 -1
- package/assets/assets/{index-DeGyEwik.css → index-Bm3JQsW0.css} +1 -1
- package/assets/assets/{index-DTCZ93Ne.js → index-DLPKqyhx.js} +50 -50
- package/assets/assets/{server-traffic-C-0Ndjpw.js → server-traffic-GqiQYm6x.js} +1 -1
- package/assets/assets/{trash-2-lkrXVRRm.js → trash-2-BwPzJ8NI.js} +1 -1
- package/assets/index.html +2 -2
- package/dist/{chunk-WBO5S3IX.js → chunk-CRO6Q25G.js} +533 -65
- package/dist/{chunk-HMZKIOLG.js → chunk-J7MX3YOH.js} +1 -1
- package/dist/{chunk-QZ5XSM6C.js → chunk-JHAHNKSN.js} +103 -1
- package/dist/{chunk-FYGBW3SM.js → chunk-VZPDBHBW.js} +29 -1
- package/dist/cli.js +40 -27
- package/dist/index.js +4 -4
- package/dist/{intelligence-service-2XL2M7QP.js → intelligence-service-OCREQUCQ.js} +2 -2
- package/dist/mcp.js +2 -2
- package/package.json +12 -12
- package/assets/assets/ProjectPage-CD591qDz.js +0 -6
- package/assets/assets/TrafficSourceDetailPage-BvtTA6rs.js +0 -1
|
@@ -6,7 +6,7 @@ import {
|
|
|
6
6
|
loadConfig,
|
|
7
7
|
loadConfigRaw,
|
|
8
8
|
saveConfigPatch
|
|
9
|
-
} from "./chunk-HMZKIOLG.js";
|
|
9
|
+
} from "./chunk-J7MX3YOH.js";
|
|
10
10
|
import {
|
|
11
11
|
DEFAULT_RUN_HISTORY_LIMIT,
|
|
12
12
|
IntelligenceService,
|
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
agentMemory,
|
|
15
15
|
agentSessions,
|
|
16
16
|
aiReferralEventsHourly,
|
|
17
|
+
aiUserFetchEventsHourly,
|
|
17
18
|
apiKeys,
|
|
18
19
|
auditLog,
|
|
19
20
|
backlinkDomains,
|
|
@@ -83,7 +84,7 @@ import {
|
|
|
83
84
|
smoothedRunDelta,
|
|
84
85
|
trafficSources,
|
|
85
86
|
usageCounters
|
|
86
|
-
} from "./chunk-QZ5XSM6C.js";
|
|
87
|
+
} from "./chunk-JHAHNKSN.js";
|
|
87
88
|
import {
|
|
88
89
|
AGENT_MEMORY_VALUE_MAX_BYTES,
|
|
89
90
|
AGENT_PROVIDER_IDS,
|
|
@@ -284,7 +285,7 @@ import {
|
|
|
284
285
|
wordpressSchemaDeployResultDtoSchema,
|
|
285
286
|
wordpressSchemaStatusResultDtoSchema,
|
|
286
287
|
wordpressStatusDtoSchema
|
|
287
|
-
} from "./chunk-FYGBW3SM.js";
|
|
288
|
+
} from "./chunk-VZPDBHBW.js";
|
|
288
289
|
|
|
289
290
|
// src/telemetry.ts
|
|
290
291
|
import crypto from "crypto";
|
|
@@ -5455,11 +5456,14 @@ function renderServerActivity(report, audience) {
|
|
|
5455
5456
|
const crawlerTrustSummary = `${formatNumber(sa.verifiedCrawlerHits.current)} verified \xB7 ${formatNumber(sa.unverifiedCrawlerHits.current)} unverified`;
|
|
5456
5457
|
const crawlerDelta = formatDelta(crawlerRequests, "requests");
|
|
5457
5458
|
const crawlerSubtitle = crawlerDelta ? `${escapeHtml(crawlerTrustSummary)} \xB7 ${crawlerDelta}` : escapeHtml(crawlerTrustSummary);
|
|
5458
|
-
const
|
|
5459
|
+
const userFetchDelta = formatDelta(sa.aiUserFetchHits, "requests");
|
|
5460
|
+
const userFetchSubtitle = userFetchDelta || escapeHtml("ChatGPT-User, Perplexity-User, MistralAI-User");
|
|
5461
|
+
const clientOperators = sa.byOperator.filter((o) => o.verifiedHits > 0 || o.unverifiedHits > 0 || o.userFetchHits > 0 || o.referralArrivals > 0).slice(0, 5);
|
|
5459
5462
|
const clientOperatorRows = clientOperators.map((o) => `
|
|
5460
5463
|
<tr>
|
|
5461
5464
|
<td>${escapeHtml(o.operator)}</td>
|
|
5462
5465
|
<td class="numeric">${formatNumber(o.verifiedHits + o.unverifiedHits)}</td>
|
|
5466
|
+
<td class="numeric">${formatNumber(o.userFetchHits)}</td>
|
|
5463
5467
|
<td class="numeric">${formatNumber(o.referralArrivals)}</td>
|
|
5464
5468
|
</tr>`).join("");
|
|
5465
5469
|
return section(
|
|
@@ -5470,6 +5474,11 @@ function renderServerActivity(report, audience) {
|
|
|
5470
5474
|
<div class="value">${formatNumber(crawlerRequests.current)}</div>
|
|
5471
5475
|
<div class="subtitle">${crawlerSubtitle}</div>
|
|
5472
5476
|
</div>
|
|
5477
|
+
<div class="metric">
|
|
5478
|
+
<div class="label">AI user-fetch requests</div>
|
|
5479
|
+
<div class="value">${formatNumber(sa.aiUserFetchHits.current)}</div>
|
|
5480
|
+
<div class="subtitle">${userFetchSubtitle}</div>
|
|
5481
|
+
</div>
|
|
5473
5482
|
<div class="metric">
|
|
5474
5483
|
<div class="label">AI referral sessions</div>
|
|
5475
5484
|
<div class="value">${formatNumber(sa.referralArrivals.current)}</div>
|
|
@@ -5478,10 +5487,10 @@ function renderServerActivity(report, audience) {
|
|
|
5478
5487
|
</div>
|
|
5479
5488
|
${clientOperatorRows ? `<div class="chart-card"><h3>By AI tool</h3>
|
|
5480
5489
|
<table class="report-table">
|
|
5481
|
-
<thead><tr><th>AI tool</th><th class="numeric">Bot requests (7d)</th><th class="numeric">Referral sessions</th></tr></thead>
|
|
5490
|
+
<thead><tr><th>AI tool</th><th class="numeric">Bot requests (7d)</th><th class="numeric">User fetches (7d)</th><th class="numeric">Referral sessions</th></tr></thead>
|
|
5482
5491
|
<tbody>${clientOperatorRows}</tbody>
|
|
5483
5492
|
</table>
|
|
5484
|
-
<p class="meta">Verified requests are reverse-DNS confirmed
|
|
5493
|
+
<p class="meta">Bot requests are bulk crawl (GPTBot, PerplexityBot, \u2026). User fetches are on-demand reads triggered by real users inside an AI surface (ChatGPT-User, Perplexity-User, \u2026). Verified requests are reverse-DNS confirmed; unverified requests are UA claims shown separately in agency diagnostics.</p>
|
|
5485
5494
|
</div>` : ""}`
|
|
5486
5495
|
);
|
|
5487
5496
|
}
|
|
@@ -5493,6 +5502,7 @@ function renderServerActivity(report, audience) {
|
|
|
5493
5502
|
<td>${escapeHtml(o.operator)}</td>
|
|
5494
5503
|
<td class="numeric">${formatNumber(o.verifiedHits)}</td>
|
|
5495
5504
|
<td class="numeric meta">${formatNumber(o.unverifiedHits)}</td>
|
|
5505
|
+
<td class="numeric">${formatNumber(o.userFetchHits)}</td>
|
|
5496
5506
|
<td class="numeric">${formatNumber(o.referralArrivals)}</td>
|
|
5497
5507
|
<td class="numeric ${toneClass}">${deltaText}</td>
|
|
5498
5508
|
</tr>`;
|
|
@@ -5533,6 +5543,11 @@ function renderServerActivity(report, audience) {
|
|
|
5533
5543
|
<div class="value">${formatNumber(sa.unverifiedCrawlerHits.current)}</div>
|
|
5534
5544
|
<div class="subtitle">${formatDelta(sa.unverifiedCrawlerHits, "hits")}</div>
|
|
5535
5545
|
</div>
|
|
5546
|
+
<div class="metric">
|
|
5547
|
+
<div class="label">AI user-fetch hits (7d)</div>
|
|
5548
|
+
<div class="value">${formatNumber(sa.aiUserFetchHits.current)}</div>
|
|
5549
|
+
<div class="subtitle">${formatDelta(sa.aiUserFetchHits, "hits")}</div>
|
|
5550
|
+
</div>
|
|
5536
5551
|
<div class="metric">
|
|
5537
5552
|
<div class="label">AI-referral sessions (7d)</div>
|
|
5538
5553
|
<div class="value">${formatNumber(sa.referralArrivals.current)}</div>
|
|
@@ -5541,9 +5556,9 @@ function renderServerActivity(report, audience) {
|
|
|
5541
5556
|
</div>
|
|
5542
5557
|
${trendChart}
|
|
5543
5558
|
${operatorRows ? `<div class="chart-card"><h3>Per AI operator</h3>
|
|
5544
|
-
<p class="meta">Verified means rDNS-confirmed. Unverified bots claim the user-agent but couldn't be verified \u2014 could be the real bot or an imitator.</p>
|
|
5559
|
+
<p class="meta">Verified means rDNS-confirmed. Unverified bots claim the user-agent but couldn't be verified \u2014 could be the real bot or an imitator. User fetches are on-demand reads from an AI surface on behalf of a real user (ChatGPT-User, Perplexity-User, \u2026) \u2014 disjoint from bulk crawl.</p>
|
|
5545
5560
|
<table class="report-table">
|
|
5546
|
-
<thead><tr><th>Operator</th><th class="numeric">Verified hits</th><th class="numeric">Unverified</th><th class="numeric">Referral sessions</th><th class="numeric">7d delta</th></tr></thead>
|
|
5561
|
+
<thead><tr><th>Operator</th><th class="numeric">Verified hits</th><th class="numeric">Unverified</th><th class="numeric">User fetches</th><th class="numeric">Referral sessions</th><th class="numeric">7d delta</th></tr></thead>
|
|
5547
5562
|
<tbody>${operatorRows}</tbody>
|
|
5548
5563
|
</table>
|
|
5549
5564
|
</div>` : ""}
|
|
@@ -6385,10 +6400,21 @@ function buildServerActivity(db, projectId) {
|
|
|
6385
6400
|
)
|
|
6386
6401
|
).get()?.total ?? 0
|
|
6387
6402
|
);
|
|
6403
|
+
const sumUserFetches = (windowStartIso, windowEndIso, exclusiveEnd = false) => Number(
|
|
6404
|
+
db.select({ total: sql5`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)` }).from(aiUserFetchEventsHourly).where(
|
|
6405
|
+
and9(
|
|
6406
|
+
eq14(aiUserFetchEventsHourly.projectId, projectId),
|
|
6407
|
+
gte2(aiUserFetchEventsHourly.tsHour, windowStartIso),
|
|
6408
|
+
exclusiveEnd ? lt(aiUserFetchEventsHourly.tsHour, windowEndIso) : lte(aiUserFetchEventsHourly.tsHour, windowEndIso)
|
|
6409
|
+
)
|
|
6410
|
+
).get()?.total ?? 0
|
|
6411
|
+
);
|
|
6388
6412
|
const verifiedCurrent = sumVerifiedCrawlers(headlineStart, headlineEnd);
|
|
6389
6413
|
const verifiedPrior = sumVerifiedCrawlers(priorStart, headlineStart, true);
|
|
6390
6414
|
const unverifiedCurrent = sumUnverifiedCrawlers(headlineStart, headlineEnd);
|
|
6391
6415
|
const unverifiedPrior = sumUnverifiedCrawlers(priorStart, headlineStart, true);
|
|
6416
|
+
const userFetchCurrent = sumUserFetches(headlineStart, headlineEnd);
|
|
6417
|
+
const userFetchPrior = sumUserFetches(priorStart, headlineStart, true);
|
|
6392
6418
|
const referralCurrent = sumReferrals(headlineStart, headlineEnd);
|
|
6393
6419
|
const referralPrior = sumReferrals(priorStart, headlineStart, true);
|
|
6394
6420
|
const crawlerByOperatorRows = db.select({
|
|
@@ -6424,11 +6450,21 @@ function buildServerActivity(db, projectId) {
|
|
|
6424
6450
|
lte(aiReferralEventsHourly.tsHour, headlineEnd)
|
|
6425
6451
|
)
|
|
6426
6452
|
).groupBy(aiReferralEventsHourly.operator).all();
|
|
6453
|
+
const userFetchByOperatorRows = db.select({
|
|
6454
|
+
operator: aiUserFetchEventsHourly.operator,
|
|
6455
|
+
hits: sql5`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)`
|
|
6456
|
+
}).from(aiUserFetchEventsHourly).where(
|
|
6457
|
+
and9(
|
|
6458
|
+
eq14(aiUserFetchEventsHourly.projectId, projectId),
|
|
6459
|
+
gte2(aiUserFetchEventsHourly.tsHour, headlineStart),
|
|
6460
|
+
lte(aiUserFetchEventsHourly.tsHour, headlineEnd)
|
|
6461
|
+
)
|
|
6462
|
+
).groupBy(aiUserFetchEventsHourly.operator).all();
|
|
6427
6463
|
const operatorAgg = /* @__PURE__ */ new Map();
|
|
6428
6464
|
const ensureOp = (op) => {
|
|
6429
6465
|
let entry = operatorAgg.get(op);
|
|
6430
6466
|
if (!entry) {
|
|
6431
|
-
entry = { verified: 0, unverified: 0, referrals: 0, prior: 0 };
|
|
6467
|
+
entry = { verified: 0, unverified: 0, userFetch: 0, referrals: 0, prior: 0 };
|
|
6432
6468
|
operatorAgg.set(op, entry);
|
|
6433
6469
|
}
|
|
6434
6470
|
return entry;
|
|
@@ -6441,6 +6477,9 @@ function buildServerActivity(db, projectId) {
|
|
|
6441
6477
|
for (const r of crawlerByOperatorPriorRows) {
|
|
6442
6478
|
ensureOp(r.operator).prior += Number(r.hits);
|
|
6443
6479
|
}
|
|
6480
|
+
for (const r of userFetchByOperatorRows) {
|
|
6481
|
+
ensureOp(r.operator).userFetch += Number(r.hits);
|
|
6482
|
+
}
|
|
6444
6483
|
for (const r of referralByOperatorRows) {
|
|
6445
6484
|
ensureOp(r.operator).referrals += Number(r.hits);
|
|
6446
6485
|
}
|
|
@@ -6448,10 +6487,11 @@ function buildServerActivity(db, projectId) {
|
|
|
6448
6487
|
operator,
|
|
6449
6488
|
verifiedHits: v.verified,
|
|
6450
6489
|
unverifiedHits: v.unverified,
|
|
6490
|
+
userFetchHits: v.userFetch,
|
|
6451
6491
|
referralArrivals: v.referrals,
|
|
6452
6492
|
deltaPct: deltaPercent(v.verified, v.prior)
|
|
6453
6493
|
})).sort(
|
|
6454
|
-
(a, b) => b.verifiedHits - a.verifiedHits || b.unverifiedHits - a.unverifiedHits || b.referralArrivals - a.referralArrivals
|
|
6494
|
+
(a, b) => b.verifiedHits - a.verifiedHits || b.userFetchHits - a.userFetchHits || b.unverifiedHits - a.unverifiedHits || b.referralArrivals - a.referralArrivals
|
|
6455
6495
|
);
|
|
6456
6496
|
const topPathsRows = db.select({
|
|
6457
6497
|
path: crawlerEventsHourly.pathNormalized,
|
|
@@ -6526,14 +6566,30 @@ function buildServerActivity(db, projectId) {
|
|
|
6526
6566
|
lte(aiReferralEventsHourly.tsHour, headlineEnd)
|
|
6527
6567
|
)
|
|
6528
6568
|
).groupBy(sql5`SUBSTR(${aiReferralEventsHourly.tsHour}, 1, 10)`).all();
|
|
6569
|
+
const userFetchTrendRows = db.select({
|
|
6570
|
+
date: sql5`SUBSTR(${aiUserFetchEventsHourly.tsHour}, 1, 10)`,
|
|
6571
|
+
hits: sql5`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)`
|
|
6572
|
+
}).from(aiUserFetchEventsHourly).where(
|
|
6573
|
+
and9(
|
|
6574
|
+
eq14(aiUserFetchEventsHourly.projectId, projectId),
|
|
6575
|
+
gte2(aiUserFetchEventsHourly.tsHour, trendStart),
|
|
6576
|
+
lte(aiUserFetchEventsHourly.tsHour, headlineEnd)
|
|
6577
|
+
)
|
|
6578
|
+
).groupBy(sql5`SUBSTR(${aiUserFetchEventsHourly.tsHour}, 1, 10)`).all();
|
|
6579
|
+
const emptyTrendEntry = () => ({ verifiedCrawlerHits: 0, userFetchHits: 0, referralArrivals: 0 });
|
|
6529
6580
|
const dailyTrendMap = /* @__PURE__ */ new Map();
|
|
6530
6581
|
for (const r of crawlerTrendRows) {
|
|
6531
|
-
const e = dailyTrendMap.get(r.date) ?? { verifiedCrawlerHits: 0, referralArrivals: 0 };
|
|
6582
|
+
const e = dailyTrendMap.get(r.date) ?? emptyTrendEntry();
|
|
6532
6583
|
e.verifiedCrawlerHits += Number(r.hits);
|
|
6533
6584
|
dailyTrendMap.set(r.date, e);
|
|
6534
6585
|
}
|
|
6586
|
+
for (const r of userFetchTrendRows) {
|
|
6587
|
+
const e = dailyTrendMap.get(r.date) ?? emptyTrendEntry();
|
|
6588
|
+
e.userFetchHits += Number(r.hits);
|
|
6589
|
+
dailyTrendMap.set(r.date, e);
|
|
6590
|
+
}
|
|
6535
6591
|
for (const r of referralTrendRows) {
|
|
6536
|
-
const e = dailyTrendMap.get(r.date) ?? { verifiedCrawlerHits: 0, referralArrivals: 0 };
|
|
6592
|
+
const e = dailyTrendMap.get(r.date) ?? emptyTrendEntry();
|
|
6537
6593
|
e.referralArrivals += Number(r.hits);
|
|
6538
6594
|
dailyTrendMap.set(r.date, e);
|
|
6539
6595
|
}
|
|
@@ -6541,7 +6597,7 @@ function buildServerActivity(db, projectId) {
|
|
|
6541
6597
|
return {
|
|
6542
6598
|
windowStart: headlineStart,
|
|
6543
6599
|
windowEnd: headlineEnd,
|
|
6544
|
-
hasData: verifiedCurrent + unverifiedCurrent + referralCurrent + verifiedPrior + unverifiedPrior + referralPrior > 0 || byOperator.length > 0 || topCrawledPaths.length > 0 || referralProducts.length > 0,
|
|
6600
|
+
hasData: verifiedCurrent + unverifiedCurrent + userFetchCurrent + referralCurrent + verifiedPrior + unverifiedPrior + userFetchPrior + referralPrior > 0 || byOperator.length > 0 || topCrawledPaths.length > 0 || referralProducts.length > 0,
|
|
6545
6601
|
verifiedCrawlerHits: {
|
|
6546
6602
|
current: verifiedCurrent,
|
|
6547
6603
|
prior: verifiedPrior,
|
|
@@ -6552,6 +6608,11 @@ function buildServerActivity(db, projectId) {
|
|
|
6552
6608
|
prior: unverifiedPrior,
|
|
6553
6609
|
deltaPct: deltaPercent(unverifiedCurrent, unverifiedPrior)
|
|
6554
6610
|
},
|
|
6611
|
+
aiUserFetchHits: {
|
|
6612
|
+
current: userFetchCurrent,
|
|
6613
|
+
prior: userFetchPrior,
|
|
6614
|
+
deltaPct: deltaPercent(userFetchCurrent, userFetchPrior)
|
|
6615
|
+
},
|
|
6555
6616
|
referralArrivals: {
|
|
6556
6617
|
current: referralCurrent,
|
|
6557
6618
|
prior: referralPrior,
|
|
@@ -11458,14 +11519,14 @@ var routeCatalog = [
|
|
|
11458
11519
|
{
|
|
11459
11520
|
method: "get",
|
|
11460
11521
|
path: "/api/v1/projects/{name}/traffic/events",
|
|
11461
|
-
summary: "List rolled-up crawler hits and AI-referral sessions within a window",
|
|
11462
|
-
description: "Returns hourly rollup rows from `crawler_events_hourly` and `ai_referral_events_hourly`. Defaults to the last 24h. Totals reflect the full window; the `events` array is capped by `limit` (default 500, max 5000).",
|
|
11522
|
+
summary: "List rolled-up crawler hits, AI user-fetch hits, and AI-referral sessions within a window",
|
|
11523
|
+
description: "Returns hourly rollup rows from `crawler_events_hourly`, `ai_user_fetch_events_hourly`, and `ai_referral_events_hourly`. Defaults to the last 24h. Totals reflect the full window; the `events` array is capped by `limit` (default 500, max 5000).",
|
|
11463
11524
|
tags: ["traffic"],
|
|
11464
11525
|
parameters: [
|
|
11465
11526
|
nameParameter,
|
|
11466
11527
|
{ name: "since", in: "query", description: "ISO-8601 window start (defaults to 24h ago).", schema: stringSchema },
|
|
11467
11528
|
{ name: "until", in: "query", description: "ISO-8601 window end (defaults to now).", schema: stringSchema },
|
|
11468
|
-
{ name: "kind", in: "query", description: 'Filter to "crawler", "ai-referral", or "all" (default).', schema: stringSchema },
|
|
11529
|
+
{ name: "kind", in: "query", description: 'Filter to "crawler", "ai-user-fetch", "ai-referral", or "all" (default).', schema: stringSchema },
|
|
11469
11530
|
{ name: "limit", in: "query", description: "Max rows per kind in the events array (default 500, max 5000).", schema: stringSchema },
|
|
11470
11531
|
{ name: "sourceId", in: "query", description: "Restrict to a single traffic source.", schema: stringSchema }
|
|
11471
11532
|
],
|
|
@@ -19168,6 +19229,74 @@ var chatgpt_user_default = {
|
|
|
19168
19229
|
]
|
|
19169
19230
|
};
|
|
19170
19231
|
|
|
19232
|
+
// ../integration-traffic/src/ip-ranges/google-user-triggered-agents.json
|
|
19233
|
+
var google_user_triggered_agents_default = {
|
|
19234
|
+
_source: "https://developers.google.com/static/crawling/ipranges/user-triggered-agents.json",
|
|
19235
|
+
creationTime: "2026-05-19T14:46:15.000000",
|
|
19236
|
+
prefixes: [
|
|
19237
|
+
{
|
|
19238
|
+
ipv6Prefix: "2001:4860:c::/124"
|
|
19239
|
+
},
|
|
19240
|
+
{
|
|
19241
|
+
ipv6Prefix: "2001:4860:c::10/124"
|
|
19242
|
+
},
|
|
19243
|
+
{
|
|
19244
|
+
ipv6Prefix: "2001:4860:c::20/124"
|
|
19245
|
+
},
|
|
19246
|
+
{
|
|
19247
|
+
ipv6Prefix: "2001:4860:c::30/124"
|
|
19248
|
+
},
|
|
19249
|
+
{
|
|
19250
|
+
ipv6Prefix: "2001:4860:c::40/124"
|
|
19251
|
+
},
|
|
19252
|
+
{
|
|
19253
|
+
ipv6Prefix: "2001:4860:c::50/124"
|
|
19254
|
+
},
|
|
19255
|
+
{
|
|
19256
|
+
ipv6Prefix: "2001:4860:c::60/124"
|
|
19257
|
+
},
|
|
19258
|
+
{
|
|
19259
|
+
ipv6Prefix: "2001:4860:c::70/124"
|
|
19260
|
+
},
|
|
19261
|
+
{
|
|
19262
|
+
ipv4Prefix: "136.121.16.0/24"
|
|
19263
|
+
},
|
|
19264
|
+
{
|
|
19265
|
+
ipv4Prefix: "136.121.24.0/21"
|
|
19266
|
+
},
|
|
19267
|
+
{
|
|
19268
|
+
ipv4Prefix: "136.121.40.0/21"
|
|
19269
|
+
},
|
|
19270
|
+
{
|
|
19271
|
+
ipv4Prefix: "136.122.0.0/16"
|
|
19272
|
+
},
|
|
19273
|
+
{
|
|
19274
|
+
ipv4Prefix: "74.125.232.0/28"
|
|
19275
|
+
},
|
|
19276
|
+
{
|
|
19277
|
+
ipv4Prefix: "74.125.232.112/28"
|
|
19278
|
+
},
|
|
19279
|
+
{
|
|
19280
|
+
ipv4Prefix: "74.125.232.16/28"
|
|
19281
|
+
},
|
|
19282
|
+
{
|
|
19283
|
+
ipv4Prefix: "74.125.232.32/28"
|
|
19284
|
+
},
|
|
19285
|
+
{
|
|
19286
|
+
ipv4Prefix: "74.125.232.48/28"
|
|
19287
|
+
},
|
|
19288
|
+
{
|
|
19289
|
+
ipv4Prefix: "74.125.232.64/28"
|
|
19290
|
+
},
|
|
19291
|
+
{
|
|
19292
|
+
ipv4Prefix: "74.125.232.80/28"
|
|
19293
|
+
},
|
|
19294
|
+
{
|
|
19295
|
+
ipv4Prefix: "74.125.232.96/28"
|
|
19296
|
+
}
|
|
19297
|
+
]
|
|
19298
|
+
};
|
|
19299
|
+
|
|
19171
19300
|
// ../integration-traffic/src/ip-ranges/googlebot.json
|
|
19172
19301
|
var googlebot_default = {
|
|
19173
19302
|
_source: "https://developers.google.com/static/search/apis/ipranges/googlebot.json",
|
|
@@ -20366,6 +20495,12 @@ var RULE_ID_TO_RANGES = {
|
|
|
20366
20495
|
// (also covers Copilot grounding — Microsoft routes Copilot's
|
|
20367
20496
|
// web fetches through bingbot infrastructure)
|
|
20368
20497
|
"bingbot": bingbot_default,
|
|
20498
|
+
// Google-Agent — Google's agentic user-triggered fetcher (Project
|
|
20499
|
+
// Mariner et al.). Verified against Google's user-triggered-agents
|
|
20500
|
+
// list, which covers every Google user-triggered fetcher collectively
|
|
20501
|
+
// (Google publishes no per-fetcher split).
|
|
20502
|
+
// src: https://developers.google.com/static/crawling/ipranges/user-triggered-agents.json
|
|
20503
|
+
"google-agent": google_user_triggered_agents_default,
|
|
20369
20504
|
// Perplexity — split between crawler and user-on-behalf fetcher,
|
|
20370
20505
|
// same shape as OpenAI's split.
|
|
20371
20506
|
// src: https://www.perplexity.ai/perplexitybot.json
|
|
@@ -20377,10 +20512,12 @@ var RULE_ID_TO_RANGES = {
|
|
|
20377
20512
|
// PBC at ARIN (the authoritative allocation record). Maintained by
|
|
20378
20513
|
// hand; refresh by re-querying the ARIN entity below. The crawler
|
|
20379
20514
|
// block is AWS-ANTHROPIC 216.73.216.0/22 — empirical Cloud Run
|
|
20380
|
-
// logs show all real ClaudeBot traffic comes from there.
|
|
20381
|
-
// set is shared across every Claude-* UA the classifier emits
|
|
20515
|
+
// logs show all real ClaudeBot traffic comes from there. The same
|
|
20516
|
+
// raw set is shared across every Claude-* UA the classifier emits:
|
|
20517
|
+
// both the training crawler and the per-user fetcher map here.
|
|
20382
20518
|
// src: https://rdap.arin.net/registry/entity/AP-2440
|
|
20383
|
-
"anthropic-claudebot": anthropic_default
|
|
20519
|
+
"anthropic-claudebot": anthropic_default,
|
|
20520
|
+
"claude-user": anthropic_default
|
|
20384
20521
|
};
|
|
20385
20522
|
var CACHE = (() => {
|
|
20386
20523
|
const cache = /* @__PURE__ */ new Map();
|
|
@@ -20492,9 +20629,11 @@ var DEFAULT_AI_CRAWLER_RULES = [
|
|
|
20492
20629
|
// Anthropic ships several Claude-* crawlers (ClaudeBot for training,
|
|
20493
20630
|
// Claude-Web for chat fetches, Claude-SearchBot for search). The
|
|
20494
20631
|
// `Claude-` prefix + `Bot/` suffix is the stable shape — pattern is
|
|
20495
|
-
// permissive enough to catch new Claude-* variants as Anthropic
|
|
20632
|
+
// permissive enough to catch new Claude-*Bot variants as Anthropic
|
|
20496
20633
|
// adds them, without matching unrelated UAs that happen to mention
|
|
20497
|
-
// "claude".
|
|
20634
|
+
// "claude". The per-user fetcher `Claude-User` has no `Bot/` suffix
|
|
20635
|
+
// and is intentionally NOT matched here — it routes through the
|
|
20636
|
+
// separate `claude-user` rule below (purpose: 'user-agent').
|
|
20498
20637
|
userAgentPatterns: [
|
|
20499
20638
|
/ClaudeBot\//i,
|
|
20500
20639
|
/Claude-Web\//i,
|
|
@@ -20503,6 +20642,21 @@ var DEFAULT_AI_CRAWLER_RULES = [
|
|
|
20503
20642
|
/anthropic-ai/i
|
|
20504
20643
|
]
|
|
20505
20644
|
},
|
|
20645
|
+
{
|
|
20646
|
+
// Anthropic's on-behalf-of-user fetcher: Claude fetches a URL when
|
|
20647
|
+
// a person asks about it mid-conversation (citation click, "read
|
|
20648
|
+
// this page" prompt). Distinct from ClaudeBot (training crawl) —
|
|
20649
|
+
// same operator, opposite operational signal, mirroring OpenAI's
|
|
20650
|
+
// GPTBot vs. ChatGPT-User split. The `anthropic-claudebot` rule
|
|
20651
|
+
// above does not match `Claude-User/` (its `Claude-[A-Z]+Bot/`
|
|
20652
|
+
// pattern needs a `Bot/` suffix), so this is the only rule that
|
|
20653
|
+
// routes it — into the user-fetch bucket, not bulk crawl.
|
|
20654
|
+
id: "claude-user",
|
|
20655
|
+
operator: "Anthropic",
|
|
20656
|
+
product: "Claude-User",
|
|
20657
|
+
purpose: "user-agent",
|
|
20658
|
+
userAgentPatterns: [/Claude-User\//i]
|
|
20659
|
+
},
|
|
20506
20660
|
{
|
|
20507
20661
|
id: "perplexity-bot",
|
|
20508
20662
|
operator: "Perplexity",
|
|
@@ -20528,6 +20682,21 @@ var DEFAULT_AI_CRAWLER_RULES = [
|
|
|
20528
20682
|
purpose: "training-control",
|
|
20529
20683
|
userAgentPatterns: [/Google-Extended/i]
|
|
20530
20684
|
},
|
|
20685
|
+
{
|
|
20686
|
+
// Google-Agent: agents on Google infrastructure that navigate the
|
|
20687
|
+
// web and act "upon user request" (e.g. Project Mariner) — a
|
|
20688
|
+
// user-driven fetch, routed to the user-fetch bucket. Google ships
|
|
20689
|
+
// no distinct Gemini fetch UA (`Google-Extended` above is a
|
|
20690
|
+
// robots.txt control token, not a request UA), so this is the
|
|
20691
|
+
// closest Google equivalent to ChatGPT-User. The UA is browser-like
|
|
20692
|
+
// with a `compatible; Google-Agent;` token. IP ranges:
|
|
20693
|
+
// user-triggered-agents.json.
|
|
20694
|
+
id: "google-agent",
|
|
20695
|
+
operator: "Google",
|
|
20696
|
+
product: "Google-Agent",
|
|
20697
|
+
purpose: "user-agent",
|
|
20698
|
+
userAgentPatterns: [/Google-Agent/i]
|
|
20699
|
+
},
|
|
20531
20700
|
{
|
|
20532
20701
|
id: "bytespider",
|
|
20533
20702
|
operator: "ByteDance",
|
|
@@ -20582,16 +20751,23 @@ var DEFAULT_AI_CRAWLER_RULES = [
|
|
|
20582
20751
|
userAgentPatterns: [/Diffbot/i]
|
|
20583
20752
|
},
|
|
20584
20753
|
{
|
|
20585
|
-
|
|
20754
|
+
// Per-user, on-demand fetches initiated by a Mistral user (citation
|
|
20755
|
+
// click, "read this URL" prompt). Separate from MistralBot (crawl)
|
|
20756
|
+
// so the dashboard's user-fetch vs. bulk-crawl split stays honest.
|
|
20757
|
+
id: "mistral-ai-user",
|
|
20586
20758
|
operator: "Mistral AI",
|
|
20587
20759
|
product: "MistralAI-User",
|
|
20760
|
+
purpose: "user-agent",
|
|
20761
|
+
userAgentPatterns: [/MistralAI-User\//i]
|
|
20762
|
+
},
|
|
20763
|
+
{
|
|
20764
|
+
// Mistral's general crawler. Distinct from MistralAI-User (per-user
|
|
20765
|
+
// fetch) — same operator, different operational signal.
|
|
20766
|
+
id: "mistral-bot",
|
|
20767
|
+
operator: "Mistral AI",
|
|
20768
|
+
product: "MistralBot",
|
|
20588
20769
|
purpose: "crawl",
|
|
20589
|
-
|
|
20590
|
-
// fetches) and `MistralBot/*` (general crawler). Earlier rule only
|
|
20591
|
-
// matched `MistralAI` and missed the bot — caught on 2026-05-18
|
|
20592
|
-
// when canonry.ai/canonry-landing's classification chart went flat
|
|
20593
|
-
// and the bot UA was sitting in the `unknown` bucket.
|
|
20594
|
-
userAgentPatterns: [/MistralAI/i, /MistralBot/i]
|
|
20770
|
+
userAgentPatterns: [/MistralBot\//i]
|
|
20595
20771
|
},
|
|
20596
20772
|
{
|
|
20597
20773
|
id: "deepseek",
|
|
@@ -20600,6 +20776,21 @@ var DEFAULT_AI_CRAWLER_RULES = [
|
|
|
20600
20776
|
purpose: "training",
|
|
20601
20777
|
userAgentPatterns: [/DeepSeekBot/i]
|
|
20602
20778
|
},
|
|
20779
|
+
{
|
|
20780
|
+
id: "xai-grok-bot",
|
|
20781
|
+
operator: "xAI",
|
|
20782
|
+
product: "xAI-Bot",
|
|
20783
|
+
purpose: "crawl",
|
|
20784
|
+
// xAI documents its crawler at https://x.ai/bots/ as `xAI-Bot/<version>`.
|
|
20785
|
+
// Operators have also observed `Grok-Bot/...` in production logs. xAI
|
|
20786
|
+
// has been less consistent than OpenAI/Anthropic about publishing every
|
|
20787
|
+
// UA variant they ship, so the pattern is intentionally permissive
|
|
20788
|
+
// across the xAI/Grok family — better to over-match the operator than
|
|
20789
|
+
// leave real hits in the `unknown` bucket. A separate `purpose:
|
|
20790
|
+
// 'user-agent'` Grok rule can be added later if xAI ships a citation
|
|
20791
|
+
// user-fetcher UA (the way OpenAI ships ChatGPT-User alongside GPTBot).
|
|
20792
|
+
userAgentPatterns: [/xAI-Bot\//i, /Grok-Bot\//i, /GrokBot\//i]
|
|
20793
|
+
},
|
|
20603
20794
|
// Classic search-engine crawlers. Not strictly "AI" by training origin,
|
|
20604
20795
|
// but the same audience: machine traffic indexing the site for query
|
|
20605
20796
|
// surfaces. Operators tracking AI visibility want this signal too —
|
|
@@ -20662,12 +20853,14 @@ var DEFAULT_AI_REFERRER_RULES = [
|
|
|
20662
20853
|
{ domain: AI_ENGINE_DOMAINS.claude, operator: "Anthropic", product: "Claude" },
|
|
20663
20854
|
{ domain: AI_ENGINE_DOMAINS.gemini, operator: "Google", product: "Gemini" },
|
|
20664
20855
|
{ domain: AI_ENGINE_DOMAINS.copilotMicrosoft, operator: "Microsoft", product: "Copilot" },
|
|
20856
|
+
{ domain: AI_ENGINE_DOMAINS.grok, operator: "xAI", product: "Grok" },
|
|
20665
20857
|
{ domain: AI_ENGINE_DOMAINS.phind, operator: "Phind", product: "Phind" },
|
|
20666
20858
|
{ domain: AI_ENGINE_DOMAINS.you, operator: "You.com", product: "You.com" },
|
|
20667
20859
|
{ domain: AI_ENGINE_DOMAINS.metaAi, operator: "Meta", product: "Meta AI" }
|
|
20668
20860
|
];
|
|
20669
20861
|
|
|
20670
20862
|
// ../integration-traffic/src/classifier.ts
|
|
20863
|
+
var USER_FETCH_PURPOSE = "user-agent";
|
|
20671
20864
|
function normalizeHost(host) {
|
|
20672
20865
|
return host.trim().toLowerCase().replace(/^www\./, "");
|
|
20673
20866
|
}
|
|
@@ -20708,6 +20901,7 @@ function classifyCrawler(event) {
|
|
|
20708
20901
|
const userAgent = event.userAgent?.trim();
|
|
20709
20902
|
if (!userAgent) return null;
|
|
20710
20903
|
for (const rule of DEFAULT_AI_CRAWLER_RULES) {
|
|
20904
|
+
if (rule.purpose === USER_FETCH_PURPOSE) continue;
|
|
20711
20905
|
if (rule.userAgentPatterns.some((pattern) => pattern.test(userAgent))) {
|
|
20712
20906
|
const verified = verifyIpForRule(event.remoteIp, rule.id);
|
|
20713
20907
|
return {
|
|
@@ -20722,6 +20916,24 @@ function classifyCrawler(event) {
|
|
|
20722
20916
|
}
|
|
20723
20917
|
return null;
|
|
20724
20918
|
}
|
|
20919
|
+
function classifyAiUserFetch(event) {
|
|
20920
|
+
const userAgent = event.userAgent?.trim();
|
|
20921
|
+
if (!userAgent) return null;
|
|
20922
|
+
for (const rule of DEFAULT_AI_CRAWLER_RULES) {
|
|
20923
|
+
if (rule.purpose !== USER_FETCH_PURPOSE) continue;
|
|
20924
|
+
if (rule.userAgentPatterns.some((pattern) => pattern.test(userAgent))) {
|
|
20925
|
+
const verified = verifyIpForRule(event.remoteIp, rule.id);
|
|
20926
|
+
return {
|
|
20927
|
+
botId: rule.id,
|
|
20928
|
+
operator: rule.operator,
|
|
20929
|
+
product: rule.product,
|
|
20930
|
+
verificationStatus: verified ? "verified" : "claimed_unverified",
|
|
20931
|
+
matchedUserAgent: userAgent
|
|
20932
|
+
};
|
|
20933
|
+
}
|
|
20934
|
+
}
|
|
20935
|
+
return null;
|
|
20936
|
+
}
|
|
20725
20937
|
function classifyAiReferral(event) {
|
|
20726
20938
|
const refererHost = hostFromUrl(event.referer);
|
|
20727
20939
|
if (refererHost) {
|
|
@@ -20864,6 +21076,9 @@ function strongerReferralEvidence(current, next) {
|
|
|
20864
21076
|
function sortCrawlerBuckets(a, b) {
|
|
20865
21077
|
return a.tsHour.localeCompare(b.tsHour) || a.botId.localeCompare(b.botId) || a.pathNormalized.localeCompare(b.pathNormalized) || String(a.status).localeCompare(String(b.status));
|
|
20866
21078
|
}
|
|
21079
|
+
function sortAiUserFetchBuckets(a, b) {
|
|
21080
|
+
return a.tsHour.localeCompare(b.tsHour) || a.botId.localeCompare(b.botId) || a.pathNormalized.localeCompare(b.pathNormalized) || String(a.status).localeCompare(String(b.status));
|
|
21081
|
+
}
|
|
20867
21082
|
function sortReferralBuckets(a, b) {
|
|
20868
21083
|
return a.tsHour.localeCompare(b.tsHour) || a.product.localeCompare(b.product) || a.sourceDomain.localeCompare(b.sourceDomain) || a.landingPathNormalized.localeCompare(b.landingPathNormalized) || String(a.status).localeCompare(String(b.status));
|
|
20869
21084
|
}
|
|
@@ -20875,13 +21090,17 @@ function buildTrafficProbeReport(events, options = {}) {
|
|
|
20875
21090
|
const configuredSessionWindowMs = options.aiReferralSessionWindowMs ?? DEFAULT_AI_REFERRAL_SESSION_WINDOW_MS;
|
|
20876
21091
|
const aiReferralSessionWindowMs = configuredSessionWindowMs > 0 ? configuredSessionWindowMs : DEFAULT_AI_REFERRAL_SESSION_WINDOW_MS;
|
|
20877
21092
|
const crawlerBuckets = /* @__PURE__ */ new Map();
|
|
21093
|
+
const aiUserFetchBuckets = /* @__PURE__ */ new Map();
|
|
20878
21094
|
const aiReferralBuckets = /* @__PURE__ */ new Map();
|
|
20879
21095
|
const aiReferralSessions = /* @__PURE__ */ new Map();
|
|
20880
21096
|
const topBots = /* @__PURE__ */ new Map();
|
|
20881
21097
|
const topCrawlerPaths = /* @__PURE__ */ new Map();
|
|
21098
|
+
const topAiUserFetchBots = /* @__PURE__ */ new Map();
|
|
21099
|
+
const topAiUserFetchPaths = /* @__PURE__ */ new Map();
|
|
20882
21100
|
const topAiReferrers = /* @__PURE__ */ new Map();
|
|
20883
21101
|
const topAiReferralLandingPaths = /* @__PURE__ */ new Map();
|
|
20884
21102
|
let crawlerHits = 0;
|
|
21103
|
+
let aiUserFetchHits = 0;
|
|
20885
21104
|
let aiReferralHits = 0;
|
|
20886
21105
|
let unknownHits = 0;
|
|
20887
21106
|
const samples = [];
|
|
@@ -20889,6 +21108,7 @@ function buildTrafficProbeReport(events, options = {}) {
|
|
|
20889
21108
|
const tsHour = hourBucket(event.observedAt);
|
|
20890
21109
|
const pathNormalized = normalizeTrafficPathPattern(event.path);
|
|
20891
21110
|
const crawler = classifyCrawler(event);
|
|
21111
|
+
const aiUserFetch = classifyAiUserFetch(event);
|
|
20892
21112
|
const aiReferral = classifyAiReferral(event);
|
|
20893
21113
|
if (crawler) {
|
|
20894
21114
|
crawlerHits += 1;
|
|
@@ -20921,6 +21141,37 @@ function buildTrafficProbeReport(events, options = {}) {
|
|
|
20921
21141
|
else topBots.set(botKey, { fields: { botId: crawler.botId, operator: crawler.operator }, hits: 1 });
|
|
20922
21142
|
incrementBucket(topCrawlerPaths, pathNormalized, { pathNormalized });
|
|
20923
21143
|
}
|
|
21144
|
+
if (aiUserFetch) {
|
|
21145
|
+
aiUserFetchHits += 1;
|
|
21146
|
+
const key = [
|
|
21147
|
+
tsHour,
|
|
21148
|
+
aiUserFetch.botId,
|
|
21149
|
+
aiUserFetch.verificationStatus,
|
|
21150
|
+
pathNormalized,
|
|
21151
|
+
event.status ?? "null"
|
|
21152
|
+
].join(" ");
|
|
21153
|
+
const existing = aiUserFetchBuckets.get(key);
|
|
21154
|
+
if (existing) {
|
|
21155
|
+
existing.hits += 1;
|
|
21156
|
+
} else {
|
|
21157
|
+
aiUserFetchBuckets.set(key, {
|
|
21158
|
+
tsHour,
|
|
21159
|
+
botId: aiUserFetch.botId,
|
|
21160
|
+
operator: aiUserFetch.operator,
|
|
21161
|
+
product: aiUserFetch.product,
|
|
21162
|
+
verificationStatus: aiUserFetch.verificationStatus,
|
|
21163
|
+
pathNormalized,
|
|
21164
|
+
status: event.status,
|
|
21165
|
+
hits: 1,
|
|
21166
|
+
sampledUserAgent: event.userAgent
|
|
21167
|
+
});
|
|
21168
|
+
}
|
|
21169
|
+
const botKey = `${aiUserFetch.botId} ${aiUserFetch.operator}`;
|
|
21170
|
+
const botEntry = topAiUserFetchBots.get(botKey);
|
|
21171
|
+
if (botEntry) botEntry.hits += 1;
|
|
21172
|
+
else topAiUserFetchBots.set(botKey, { fields: { botId: aiUserFetch.botId, operator: aiUserFetch.operator }, hits: 1 });
|
|
21173
|
+
incrementBucket(topAiUserFetchPaths, pathNormalized, { pathNormalized });
|
|
21174
|
+
}
|
|
20924
21175
|
if (aiReferral) {
|
|
20925
21176
|
aiReferralHits += 1;
|
|
20926
21177
|
const landingPathNormalized = resolveAiReferralLandingPath(event, aiReferral.evidenceType);
|
|
@@ -20939,7 +21190,7 @@ function buildTrafficProbeReport(events, options = {}) {
|
|
|
20939
21190
|
aiReferralSessions.set(key, existing ? strongerReferralEvidence(existing, session) : session);
|
|
20940
21191
|
}
|
|
20941
21192
|
}
|
|
20942
|
-
if (!crawler && !aiReferral) unknownHits += 1;
|
|
21193
|
+
if (!crawler && !aiUserFetch && !aiReferral) unknownHits += 1;
|
|
20943
21194
|
samples.push({
|
|
20944
21195
|
eventId: event.eventId,
|
|
20945
21196
|
observedAt: event.observedAt,
|
|
@@ -20950,6 +21201,7 @@ function buildTrafficProbeReport(events, options = {}) {
|
|
|
20950
21201
|
userAgent: event.userAgent,
|
|
20951
21202
|
referer: event.referer,
|
|
20952
21203
|
crawler,
|
|
21204
|
+
aiUserFetch,
|
|
20953
21205
|
aiReferral
|
|
20954
21206
|
});
|
|
20955
21207
|
if (samples.length > sampleLimit) samples.shift();
|
|
@@ -20985,14 +21237,18 @@ function buildTrafficProbeReport(events, options = {}) {
|
|
|
20985
21237
|
totals: {
|
|
20986
21238
|
normalizedEvents: events.length,
|
|
20987
21239
|
crawlerHits,
|
|
21240
|
+
aiUserFetchHits,
|
|
20988
21241
|
aiReferralSessions: aiReferralSessions.size,
|
|
20989
21242
|
aiReferralHits,
|
|
20990
21243
|
unknownHits
|
|
20991
21244
|
},
|
|
20992
21245
|
crawlerEventsHourly: [...crawlerBuckets.values()].sort(sortCrawlerBuckets),
|
|
21246
|
+
aiUserFetchEventsHourly: [...aiUserFetchBuckets.values()].sort(sortAiUserFetchBuckets),
|
|
20993
21247
|
aiReferralEventsHourly: [...aiReferralBuckets.values()].sort(sortReferralBuckets),
|
|
20994
21248
|
topBots: topEntries(topBots, 10),
|
|
20995
21249
|
topCrawlerPaths: topEntries(topCrawlerPaths, 10),
|
|
21250
|
+
topAiUserFetchBots: topEntries(topAiUserFetchBots, 10),
|
|
21251
|
+
topAiUserFetchPaths: topEntries(topAiUserFetchPaths, 10),
|
|
20996
21252
|
topAiReferrers: topEntries(topAiReferrers, 10),
|
|
20997
21253
|
topAiReferralLandingPaths: topEntries(topAiReferralLandingPaths, 10),
|
|
20998
21254
|
samples
|
|
@@ -21004,6 +21260,9 @@ function incrementBucket(map, key, fields) {
|
|
|
21004
21260
|
else map.set(key, { fields, hits: 1 });
|
|
21005
21261
|
}
|
|
21006
21262
|
|
|
21263
|
+
// ../integration-wordpress-traffic/src/client.ts
|
|
21264
|
+
import { randomUUID } from "crypto";
|
|
21265
|
+
|
|
21007
21266
|
// ../integration-wordpress-traffic/src/normalize.ts
|
|
21008
21267
|
function trimOrNull(value) {
|
|
21009
21268
|
if (value === null || value === void 0) return null;
|
|
@@ -21034,7 +21293,7 @@ function normalizeWordpressTrafficEvent(event) {
|
|
|
21034
21293
|
queryString,
|
|
21035
21294
|
status: typeof event.status === "number" && Number.isFinite(event.status) ? event.status : null,
|
|
21036
21295
|
userAgent: trimOrNull(event.user_agent),
|
|
21037
|
-
remoteIp: trimOrNull(event.
|
|
21296
|
+
remoteIp: trimOrNull(event.remote_ip),
|
|
21038
21297
|
referer: trimOrNull(event.referer),
|
|
21039
21298
|
latencyMs: null,
|
|
21040
21299
|
requestSizeBytes: null,
|
|
@@ -21117,11 +21376,13 @@ async function listWordpressTrafficEvents(options) {
|
|
|
21117
21376
|
if (options.until !== void 0 && options.until !== "") {
|
|
21118
21377
|
url.searchParams.set("until", options.until);
|
|
21119
21378
|
}
|
|
21379
|
+
url.searchParams.set("_cb", randomUUID());
|
|
21120
21380
|
const response = await fetch(url, {
|
|
21121
21381
|
method: "GET",
|
|
21122
21382
|
headers: {
|
|
21123
21383
|
Authorization: authHeader,
|
|
21124
|
-
Accept: "application/json"
|
|
21384
|
+
Accept: "application/json",
|
|
21385
|
+
"Cache-Control": "no-cache"
|
|
21125
21386
|
},
|
|
21126
21387
|
signal: AbortSignal.timeout(timeoutMs)
|
|
21127
21388
|
});
|
|
@@ -21434,6 +21695,13 @@ async function runBackfillTask(options) {
|
|
|
21434
21695
|
lte2(crawlerEventsHourly.tsHour, windowEndIso)
|
|
21435
21696
|
)
|
|
21436
21697
|
).run();
|
|
21698
|
+
tx.delete(aiUserFetchEventsHourly).where(
|
|
21699
|
+
and19(
|
|
21700
|
+
eq24(aiUserFetchEventsHourly.sourceId, sourceRow.id),
|
|
21701
|
+
gte3(aiUserFetchEventsHourly.tsHour, windowStartIso),
|
|
21702
|
+
lte2(aiUserFetchEventsHourly.tsHour, windowEndIso)
|
|
21703
|
+
)
|
|
21704
|
+
).run();
|
|
21437
21705
|
tx.delete(aiReferralEventsHourly).where(
|
|
21438
21706
|
and19(
|
|
21439
21707
|
eq24(aiReferralEventsHourly.sourceId, sourceRow.id),
|
|
@@ -21464,6 +21732,22 @@ async function runBackfillTask(options) {
|
|
|
21464
21732
|
updatedAt: finishedAt
|
|
21465
21733
|
}).run();
|
|
21466
21734
|
}
|
|
21735
|
+
for (const bucket of report.aiUserFetchEventsHourly) {
|
|
21736
|
+
tx.insert(aiUserFetchEventsHourly).values({
|
|
21737
|
+
projectId: project.id,
|
|
21738
|
+
sourceId: sourceRow.id,
|
|
21739
|
+
tsHour: bucket.tsHour,
|
|
21740
|
+
botId: bucket.botId,
|
|
21741
|
+
operator: bucket.operator,
|
|
21742
|
+
verificationStatus: bucket.verificationStatus,
|
|
21743
|
+
pathNormalized: bucket.pathNormalized,
|
|
21744
|
+
status: bucket.status ?? 0,
|
|
21745
|
+
hits: bucket.hits,
|
|
21746
|
+
sampledUserAgent: bucket.sampledUserAgent,
|
|
21747
|
+
createdAt: finishedAt,
|
|
21748
|
+
updatedAt: finishedAt
|
|
21749
|
+
}).run();
|
|
21750
|
+
}
|
|
21467
21751
|
for (const bucket of report.aiReferralEventsHourly) {
|
|
21468
21752
|
tx.insert(aiReferralEventsHourly).values({
|
|
21469
21753
|
projectId: project.id,
|
|
@@ -21482,7 +21766,7 @@ async function runBackfillTask(options) {
|
|
|
21482
21766
|
}).run();
|
|
21483
21767
|
}
|
|
21484
21768
|
for (const sample of report.samples) {
|
|
21485
|
-
const eventType = sample.crawler ? "crawler" : sample.aiReferral ? "ai_referral" : "unknown";
|
|
21769
|
+
const eventType = sample.crawler ? "crawler" : sample.aiUserFetch ? "ai_user_fetch" : sample.aiReferral ? "ai_referral" : "unknown";
|
|
21486
21770
|
const refererHost = (() => {
|
|
21487
21771
|
if (!sample.referer) return null;
|
|
21488
21772
|
try {
|
|
@@ -21504,6 +21788,7 @@ async function runBackfillTask(options) {
|
|
|
21504
21788
|
refererHost,
|
|
21505
21789
|
classifierDetailsJson: {
|
|
21506
21790
|
crawler: sample.crawler,
|
|
21791
|
+
aiUserFetch: sample.aiUserFetch,
|
|
21507
21792
|
aiReferral: sample.aiReferral
|
|
21508
21793
|
},
|
|
21509
21794
|
createdAt: finishedAt
|
|
@@ -21813,6 +22098,7 @@ async function trafficRoutes(app, opts) {
|
|
|
21813
22098
|
sourceId: sourceRow.id,
|
|
21814
22099
|
pulledEvents: 0,
|
|
21815
22100
|
crawlerHits: 0,
|
|
22101
|
+
aiUserFetchHits: 0,
|
|
21816
22102
|
aiReferralHits: 0,
|
|
21817
22103
|
durationMs: Date.now() - syncStartedAtMs,
|
|
21818
22104
|
errorCode
|
|
@@ -21964,11 +22250,13 @@ async function trafficRoutes(app, opts) {
|
|
|
21964
22250
|
allEvents = page.events;
|
|
21965
22251
|
}
|
|
21966
22252
|
let crawlerBucketRows = 0;
|
|
22253
|
+
let aiUserFetchBucketRows = 0;
|
|
21967
22254
|
let aiReferralBucketRows = 0;
|
|
21968
22255
|
let sampleRows = 0;
|
|
21969
22256
|
let finishedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
21970
22257
|
let pulledEventsCount = 0;
|
|
21971
22258
|
let crawlerHitsCount = 0;
|
|
22259
|
+
let aiUserFetchHitsCount = 0;
|
|
21972
22260
|
let aiReferralHitsCount = 0;
|
|
21973
22261
|
let unknownHitsCount = 0;
|
|
21974
22262
|
app.db.transaction((tx) => {
|
|
@@ -21990,6 +22278,7 @@ async function trafficRoutes(app, opts) {
|
|
|
21990
22278
|
finishedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
21991
22279
|
pulledEventsCount = report.totals.normalizedEvents;
|
|
21992
22280
|
crawlerHitsCount = report.totals.crawlerHits;
|
|
22281
|
+
aiUserFetchHitsCount = report.totals.aiUserFetchHits;
|
|
21993
22282
|
aiReferralHitsCount = report.totals.aiReferralHits;
|
|
21994
22283
|
unknownHitsCount = report.totals.unknownHits;
|
|
21995
22284
|
for (const bucket of report.crawlerEventsHourly) {
|
|
@@ -22025,6 +22314,39 @@ async function trafficRoutes(app, opts) {
|
|
|
22025
22314
|
}).run();
|
|
22026
22315
|
crawlerBucketRows += 1;
|
|
22027
22316
|
}
|
|
22317
|
+
for (const bucket of report.aiUserFetchEventsHourly) {
|
|
22318
|
+
const status = bucket.status ?? 0;
|
|
22319
|
+
tx.insert(aiUserFetchEventsHourly).values({
|
|
22320
|
+
projectId: project.id,
|
|
22321
|
+
sourceId: sourceRow.id,
|
|
22322
|
+
tsHour: bucket.tsHour,
|
|
22323
|
+
botId: bucket.botId,
|
|
22324
|
+
operator: bucket.operator,
|
|
22325
|
+
verificationStatus: bucket.verificationStatus,
|
|
22326
|
+
pathNormalized: bucket.pathNormalized,
|
|
22327
|
+
status,
|
|
22328
|
+
hits: bucket.hits,
|
|
22329
|
+
sampledUserAgent: bucket.sampledUserAgent,
|
|
22330
|
+
createdAt: finishedAt,
|
|
22331
|
+
updatedAt: finishedAt
|
|
22332
|
+
}).onConflictDoUpdate({
|
|
22333
|
+
target: [
|
|
22334
|
+
aiUserFetchEventsHourly.projectId,
|
|
22335
|
+
aiUserFetchEventsHourly.sourceId,
|
|
22336
|
+
aiUserFetchEventsHourly.tsHour,
|
|
22337
|
+
aiUserFetchEventsHourly.botId,
|
|
22338
|
+
aiUserFetchEventsHourly.verificationStatus,
|
|
22339
|
+
aiUserFetchEventsHourly.pathNormalized,
|
|
22340
|
+
aiUserFetchEventsHourly.status
|
|
22341
|
+
],
|
|
22342
|
+
set: {
|
|
22343
|
+
hits: sql10`${aiUserFetchEventsHourly.hits} + ${bucket.hits}`,
|
|
22344
|
+
sampledUserAgent: bucket.sampledUserAgent,
|
|
22345
|
+
updatedAt: finishedAt
|
|
22346
|
+
}
|
|
22347
|
+
}).run();
|
|
22348
|
+
aiUserFetchBucketRows += 1;
|
|
22349
|
+
}
|
|
22028
22350
|
for (const bucket of report.aiReferralEventsHourly) {
|
|
22029
22351
|
const status = bucket.status ?? 0;
|
|
22030
22352
|
tx.insert(aiReferralEventsHourly).values({
|
|
@@ -22060,7 +22382,7 @@ async function trafficRoutes(app, opts) {
|
|
|
22060
22382
|
aiReferralBucketRows += 1;
|
|
22061
22383
|
}
|
|
22062
22384
|
for (const sample of report.samples) {
|
|
22063
|
-
const eventType = sample.crawler ? "crawler" : sample.aiReferral ? "ai_referral" : "unknown";
|
|
22385
|
+
const eventType = sample.crawler ? "crawler" : sample.aiUserFetch ? "ai_user_fetch" : sample.aiReferral ? "ai_referral" : "unknown";
|
|
22064
22386
|
const refererHost = (() => {
|
|
22065
22387
|
if (!sample.referer) return null;
|
|
22066
22388
|
try {
|
|
@@ -22082,6 +22404,7 @@ async function trafficRoutes(app, opts) {
|
|
|
22082
22404
|
refererHost,
|
|
22083
22405
|
classifierDetailsJson: {
|
|
22084
22406
|
crawler: sample.crawler,
|
|
22407
|
+
aiUserFetch: sample.aiUserFetch,
|
|
22085
22408
|
aiReferral: sample.aiReferral
|
|
22086
22409
|
},
|
|
22087
22410
|
createdAt: finishedAt
|
|
@@ -22119,6 +22442,7 @@ async function trafficRoutes(app, opts) {
|
|
|
22119
22442
|
sourceId: sourceRow.id,
|
|
22120
22443
|
pulledEvents: pulledEventsCount,
|
|
22121
22444
|
crawlerHits: crawlerHitsCount,
|
|
22445
|
+
aiUserFetchHits: aiUserFetchHitsCount,
|
|
22122
22446
|
aiReferralHits: aiReferralHitsCount,
|
|
22123
22447
|
durationMs: Date.now() - syncStartedAtMs
|
|
22124
22448
|
});
|
|
@@ -22130,9 +22454,11 @@ async function trafficRoutes(app, opts) {
|
|
|
22130
22454
|
syncedAt: finishedAt,
|
|
22131
22455
|
pulledEvents: pulledEventsCount,
|
|
22132
22456
|
crawlerHits: crawlerHitsCount,
|
|
22457
|
+
aiUserFetchHits: aiUserFetchHitsCount,
|
|
22133
22458
|
aiReferralHits: aiReferralHitsCount,
|
|
22134
22459
|
unknownHits: unknownHitsCount,
|
|
22135
22460
|
crawlerBucketRows,
|
|
22461
|
+
aiUserFetchBucketRows,
|
|
22136
22462
|
aiReferralBucketRows,
|
|
22137
22463
|
sampleRows,
|
|
22138
22464
|
windowStart: windowStart.toISOString(),
|
|
@@ -22311,6 +22637,12 @@ async function trafficRoutes(app, opts) {
|
|
|
22311
22637
|
gte3(crawlerEventsHourly.tsHour, since)
|
|
22312
22638
|
)
|
|
22313
22639
|
).get();
|
|
22640
|
+
const aiUserFetchTotals = app.db.select({ total: sql10`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)` }).from(aiUserFetchEventsHourly).where(
|
|
22641
|
+
and19(
|
|
22642
|
+
eq24(aiUserFetchEventsHourly.sourceId, row.id),
|
|
22643
|
+
gte3(aiUserFetchEventsHourly.tsHour, since)
|
|
22644
|
+
)
|
|
22645
|
+
).get();
|
|
22314
22646
|
const aiTotals = app.db.select({ total: sql10`COALESCE(SUM(${aiReferralEventsHourly.sessionsOrHits}), 0)` }).from(aiReferralEventsHourly).where(
|
|
22315
22647
|
and19(
|
|
22316
22648
|
eq24(aiReferralEventsHourly.sourceId, row.id),
|
|
@@ -22334,6 +22666,7 @@ async function trafficRoutes(app, opts) {
|
|
|
22334
22666
|
...rowToDto(row),
|
|
22335
22667
|
totals24h: {
|
|
22336
22668
|
crawlerHits: Number(crawlerTotals?.total ?? 0),
|
|
22669
|
+
aiUserFetchHits: Number(aiUserFetchTotals?.total ?? 0),
|
|
22337
22670
|
aiReferralHits: Number(aiTotals?.total ?? 0),
|
|
22338
22671
|
sampleCount: Number(sampleTotals?.total ?? 0)
|
|
22339
22672
|
},
|
|
@@ -22393,10 +22726,12 @@ async function trafficRoutes(app, opts) {
|
|
|
22393
22726
|
const kindParam = request.query?.kind;
|
|
22394
22727
|
let kind = "all";
|
|
22395
22728
|
if (kindParam !== void 0) {
|
|
22396
|
-
if (kindParam === "all" || kindParam === TrafficEventKinds.crawler || kindParam === TrafficEventKinds["ai-referral"]) {
|
|
22729
|
+
if (kindParam === "all" || kindParam === TrafficEventKinds.crawler || kindParam === TrafficEventKinds["ai-user-fetch"] || kindParam === TrafficEventKinds["ai-referral"]) {
|
|
22397
22730
|
kind = kindParam;
|
|
22398
22731
|
} else {
|
|
22399
|
-
throw validationError(
|
|
22732
|
+
throw validationError(
|
|
22733
|
+
`"kind" must be one of: all, ${TrafficEventKinds.crawler}, ${TrafficEventKinds["ai-user-fetch"]}, ${TrafficEventKinds["ai-referral"]}`
|
|
22734
|
+
);
|
|
22400
22735
|
}
|
|
22401
22736
|
}
|
|
22402
22737
|
const limitParam = request.query?.limit;
|
|
@@ -22410,6 +22745,7 @@ async function trafficRoutes(app, opts) {
|
|
|
22410
22745
|
const untilIso = until.toISOString();
|
|
22411
22746
|
const events = [];
|
|
22412
22747
|
let crawlerTotal = 0;
|
|
22748
|
+
let aiUserFetchTotal = 0;
|
|
22413
22749
|
let aiReferralTotal = 0;
|
|
22414
22750
|
if (kind === "all" || kind === TrafficEventKinds.crawler) {
|
|
22415
22751
|
const crawlerFilters = [
|
|
@@ -22436,6 +22772,31 @@ async function trafficRoutes(app, opts) {
|
|
|
22436
22772
|
});
|
|
22437
22773
|
}
|
|
22438
22774
|
}
|
|
22775
|
+
if (kind === "all" || kind === TrafficEventKinds["ai-user-fetch"]) {
|
|
22776
|
+
const userFetchFilters = [
|
|
22777
|
+
eq24(aiUserFetchEventsHourly.projectId, project.id),
|
|
22778
|
+
gte3(aiUserFetchEventsHourly.tsHour, sinceIso),
|
|
22779
|
+
lte2(aiUserFetchEventsHourly.tsHour, untilIso)
|
|
22780
|
+
];
|
|
22781
|
+
if (sourceIdParam) userFetchFilters.push(eq24(aiUserFetchEventsHourly.sourceId, sourceIdParam));
|
|
22782
|
+
const userFetchWhere = and19(...userFetchFilters);
|
|
22783
|
+
const total = app.db.select({ total: sql10`COALESCE(SUM(${aiUserFetchEventsHourly.hits}), 0)` }).from(aiUserFetchEventsHourly).where(userFetchWhere).get();
|
|
22784
|
+
aiUserFetchTotal = Number(total?.total ?? 0);
|
|
22785
|
+
const rows = app.db.select().from(aiUserFetchEventsHourly).where(userFetchWhere).orderBy(desc13(aiUserFetchEventsHourly.tsHour)).limit(limit).all();
|
|
22786
|
+
for (const r of rows) {
|
|
22787
|
+
events.push({
|
|
22788
|
+
kind: TrafficEventKinds["ai-user-fetch"],
|
|
22789
|
+
sourceId: r.sourceId,
|
|
22790
|
+
tsHour: r.tsHour,
|
|
22791
|
+
botId: r.botId,
|
|
22792
|
+
operator: r.operator,
|
|
22793
|
+
verificationStatus: r.verificationStatus,
|
|
22794
|
+
pathNormalized: r.pathNormalized,
|
|
22795
|
+
status: r.status,
|
|
22796
|
+
hits: r.hits
|
|
22797
|
+
});
|
|
22798
|
+
}
|
|
22799
|
+
}
|
|
22439
22800
|
if (kind === "all" || kind === TrafficEventKinds["ai-referral"]) {
|
|
22440
22801
|
const aiFilters = [
|
|
22441
22802
|
eq24(aiReferralEventsHourly.projectId, project.id),
|
|
@@ -22469,6 +22830,7 @@ async function trafficRoutes(app, opts) {
|
|
|
22469
22830
|
windowEnd: untilIso,
|
|
22470
22831
|
totals: {
|
|
22471
22832
|
crawlerHits: crawlerTotal,
|
|
22833
|
+
aiUserFetchHits: aiUserFetchTotal,
|
|
22472
22834
|
aiReferralHits: aiReferralTotal
|
|
22473
22835
|
},
|
|
22474
22836
|
events: trimmed
|
|
@@ -23542,6 +23904,76 @@ var TRAFFIC_SOURCE_CHECKS = [
|
|
|
23542
23904
|
scopesCheck2
|
|
23543
23905
|
];
|
|
23544
23906
|
|
|
23907
|
+
// ../api-routes/src/doctor/checks/wordpress-publish.ts
|
|
23908
|
+
// Doctor checks for the WordPress publishing integration. The single check
// below is skipped when there is no project, no connection store, or no stored
// connection; otherwise it calls verifyWordpressConnection and reports ok/fail.
var WORDPRESS_PUBLISH_CHECKS = [
  {
    id: "wordpress.publish.connection",
    category: CheckCategories.auth,
    scope: CheckScopes.project,
    title: "WordPress publishing connection",
    run: async (ctx) => {
      // Small builder so every outcome has the same field layout.
      const outcome = (status, code, summary, remediation) => ({ status, code, summary, remediation });

      if (!ctx.project) {
        return outcome(CheckStatuses.skipped, "wordpress.publish.no-project", "Project context required.", null);
      }

      const connectionStore = ctx.wordpressConnectionStore;
      if (!connectionStore) {
        return outcome(
          CheckStatuses.skipped,
          "wordpress.publish.store-unavailable",
          "WordPress connection store is not configured for this deployment.",
          null
        );
      }

      const projectName = ctx.project.name;
      const connection = connectionStore.getConnection(projectName);
      if (!connection) {
        return outcome(
          CheckStatuses.skipped,
          "wordpress.publish.not-configured",
          `No WordPress publishing connection configured for ${projectName}.`,
          `If this project publishes to WordPress, run \`canonry wordpress connect ${projectName} --url <url> --user <user>\`.`
        );
      }

      try {
        const verified = await verifyWordpressConnection(connection);
        return {
          ...outcome(
            CheckStatuses.ok,
            "wordpress.publish.connected",
            `WordPress publishing connection verified; wp/v2 REST API reachable at ${verified.url}.`,
            null
          ),
          details: {
            url: verified.url,
            wordpressVersion: verified.version,
            pageCount: verified.pageCount
          }
        };
      } catch (err) {
        const credentialsRejected = err instanceof WordpressApiError && err.code === "AUTH_INVALID";
        if (!credentialsRejected) {
          // Anything other than a rejected credential is reported generically,
          // carrying the error text (or its string form) in the details.
          return {
            ...outcome(
              CheckStatuses.fail,
              "wordpress.publish.verification-failed",
              "WordPress publishing connection could not be verified.",
              "Confirm the site URL is correct and the WordPress REST API is reachable."
            ),
            details: { error: err instanceof Error ? err.message : String(err) }
          };
        }
        return {
          ...outcome(
            CheckStatuses.fail,
            "wordpress.publish.unauthorized",
            "WordPress rejected the stored application password.",
            `Regenerate the Application Password in wp-admin (Users \u2192 Profile \u2192 Application Passwords), then reconnect with \`canonry wordpress connect ${projectName} --url <url> --user <user>\`.`
          ),
          details: { error: err.message }
        };
      }
    }
  }
];
|
|
23976
|
+
|
|
23545
23977
|
// ../api-routes/src/doctor/registry.ts
|
|
23546
23978
|
var ALL_CHECKS = [
|
|
23547
23979
|
// Runtime-state checks run first so file-system gone errors surface
|
|
@@ -23549,6 +23981,7 @@ var ALL_CHECKS = [
|
|
|
23549
23981
|
...RUNTIME_STATE_CHECKS,
|
|
23550
23982
|
...GOOGLE_AUTH_CHECKS,
|
|
23551
23983
|
...BING_AUTH_CHECKS,
|
|
23984
|
+
...WORDPRESS_PUBLISH_CHECKS,
|
|
23552
23985
|
...GA_AUTH_CHECKS,
|
|
23553
23986
|
...PROVIDERS_CHECKS,
|
|
23554
23987
|
...TRAFFIC_SOURCE_CHECKS,
|
|
@@ -23633,6 +24066,7 @@ async function doctorRoutes(app, opts) {
|
|
|
23633
24066
|
project: null,
|
|
23634
24067
|
googleConnectionStore: opts.googleConnectionStore,
|
|
23635
24068
|
bingConnectionStore: opts.bingConnectionStore,
|
|
24069
|
+
wordpressConnectionStore: opts.wordpressConnectionStore,
|
|
23636
24070
|
ga4CredentialStore: opts.ga4CredentialStore,
|
|
23637
24071
|
getGoogleAuthConfig: opts.getGoogleAuthConfig,
|
|
23638
24072
|
redirectUri,
|
|
@@ -23655,6 +24089,7 @@ async function doctorRoutes(app, opts) {
|
|
|
23655
24089
|
},
|
|
23656
24090
|
googleConnectionStore: opts.googleConnectionStore,
|
|
23657
24091
|
bingConnectionStore: opts.bingConnectionStore,
|
|
24092
|
+
wordpressConnectionStore: opts.wordpressConnectionStore,
|
|
23658
24093
|
ga4CredentialStore: opts.ga4CredentialStore,
|
|
23659
24094
|
getGoogleAuthConfig: opts.getGoogleAuthConfig,
|
|
23660
24095
|
redirectUri,
|
|
@@ -24283,6 +24718,7 @@ async function apiRoutes(app, opts) {
|
|
|
24283
24718
|
await api.register(doctorRoutes, {
|
|
24284
24719
|
googleConnectionStore: opts.googleConnectionStore,
|
|
24285
24720
|
bingConnectionStore: opts.bingConnectionStore,
|
|
24721
|
+
wordpressConnectionStore: opts.wordpressConnectionStore,
|
|
24286
24722
|
ga4CredentialStore: opts.ga4CredentialStore,
|
|
24287
24723
|
getGoogleAuthConfig: opts.getGoogleAuthConfig,
|
|
24288
24724
|
publicUrl: opts.publicUrl,
|
|
@@ -29445,7 +29881,7 @@ function readStoredGroundingSources(rawResponse) {
|
|
|
29445
29881
|
return result;
|
|
29446
29882
|
}
|
|
29447
29883
|
async function backfillInsightsCommand(project, opts) {
|
|
29448
|
-
const { IntelligenceService: IntelligenceService2 } = await import("./intelligence-service-
|
|
29884
|
+
const { IntelligenceService: IntelligenceService2 } = await import("./intelligence-service-OCREQUCQ.js");
|
|
29449
29885
|
const config = loadConfig();
|
|
29450
29886
|
const db = createClient(config.database);
|
|
29451
29887
|
migrate(db);
|
|
@@ -29860,42 +30296,74 @@ async function backfillTrafficClassificationCommand(opts) {
|
|
|
29860
30296
|
providerResource: { type: "cloud_run_revision", labels: {} },
|
|
29861
30297
|
providerLabels: {}
|
|
29862
30298
|
};
|
|
29863
|
-
const
|
|
30299
|
+
const userFetch = classifyAiUserFetch(probe);
|
|
30300
|
+
const classified = userFetch ?? classifyCrawler(probe);
|
|
29864
30301
|
if (!classified) continue;
|
|
29865
30302
|
result.reclassified++;
|
|
29866
30303
|
result.byBot[classified.botId] = (result.byBot[classified.botId] ?? 0) + 1;
|
|
29867
30304
|
if (isDryRun) continue;
|
|
29868
|
-
db.update(rawEventSamples).set({ eventType: TrafficEventKinds.crawler }).where(eq35(rawEventSamples.id, snap.id)).run();
|
|
30305
|
+
db.update(rawEventSamples).set({ eventType: userFetch ? TrafficEventKinds["ai-user-fetch"] : TrafficEventKinds.crawler }).where(eq35(rawEventSamples.id, snap.id)).run();
|
|
29869
30306
|
const tsHour = new Date(snap.ts);
|
|
29870
30307
|
tsHour.setUTCMinutes(0, 0, 0);
|
|
29871
|
-
|
|
29872
|
-
|
|
29873
|
-
|
|
29874
|
-
|
|
29875
|
-
|
|
29876
|
-
|
|
29877
|
-
|
|
29878
|
-
|
|
29879
|
-
|
|
29880
|
-
|
|
29881
|
-
|
|
29882
|
-
|
|
29883
|
-
|
|
29884
|
-
}).onConflictDoUpdate({
|
|
29885
|
-
target: [
|
|
29886
|
-
crawlerEventsHourly.projectId,
|
|
29887
|
-
crawlerEventsHourly.sourceId,
|
|
29888
|
-
crawlerEventsHourly.tsHour,
|
|
29889
|
-
crawlerEventsHourly.botId,
|
|
29890
|
-
crawlerEventsHourly.verificationStatus,
|
|
29891
|
-
crawlerEventsHourly.pathNormalized,
|
|
29892
|
-
crawlerEventsHourly.status
|
|
29893
|
-
],
|
|
29894
|
-
set: {
|
|
29895
|
-
hits: sql15`${crawlerEventsHourly.hits} + 1`,
|
|
30308
|
+
if (userFetch) {
|
|
30309
|
+
db.insert(aiUserFetchEventsHourly).values({
|
|
30310
|
+
projectId: snap.projectId,
|
|
30311
|
+
sourceId: snap.sourceId,
|
|
30312
|
+
tsHour: tsHour.toISOString(),
|
|
30313
|
+
botId: userFetch.botId,
|
|
30314
|
+
operator: userFetch.operator,
|
|
30315
|
+
verificationStatus: userFetch.verificationStatus,
|
|
30316
|
+
pathNormalized: snap.pathNormalized,
|
|
30317
|
+
status: snap.status ?? 200,
|
|
30318
|
+
hits: 1,
|
|
30319
|
+
sampledUserAgent: snap.userAgent,
|
|
30320
|
+
createdAt: now,
|
|
29896
30321
|
updatedAt: now
|
|
29897
|
-
}
|
|
29898
|
-
|
|
30322
|
+
}).onConflictDoUpdate({
|
|
30323
|
+
target: [
|
|
30324
|
+
aiUserFetchEventsHourly.projectId,
|
|
30325
|
+
aiUserFetchEventsHourly.sourceId,
|
|
30326
|
+
aiUserFetchEventsHourly.tsHour,
|
|
30327
|
+
aiUserFetchEventsHourly.botId,
|
|
30328
|
+
aiUserFetchEventsHourly.verificationStatus,
|
|
30329
|
+
aiUserFetchEventsHourly.pathNormalized,
|
|
30330
|
+
aiUserFetchEventsHourly.status
|
|
30331
|
+
],
|
|
30332
|
+
set: {
|
|
30333
|
+
hits: sql15`${aiUserFetchEventsHourly.hits} + 1`,
|
|
30334
|
+
updatedAt: now
|
|
30335
|
+
}
|
|
30336
|
+
}).run();
|
|
30337
|
+
} else {
|
|
30338
|
+
db.insert(crawlerEventsHourly).values({
|
|
30339
|
+
projectId: snap.projectId,
|
|
30340
|
+
sourceId: snap.sourceId,
|
|
30341
|
+
tsHour: tsHour.toISOString(),
|
|
30342
|
+
botId: classified.botId,
|
|
30343
|
+
operator: classified.operator,
|
|
30344
|
+
verificationStatus: classified.verificationStatus,
|
|
30345
|
+
pathNormalized: snap.pathNormalized,
|
|
30346
|
+
status: snap.status ?? 200,
|
|
30347
|
+
hits: 1,
|
|
30348
|
+
sampledUserAgent: snap.userAgent,
|
|
30349
|
+
createdAt: now,
|
|
30350
|
+
updatedAt: now
|
|
30351
|
+
}).onConflictDoUpdate({
|
|
30352
|
+
target: [
|
|
30353
|
+
crawlerEventsHourly.projectId,
|
|
30354
|
+
crawlerEventsHourly.sourceId,
|
|
30355
|
+
crawlerEventsHourly.tsHour,
|
|
30356
|
+
crawlerEventsHourly.botId,
|
|
30357
|
+
crawlerEventsHourly.verificationStatus,
|
|
30358
|
+
crawlerEventsHourly.pathNormalized,
|
|
30359
|
+
crawlerEventsHourly.status
|
|
30360
|
+
],
|
|
30361
|
+
set: {
|
|
30362
|
+
hits: sql15`${crawlerEventsHourly.hits} + 1`,
|
|
30363
|
+
updatedAt: now
|
|
30364
|
+
}
|
|
30365
|
+
}).run();
|
|
30366
|
+
}
|
|
29899
30367
|
}
|
|
29900
30368
|
if (!isDryRun) {
|
|
29901
30369
|
const afterRow = db.select({ n: sql15`count(*)` }).from(rawEventSamples).where(and28(
|