@ainyc/canonry 4.46.0 → 4.47.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ import {
5
5
  loadConfig,
6
6
  loadConfigRaw,
7
7
  saveConfigPatch
8
- } from "./chunk-GRFMZ7PD.js";
8
+ } from "./chunk-ON545FBK.js";
9
9
  import {
10
10
  DEFAULT_RUN_HISTORY_LIMIT,
11
11
  IntelligenceService,
@@ -80,10 +80,13 @@ import {
80
80
  smoothedRunDelta,
81
81
  trafficSources,
82
82
  usageCounters
83
- } from "./chunk-TBADB57G.js";
83
+ } from "./chunk-M7MSNUNQ.js";
84
84
  import {
85
85
  AGENT_MEMORY_VALUE_MAX_BYTES,
86
86
  AGENT_PROVIDER_IDS,
87
+ AI_ENGINE_DOMAINS,
88
+ AI_ENGINE_SELF_DOMAINS,
89
+ AI_PROVIDER_INFRA_DOMAINS,
87
90
  AgentProviderIds,
88
91
  AppError,
89
92
  CcReleaseSyncStatuses,
@@ -110,6 +113,7 @@ import {
110
113
  TrafficSourceAuthModes,
111
114
  TrafficSourceStatuses,
112
115
  TrafficSourceTypes,
116
+ VERTEX_AI_SEARCH_PROXY_DOMAIN,
113
117
  VerificationStatuses,
114
118
  absolutizeProjectUrl,
115
119
  actionConfidenceLabel,
@@ -193,7 +197,7 @@ import {
193
197
  visibilityStateFromAnswerMentioned,
194
198
  windowCutoff,
195
199
  wordpressEnvSchema
196
- } from "./chunk-Q7XFJO2V.js";
200
+ } from "./chunk-4WXY57ET.js";
197
201
 
198
202
  // src/telemetry.ts
199
203
  import crypto from "crypto";
@@ -2764,16 +2768,10 @@ async function analyticsRoutes(app) {
2764
2768
  return reply.send({ overall, byQuery, runId: latestRunId, window });
2765
2769
  });
2766
2770
  }
2767
- var PROVIDER_INFRA_DOMAINS = /* @__PURE__ */ new Set([
2768
- "vertexaisearch.cloud.google.com",
2769
- "openai.com",
2770
- "anthropic.com",
2771
- "googleapis.com"
2772
- ]);
2773
2771
  function isProviderInfraDomain(uri) {
2774
2772
  try {
2775
2773
  const host = new URL(uri).hostname.toLowerCase();
2776
- for (const blocked of PROVIDER_INFRA_DOMAINS) {
2774
+ for (const blocked of AI_PROVIDER_INFRA_DOMAINS) {
2777
2775
  if (host === blocked || host.endsWith(`.${blocked}`)) return true;
2778
2776
  }
2779
2777
  } catch {
@@ -5636,8 +5634,13 @@ function normalizeDomain(domain) {
5636
5634
  }
5637
5635
  function extractPath(url) {
5638
5636
  if (!url) return "";
5639
- const match = /^https?:\/\/[^/]+(.*)$/.exec(url.trim());
5640
- const path16 = match ? match[1] : url.trim();
5637
+ const trimmed = url.trim();
5638
+ let path16;
5639
+ try {
5640
+ path16 = new URL(trimmed).pathname;
5641
+ } catch {
5642
+ path16 = trimmed;
5643
+ }
5641
5644
  const stripped = path16.replace(/\/+$/, "");
5642
5645
  return stripped || "/";
5643
5646
  }
@@ -11127,7 +11130,7 @@ function buildOperationId(method, path16) {
11127
11130
  }
11128
11131
  return part;
11129
11132
  });
11130
- return [method, ...parts].join("-").replace(/[^a-zA-Z0-9]+(.)/g, (_match, char) => char.toUpperCase()).replace(/^[^a-zA-Z]+/, "");
11133
+ return [method, ...parts].join("-").replace(/[^a-z0-9]+(.)/gi, (_match, char) => char.toUpperCase()).replace(/^[^a-z]+/i, "");
11131
11134
  }
11132
11135
 
11133
11136
  // ../api-routes/src/settings.ts
@@ -11290,7 +11293,7 @@ function parseKindParam(raw) {
11290
11293
  if (raw === void 0 || raw === null || raw === "") return SchedulableRunKinds["answer-visibility"];
11291
11294
  const parsed = schedulableRunKindSchema.safeParse(raw);
11292
11295
  if (!parsed.success) {
11293
- throw validationError(`Invalid kind "${String(raw)}". Must be one of: ${Object.values(SchedulableRunKinds).join(", ")}`);
11296
+ throw validationError(`Invalid kind "${JSON.stringify(raw)}". Must be one of: ${Object.values(SchedulableRunKinds).join(", ")}`);
11294
11297
  }
11295
11298
  return parsed.data;
11296
11299
  }
@@ -11941,6 +11944,22 @@ var GA4_DEFAULT_SYNC_DAYS = 30;
11941
11944
  var GA4_MAX_SYNC_DAYS = 90;
11942
11945
  var GA4_REQUEST_TIMEOUT_MS = 3e4;
11943
11946
  var GA4_MAX_PAGES = 50;
11947
+ var GA4_DIMENSIONS = {
11948
+ date: "date",
11949
+ landingPagePlusQueryString: "landingPagePlusQueryString",
11950
+ sessionSource: "sessionSource",
11951
+ sessionMedium: "sessionMedium",
11952
+ sessionManualSource: "sessionManualSource",
11953
+ sessionManualMedium: "sessionManualMedium",
11954
+ firstUserSource: "firstUserSource",
11955
+ firstUserMedium: "firstUserMedium",
11956
+ sessionDefaultChannelGrouping: "sessionDefaultChannelGrouping",
11957
+ sessionDefaultChannelGroup: "sessionDefaultChannelGroup"
11958
+ };
11959
+ var GA4_METRICS = {
11960
+ sessions: "sessions",
11961
+ totalUsers: "totalUsers"
11962
+ };
11944
11963
 
11945
11964
  // ../integration-google-analytics/src/types.ts
11946
11965
  var GA4ApiError = class extends Error {
@@ -12045,7 +12064,9 @@ function escapeRegExp2(str) {
12045
12064
  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
12046
12065
  }
12047
12066
  async function runReport(accessToken, propertyId, request) {
12048
- const url = `${GA4_DATA_API_BASE}/properties/${propertyId}:runReport`;
12067
+ validatePropertyId(propertyId);
12068
+ const safePropertyId = encodeURIComponent(propertyId);
12069
+ const url = `${GA4_DATA_API_BASE}/properties/${safePropertyId}:runReport`;
12049
12070
  const res = await fetch(url, {
12050
12071
  method: "POST",
12051
12072
  headers: {
@@ -12119,7 +12140,7 @@ async function batchRunReports(accessToken, propertyId, requests) {
12119
12140
  return data.reports;
12120
12141
  }
12121
12142
  function formatDate2(d) {
12122
- return d.toISOString().split("T")[0];
12143
+ return d.toISOString().slice(0, 10);
12123
12144
  }
12124
12145
  var AI_REFERRAL_SOURCE_FILTERS = [
12125
12146
  { matchType: "CONTAINS", value: "perplexity" },
@@ -12130,8 +12151,8 @@ var AI_REFERRAL_SOURCE_FILTERS = [
12130
12151
  { matchType: "CONTAINS", value: "anthropic" },
12131
12152
  { matchType: "CONTAINS", value: "copilot" },
12132
12153
  { matchType: "CONTAINS", value: "phind" },
12133
- { matchType: "EXACT", value: "you.com" },
12134
- { matchType: "CONTAINS", value: "meta.ai" }
12154
+ { matchType: "EXACT", value: AI_ENGINE_DOMAINS.you },
12155
+ { matchType: "CONTAINS", value: AI_ENGINE_DOMAINS.metaAi }
12135
12156
  ];
12136
12157
  async function fetchTrafficByLandingPage(accessToken, propertyId, days) {
12137
12158
  validateAccessToken2(accessToken);
@@ -12150,12 +12171,12 @@ async function fetchTrafficByLandingPage(accessToken, propertyId, days) {
12150
12171
  const request = {
12151
12172
  dateRanges: [{ startDate: formatDate2(startDate), endDate: formatDate2(endDate) }],
12152
12173
  dimensions: [
12153
- { name: "date" },
12154
- { name: "landingPagePlusQueryString" }
12174
+ { name: GA4_DIMENSIONS.date },
12175
+ { name: GA4_DIMENSIONS.landingPagePlusQueryString }
12155
12176
  ],
12156
12177
  metrics: [
12157
- { name: "sessions" },
12158
- { name: "totalUsers" }
12178
+ { name: GA4_METRICS.sessions },
12179
+ { name: GA4_METRICS.totalUsers }
12159
12180
  ],
12160
12181
  limit: PAGE_SIZE,
12161
12182
  offset
@@ -12184,11 +12205,11 @@ async function fetchTrafficByLandingPage(accessToken, propertyId, days) {
12184
12205
  organicPageCount++;
12185
12206
  const organicRequest = {
12186
12207
  dateRanges: [{ startDate: formatDate2(startDate), endDate: formatDate2(endDate) }],
12187
- dimensions: [{ name: "date" }, { name: "landingPagePlusQueryString" }],
12188
- metrics: [{ name: "sessions" }],
12208
+ dimensions: [{ name: GA4_DIMENSIONS.date }, { name: GA4_DIMENSIONS.landingPagePlusQueryString }],
12209
+ metrics: [{ name: GA4_METRICS.sessions }],
12189
12210
  dimensionFilter: {
12190
12211
  filter: {
12191
- fieldName: "sessionDefaultChannelGrouping",
12212
+ fieldName: GA4_DIMENSIONS.sessionDefaultChannelGrouping,
12192
12213
  stringFilter: { matchType: "EXACT", value: "Organic Search" }
12193
12214
  }
12194
12215
  },
@@ -12212,11 +12233,11 @@ async function fetchTrafficByLandingPage(accessToken, propertyId, days) {
12212
12233
  directPageCount++;
12213
12234
  const directRequest = {
12214
12235
  dateRanges: [{ startDate: formatDate2(startDate), endDate: formatDate2(endDate) }],
12215
- dimensions: [{ name: "date" }, { name: "landingPagePlusQueryString" }],
12216
- metrics: [{ name: "sessions" }],
12236
+ dimensions: [{ name: GA4_DIMENSIONS.date }, { name: GA4_DIMENSIONS.landingPagePlusQueryString }],
12237
+ metrics: [{ name: GA4_METRICS.sessions }],
12217
12238
  dimensionFilter: {
12218
12239
  filter: {
12219
- fieldName: "sessionDefaultChannelGrouping",
12240
+ fieldName: GA4_DIMENSIONS.sessionDefaultChannelGrouping,
12220
12241
  stringFilter: { matchType: "EXACT", value: "Direct" }
12221
12242
  }
12222
12243
  },
@@ -12261,8 +12282,8 @@ async function verifyConnectionWithToken(accessToken, propertyId) {
12261
12282
  startDate.setDate(startDate.getDate() - 1);
12262
12283
  await runReport(accessToken, propertyId, {
12263
12284
  dateRanges: [{ startDate: formatDate2(startDate), endDate: formatDate2(endDate) }],
12264
- dimensions: [{ name: "date" }],
12265
- metrics: [{ name: "sessions" }],
12285
+ dimensions: [{ name: GA4_DIMENSIONS.date }],
12286
+ metrics: [{ name: GA4_METRICS.sessions }],
12266
12287
  limit: 1
12267
12288
  });
12268
12289
  return true;
@@ -12280,16 +12301,16 @@ async function fetchAggregateSummary(accessToken, propertyId, days) {
12280
12301
  {
12281
12302
  dateRanges: [dateRange],
12282
12303
  dimensions: [],
12283
- metrics: [{ name: "sessions" }, { name: "totalUsers" }],
12304
+ metrics: [{ name: GA4_METRICS.sessions }, { name: GA4_METRICS.totalUsers }],
12284
12305
  limit: 1
12285
12306
  },
12286
12307
  {
12287
12308
  dateRanges: [dateRange],
12288
12309
  dimensions: [],
12289
- metrics: [{ name: "sessions" }],
12310
+ metrics: [{ name: GA4_METRICS.sessions }],
12290
12311
  dimensionFilter: {
12291
12312
  filter: {
12292
- fieldName: "sessionDefaultChannelGrouping",
12313
+ fieldName: GA4_DIMENSIONS.sessionDefaultChannelGrouping,
12293
12314
  stringFilter: { matchType: "EXACT", value: "Organic Search" }
12294
12315
  }
12295
12316
  },
@@ -12325,16 +12346,16 @@ async function fetchWindowSummary(accessToken, propertyId, windowKey) {
12325
12346
  {
12326
12347
  dateRanges: [dateRange],
12327
12348
  dimensions: [],
12328
- metrics: [{ name: "sessions" }, { name: "totalUsers" }],
12349
+ metrics: [{ name: GA4_METRICS.sessions }, { name: GA4_METRICS.totalUsers }],
12329
12350
  limit: 1
12330
12351
  },
12331
12352
  {
12332
12353
  dateRanges: [dateRange],
12333
12354
  dimensions: [],
12334
- metrics: [{ name: "sessions" }],
12355
+ metrics: [{ name: GA4_METRICS.sessions }],
12335
12356
  dimensionFilter: {
12336
12357
  filter: {
12337
- fieldName: "sessionDefaultChannelGrouping",
12358
+ fieldName: GA4_DIMENSIONS.sessionDefaultChannelGrouping,
12338
12359
  stringFilter: { matchType: "EXACT", value: "Organic Search" }
12339
12360
  }
12340
12361
  },
@@ -12343,10 +12364,10 @@ async function fetchWindowSummary(accessToken, propertyId, windowKey) {
12343
12364
  {
12344
12365
  dateRanges: [dateRange],
12345
12366
  dimensions: [],
12346
- metrics: [{ name: "sessions" }],
12367
+ metrics: [{ name: GA4_METRICS.sessions }],
12347
12368
  dimensionFilter: {
12348
12369
  filter: {
12349
- fieldName: "sessionDefaultChannelGrouping",
12370
+ fieldName: GA4_DIMENSIONS.sessionDefaultChannelGrouping,
12350
12371
  stringFilter: { matchType: "EXACT", value: "Direct" }
12351
12372
  }
12352
12373
  },
@@ -12379,9 +12400,9 @@ async function fetchAiReferrals(accessToken, propertyId, days) {
12379
12400
  const PAGE_SIZE = 1e3;
12380
12401
  const rows = [];
12381
12402
  const dimensionPairs = [
12382
- ["sessionSource", "sessionMedium", "session"],
12383
- ["firstUserSource", "firstUserMedium", "first_user"],
12384
- ["sessionManualSource", "sessionManualMedium", "manual_utm"]
12403
+ [GA4_DIMENSIONS.sessionSource, GA4_DIMENSIONS.sessionMedium, "session"],
12404
+ [GA4_DIMENSIONS.firstUserSource, GA4_DIMENSIONS.firstUserMedium, "first_user"],
12405
+ [GA4_DIMENSIONS.sessionManualSource, GA4_DIMENSIONS.sessionManualMedium, "manual_utm"]
12385
12406
  ];
12386
12407
  for (const [sourceDim, mediumDim, dimLabel] of dimensionPairs) {
12387
12408
  let offset = 0;
@@ -12391,15 +12412,15 @@ async function fetchAiReferrals(accessToken, propertyId, days) {
12391
12412
  const request = {
12392
12413
  dateRanges: [{ startDate: formatDate2(startDate), endDate: formatDate2(endDate) }],
12393
12414
  dimensions: [
12394
- { name: "date" },
12415
+ { name: GA4_DIMENSIONS.date },
12395
12416
  { name: sourceDim },
12396
12417
  { name: mediumDim },
12397
- { name: "sessionDefaultChannelGroup" },
12398
- { name: "landingPagePlusQueryString" }
12418
+ { name: GA4_DIMENSIONS.sessionDefaultChannelGrouping },
12419
+ { name: GA4_DIMENSIONS.landingPagePlusQueryString }
12399
12420
  ],
12400
12421
  metrics: [
12401
- { name: "sessions" },
12402
- { name: "totalUsers" }
12422
+ { name: GA4_METRICS.sessions },
12423
+ { name: GA4_METRICS.totalUsers }
12403
12424
  ],
12404
12425
  dimensionFilter: {
12405
12426
  orGroup: {
@@ -12467,24 +12488,25 @@ async function fetchSocialReferrals(accessToken, propertyId, days) {
12467
12488
  const PAGE_SIZE = 1e3;
12468
12489
  const rows = [];
12469
12490
  let offset = 0;
12470
- while (true) {
12491
+ let pageCount = 0;
12492
+ while (pageCount < GA4_MAX_PAGES) {
12471
12493
  const request = {
12472
12494
  dateRanges: [{ startDate: formatDate2(startDate), endDate: formatDate2(endDate) }],
12473
12495
  dimensions: [
12474
- { name: "date" },
12475
- { name: "sessionSource" },
12476
- { name: "sessionMedium" },
12477
- { name: "sessionDefaultChannelGroup" }
12496
+ { name: GA4_DIMENSIONS.date },
12497
+ { name: GA4_DIMENSIONS.sessionSource },
12498
+ { name: GA4_DIMENSIONS.sessionMedium },
12499
+ { name: GA4_DIMENSIONS.sessionDefaultChannelGroup }
12478
12500
  ],
12479
12501
  metrics: [
12480
- { name: "sessions" },
12481
- { name: "totalUsers" }
12502
+ { name: GA4_METRICS.sessions },
12503
+ { name: GA4_METRICS.totalUsers }
12482
12504
  ],
12483
12505
  dimensionFilter: {
12484
12506
  orGroup: {
12485
12507
  expressions: SOCIAL_CHANNEL_GROUPS.map((value) => ({
12486
12508
  filter: {
12487
- fieldName: "sessionDefaultChannelGroup",
12509
+ fieldName: GA4_DIMENSIONS.sessionDefaultChannelGroup,
12488
12510
  stringFilter: { matchType: "EXACT", value }
12489
12511
  }
12490
12512
  }))
@@ -12506,6 +12528,7 @@ async function fetchSocialReferrals(accessToken, propertyId, days) {
12506
12528
  rows.push(...pageRows);
12507
12529
  const totalRows = response.rowCount ?? 0;
12508
12530
  offset += pageRows.length;
12531
+ pageCount += 1;
12509
12532
  if (pageRows.length < PAGE_SIZE || offset >= totalRows) break;
12510
12533
  }
12511
12534
  for (const row of rows) {
@@ -12653,7 +12676,8 @@ async function googleRoutes(app, opts) {
12653
12676
  <li>Under "Authorized redirect URIs", add:<br><code style="background:#1e1e1e;color:#e0e0e0;padding:4px 8px;border-radius:4px;display:inline-block;margin-top:4px">${request.query.state ? (() => {
12654
12677
  try {
12655
12678
  const s = verifySignedState(request.query.state, stateSecret);
12656
- return escapeHtml2(String(s?.redirectUri ?? "Could not determine URI"));
12679
+ const uri = s?.redirectUri;
12680
+ return escapeHtml2(typeof uri === "string" ? uri : "Could not determine URI");
12657
12681
  } catch {
12658
12682
  return "Could not determine URI";
12659
12683
  }
@@ -13421,7 +13445,7 @@ async function getCrawlIssues(apiKey, siteUrl) {
13421
13445
  // ../api-routes/src/bing.ts
13422
13446
  function parseBingDate(value) {
13423
13447
  if (!value) return null;
13424
- const match = /\/Date\((-?\d+)[^)]*\)\//.exec(value);
13448
+ const match = /\/Date\((-?\d+)(?:[-+]\d+)?\)\//.exec(value);
13425
13449
  if (!match) return null;
13426
13450
  const ms = parseInt(match[1], 10);
13427
13451
  if (ms <= 0) return null;
@@ -13438,7 +13462,7 @@ function isBlockingIssueType(issueType) {
13438
13462
  if (!issueType) return true;
13439
13463
  const trimmed = issueType.trim();
13440
13464
  if (!trimmed) return true;
13441
- return trimmed.split(/\s+/).some((flag) => !/^(None|Seo(Issues|Concerns))$/i.test(flag));
13465
+ return trimmed.split(/\s+/).some((flag) => !/^(?:None|Seo(?:Issues|Concerns))$/i.test(flag));
13442
13466
  }
13443
13467
  async function loadBlockingCrawlIssues(apiKey, siteUrl, domain) {
13444
13468
  const now = Date.now();
@@ -15163,7 +15187,7 @@ async function fetchText(url) {
15163
15187
  }
15164
15188
  }
15165
15189
  function stripHtml(input) {
15166
- return input.replace(/<script[\s\S]*?<\/script>/gi, " ").replace(/<style[\s\S]*?<\/style>/gi, " ").replace(/<[^>]+>/g, " ").replace(/&nbsp;/gi, " ").replace(/&amp;/gi, "&").replace(/&quot;/gi, '"').replace(/&#39;/gi, "'").replace(/&lt;/gi, "<").replace(/&gt;/gi, ">").replace(/\s+/g, " ").trim();
15190
+ return input.replace(/<script[\s\S]*?<\/script>/gi, " ").replace(/<style[\s\S]*?<\/style>/gi, " ").replace(/<[^>]+>/g, " ").replace(/&nbsp;/gi, " ").replace(/&amp;/gi, "&").replace(/&quot;/gi, '"').replace(/&#39;/g, "'").replace(/&lt;/gi, "<").replace(/&gt;/gi, ">").replace(/\s+/g, " ").trim();
15167
15191
  }
15168
15192
  function extractMetaContent(html, name) {
15169
15193
  const patterns = [
@@ -15183,7 +15207,7 @@ function extractTitle(html) {
15183
15207
  function extractGeneratorVersion(html) {
15184
15208
  const generator = extractMetaContent(html, "generator");
15185
15209
  if (!generator) return null;
15186
- const match = /WordPress\s+([0-9][^ ]*)/i.exec(generator);
15210
+ const match = /WordPress\s+(\d[^ ]*)/i.exec(generator);
15187
15211
  return match?.[1] ?? generator;
15188
15212
  }
15189
15213
  function extractSchemaBlocks(html) {
@@ -15660,7 +15684,7 @@ async function deploySchema(connection, slug, schemas, env) {
15660
15684
  return {
15661
15685
  slug,
15662
15686
  status: "stripped",
15663
- schemasInjected: schemas.map((s) => String(s["@type"] ?? "Unknown")),
15687
+ schemasInjected: schemas.map((s) => typeof s["@type"] === "string" ? s["@type"] : "Unknown"),
15664
15688
  manualAssist: {
15665
15689
  manualRequired: true,
15666
15690
  targetUrl: page.link ?? `${site.siteUrl}/${slug}`,
@@ -15677,7 +15701,7 @@ async function deploySchema(connection, slug, schemas, env) {
15677
15701
  return {
15678
15702
  slug,
15679
15703
  status: "deployed",
15680
- schemasInjected: schemas.map((s) => String(s["@type"] ?? "Unknown"))
15704
+ schemasInjected: schemas.map((s) => typeof s["@type"] === "string" ? s["@type"] : "Unknown")
15681
15705
  };
15682
15706
  } catch (error) {
15683
15707
  if (error instanceof WordpressApiError && error.code === "NOT_FOUND") {
@@ -15737,7 +15761,7 @@ async function getSchemaStatus(connection, env) {
15737
15761
  while ((jsonMatch = jsonLdRegex.exec(match[1])) !== null) {
15738
15762
  try {
15739
15763
  const parsed = JSON.parse(jsonMatch[1].trim());
15740
- canonrySchemas.push(String(parsed["@type"] ?? "Unknown"));
15764
+ canonrySchemas.push(typeof parsed["@type"] === "string" ? parsed["@type"] : "Unknown");
15741
15765
  } catch {
15742
15766
  }
15743
15767
  }
@@ -16468,7 +16492,7 @@ async function wordpressRoutes(app, opts) {
16468
16492
  steps.push({ name: "google-submit", status: "completed", summary: `${succeeded}/${pageUrls.length} URLs submitted` });
16469
16493
  } else {
16470
16494
  const body = JSON.parse(googleRes.body);
16471
- const msg = body.message || body.error || `HTTP ${googleRes.statusCode}`;
16495
+ const msg = body.message ?? body.error ?? `HTTP ${googleRes.statusCode}`;
16472
16496
  if (googleRes.statusCode === 400 || googleRes.statusCode === 404) {
16473
16497
  steps.push({ name: "google-submit", status: "skipped", summary: msg });
16474
16498
  } else {
@@ -16493,7 +16517,7 @@ async function wordpressRoutes(app, opts) {
16493
16517
  steps.push({ name: "bing-submit", status: "completed", summary: `${succeeded}/${pageUrls.length} URLs submitted` });
16494
16518
  } else {
16495
16519
  const body = JSON.parse(bingRes.body);
16496
- const msg = body.message || body.error || `HTTP ${bingRes.statusCode}`;
16520
+ const msg = body.message ?? body.error ?? `HTTP ${bingRes.statusCode}`;
16497
16521
  if (bingRes.statusCode === 400 || bingRes.statusCode === 404) {
16498
16522
  steps.push({ name: "bing-submit", status: "skipped", summary: msg });
16499
16523
  } else {
@@ -17345,7 +17369,7 @@ function numberOrNull(value) {
17345
17369
  }
17346
17370
  function latencyToMs(value) {
17347
17371
  if (!value) return null;
17348
- const secondsMatch = /^([0-9]+(?:\.[0-9]+)?)s$/.exec(value.trim());
17372
+ const secondsMatch = /^(\d+(?:\.\d+)?)s$/.exec(value.trim());
17349
17373
  if (!secondsMatch) return null;
17350
17374
  const seconds = Number(secondsMatch[1]);
17351
17375
  return Number.isFinite(seconds) ? Math.round(seconds * 1e6) / 1e3 : null;
@@ -17512,6 +17536,7 @@ async function listCloudRunTrafficEvents(accessToken, options) {
17512
17536
  }
17513
17537
 
17514
17538
  // ../integration-traffic/src/rules.ts
17539
+ var LEGACY_CHATGPT_DOMAIN = "chat.openai.com";
17515
17540
  var DEFAULT_AI_CRAWLER_RULES = [
17516
17541
  {
17517
17542
  id: "openai-gptbot",
@@ -17606,15 +17631,15 @@ var DEFAULT_AI_CRAWLER_RULES = [
17606
17631
  }
17607
17632
  ];
17608
17633
  var DEFAULT_AI_REFERRER_RULES = [
17609
- { domain: "chatgpt.com", operator: "OpenAI", product: "ChatGPT" },
17610
- { domain: "chat.openai.com", operator: "OpenAI", product: "ChatGPT" },
17611
- { domain: "perplexity.ai", operator: "Perplexity", product: "Perplexity" },
17612
- { domain: "claude.ai", operator: "Anthropic", product: "Claude" },
17613
- { domain: "gemini.google.com", operator: "Google", product: "Gemini" },
17614
- { domain: "copilot.microsoft.com", operator: "Microsoft", product: "Copilot" },
17615
- { domain: "phind.com", operator: "Phind", product: "Phind" },
17616
- { domain: "you.com", operator: "You.com", product: "You.com" },
17617
- { domain: "meta.ai", operator: "Meta", product: "Meta AI" }
17634
+ { domain: AI_ENGINE_DOMAINS.chatgpt, operator: "OpenAI", product: "ChatGPT" },
17635
+ { domain: LEGACY_CHATGPT_DOMAIN, operator: "OpenAI", product: "ChatGPT" },
17636
+ { domain: AI_ENGINE_DOMAINS.perplexity, operator: "Perplexity", product: "Perplexity" },
17637
+ { domain: AI_ENGINE_DOMAINS.claude, operator: "Anthropic", product: "Claude" },
17638
+ { domain: AI_ENGINE_DOMAINS.gemini, operator: "Google", product: "Gemini" },
17639
+ { domain: AI_ENGINE_DOMAINS.copilotMicrosoft, operator: "Microsoft", product: "Copilot" },
17640
+ { domain: AI_ENGINE_DOMAINS.phind, operator: "Phind", product: "Phind" },
17641
+ { domain: AI_ENGINE_DOMAINS.you, operator: "You.com", product: "You.com" },
17642
+ { domain: AI_ENGINE_DOMAINS.metaAi, operator: "Meta", product: "Meta AI" }
17618
17643
  ];
17619
17644
 
17620
17645
  // ../integration-traffic/src/classifier.ts
@@ -21542,7 +21567,7 @@ function extractCitedDomainsFromSources(groundingSources) {
21542
21567
  }
21543
21568
  function extractDomainFromTitle(title) {
21544
21569
  const trimmed = title.trim().toLowerCase();
21545
- if (/^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z]{2,})+$/.test(trimmed)) {
21570
+ if (/^[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(?:\.[a-z]{2,})+$/.test(trimmed)) {
21546
21571
  return trimmed.replace(/^www\./, "");
21547
21572
  }
21548
21573
  return null;
@@ -21551,10 +21576,10 @@ function extractDomainFromUri(uri) {
21551
21576
  try {
21552
21577
  const url = new URL(uri);
21553
21578
  const hostname = url.hostname.replace(/^www\./, "").toLowerCase();
21554
- if (hostname.includes("chatgpt.com") || hostname.includes("openai.com")) {
21579
+ if (AI_ENGINE_SELF_DOMAINS.chatgpt.some((self) => hostname.includes(self))) {
21555
21580
  return null;
21556
21581
  }
21557
- if (hostname === "vertexaisearch.cloud.google.com") {
21582
+ if (hostname === VERTEX_AI_SEARCH_PROXY_DOMAIN) {
21558
21583
  const redirectPath = url.pathname.replace(/^\/grounding-api-redirect\//, "");
21559
21584
  if (redirectPath && redirectPath !== url.pathname) {
21560
21585
  try {
@@ -21984,7 +22009,7 @@ function extractDomainFromUri2(uri) {
21984
22009
  try {
21985
22010
  const url = new URL(uri);
21986
22011
  const hostname = url.hostname.replace(/^www\./, "").toLowerCase();
21987
- if (hostname.includes("chatgpt.com") || hostname.includes("openai.com")) {
22012
+ if (AI_ENGINE_SELF_DOMAINS.chatgpt.some((self) => hostname.includes(self))) {
21988
22013
  return null;
21989
22014
  }
21990
22015
  return hostname;
@@ -22363,7 +22388,7 @@ function extractDomainFromUri3(uri) {
22363
22388
  try {
22364
22389
  const url = new URL(uri);
22365
22390
  const hostname = url.hostname.replace(/^www\./, "").toLowerCase();
22366
- if (hostname.includes("chatgpt.com") || hostname.includes("openai.com")) {
22391
+ if (AI_ENGINE_SELF_DOMAINS.chatgpt.some((self) => hostname.includes(self))) {
22367
22392
  return null;
22368
22393
  }
22369
22394
  return hostname;
@@ -22908,8 +22933,8 @@ async function waitForStabilization(client, selector, opts = {}) {
22908
22933
  // ../provider-cdp/src/targets/chatgpt.ts
22909
22934
  var chatgptTarget = {
22910
22935
  name: "chatgpt",
22911
- baseUrl: "https://chatgpt.com",
22912
- newConversationUrl: "https://chatgpt.com/?model=auto",
22936
+ baseUrl: `https://${AI_ENGINE_DOMAINS.chatgpt}`,
22937
+ newConversationUrl: `https://${AI_ENGINE_DOMAINS.chatgpt}/?model=auto`,
22913
22938
  responseSelector: '[data-testid="conversation-turn-3"], article:last-of-type, .agent-turn:last-of-type',
22914
22939
  async submitQuery(client, query) {
22915
22940
  const inputReady = await waitForElement(
@@ -23013,8 +23038,10 @@ var chatgptTarget = {
23013
23038
  },
23014
23039
  extractCitations(client) {
23015
23040
  return (async () => {
23041
+ const selfDomainsLiteral = JSON.stringify(AI_ENGINE_SELF_DOMAINS.chatgpt);
23016
23042
  const { result } = await client.Runtime.evaluate({
23017
23043
  expression: `(() => {
23044
+ const SELF_DOMAINS = ${selfDomainsLiteral};
23018
23045
  const sources = [];
23019
23046
  const seen = new Set();
23020
23047
  const turns = document.querySelectorAll('[data-message-author-role="assistant"]');
@@ -23038,7 +23065,7 @@ var chatgptTarget = {
23038
23065
  }
23039
23066
  }
23040
23067
 
23041
- if (!seen.has(href) && hostname !== 'chatgpt.com' && hostname !== 'openai.com') {
23068
+ if (!seen.has(href) && !SELF_DOMAINS.includes(hostname)) {
23042
23069
  seen.add(href);
23043
23070
  sources.push({
23044
23071
  uri: href,
@@ -23055,7 +23082,7 @@ var chatgptTarget = {
23055
23082
  if (href && !seen.has(href)) {
23056
23083
  let hostname = '';
23057
23084
  try { hostname = new URL(href).hostname.replace(/^www\\./, ''); } catch {}
23058
- if (hostname !== 'chatgpt.com' && hostname !== 'openai.com') {
23085
+ if (!SELF_DOMAINS.includes(hostname)) {
23059
23086
  seen.add(href);
23060
23087
  sources.push({ uri: href, title: title || hostname || href });
23061
23088
  }
@@ -23145,7 +23172,7 @@ function extractCitedDomains(groundingSources) {
23145
23172
  try {
23146
23173
  const url = new URL(source.uri);
23147
23174
  const domain = url.hostname.replace(/^www\./, "").toLowerCase();
23148
- if (!domain.includes("chatgpt.com") && !domain.includes("openai.com")) {
23175
+ if (!AI_ENGINE_SELF_DOMAINS.chatgpt.some((self) => domain.includes(self))) {
23149
23176
  domains.add(domain);
23150
23177
  }
23151
23178
  } catch {
@@ -23156,7 +23183,7 @@ function extractCitedDomains(groundingSources) {
23156
23183
  return [...domains];
23157
23184
  }
23158
23185
  function extractDomainFromTitle2(title) {
23159
- const domainPattern = /^([a-z0-9]([a-z0-9-]*[a-z0-9])?\.)+[a-z]{2,}$/i;
23186
+ const domainPattern = /^(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z]{2,}$/i;
23160
23187
  const firstWord = title.split(/[\s\-–—|]/)[0]?.trim();
23161
23188
  if (firstWord && domainPattern.test(firstWord)) {
23162
23189
  return firstWord.replace(/^www\./, "").toLowerCase();
@@ -23530,7 +23557,7 @@ function extractDomainFromUri4(uri) {
23530
23557
  try {
23531
23558
  const url = new URL(uri);
23532
23559
  const hostname = url.hostname.replace(/^www\./, "").toLowerCase();
23533
- if (hostname.includes("chatgpt.com") || hostname.includes("openai.com")) {
23560
+ if (AI_ENGINE_SELF_DOMAINS.chatgpt.some((self) => hostname.includes(self))) {
23534
23561
  return null;
23535
23562
  }
23536
23563
  return hostname;
@@ -24016,10 +24043,10 @@ function extractRecommendedCompetitors(answerText, ownDomains, citedDomains, com
24016
24043
  );
24017
24044
  if (knownCompetitorKeys.size === 0) return [];
24018
24045
  const candidatePatterns = [
24019
- /^\s*(?:[-*]|\d+\.)\s+(?:\*\*)?([A-Z0-9][A-Za-z0-9][\w\s.&',/()-]{1,50}?)(?:\*\*)?\s*[:\u2014\u2013–-]/gm,
24020
- /\*\*([A-Z0-9][A-Za-z0-9][\w\s.&',/()-]{1,50}?)\*\*/g,
24046
+ /^\s*(?:[-*]|\d+\.)\s+(?:\*\*)?([A-Z0-9][A-Za-z0-9][\w\s.&',/()-]{1,50}?)(?:\*\*)?\s*[:\u2014\u2013-]/gm,
24047
+ /\*\*([A-Z0-9][A-Za-z0-9][\w\s.&',/()-]{1,50})\*\*/g,
24021
24048
  /^#{1,4}\s+(?:\d+\.\s+)?(?:\*\*)?([A-Z0-9][A-Za-z0-9][\w\s.&',/()-]{1,50}?)(?:\*\*)?$/gm,
24022
- /\[([A-Z0-9][A-Za-z0-9][\w\s.&',/()-]{1,50}?)\]\(https?:\/\/[^\s)]+\)/g
24049
+ /\[([A-Z0-9][A-Za-z0-9][\w\s.&',/()-]{1,50})\]\(https?:\/\/[^\s)]+\)/g
24023
24050
  ];
24024
24051
  const genericKeys = /* @__PURE__ */ new Set([
24025
24052
  "additional",
@@ -24814,7 +24841,7 @@ import { eq as eq29, and as and20 } from "drizzle-orm";
24814
24841
 
24815
24842
  // src/sitemap-parser.ts
24816
24843
  var log3 = createLogger("SitemapParser");
24817
- var LOC_REGEX = /<loc>\s*([^<]+?)\s*<\/loc>/gi;
24844
+ var LOC_REGEX = /<loc>([^<]+)<\/loc>/gi;
24818
24845
  var SITEMAP_TAG_REGEX = /<sitemap>[\s\S]*?<\/sitemap>/gi;
24819
24846
  var PRIVATE_IP_PATTERNS = [
24820
24847
  /^169\.254\./,
@@ -24905,9 +24932,10 @@ async function parseSitemapRecursive(url, urls, visited, depth, isChild) {
24905
24932
  for (const entry of sitemapEntries) {
24906
24933
  const locMatch = LOC_REGEX.exec(entry);
24907
24934
  LOC_REGEX.lastIndex = 0;
24908
- if (locMatch?.[1]) {
24935
+ const inner = locMatch?.[1]?.trim();
24936
+ if (inner) {
24909
24937
  await parseSitemapRecursive(
24910
- locMatch[1],
24938
+ inner,
24911
24939
  urls,
24912
24940
  visited,
24913
24941
  depth + 1,
@@ -24920,8 +24948,9 @@ async function parseSitemapRecursive(url, urls, visited, depth, isChild) {
24920
24948
  }
24921
24949
  let match;
24922
24950
  while ((match = LOC_REGEX.exec(xml)) !== null) {
24923
- if (match[1]) {
24924
- urls.add(match[1]);
24951
+ const inner = match[1]?.trim();
24952
+ if (inner) {
24953
+ urls.add(inner);
24925
24954
  }
24926
24955
  }
24927
24956
  LOC_REGEX.lastIndex = 0;
@@ -25054,7 +25083,7 @@ import { eq as eq30, desc as desc14 } from "drizzle-orm";
25054
25083
  var log5 = createLogger("BingInspectSitemap");
25055
25084
  function parseBingDate2(value) {
25056
25085
  if (!value) return null;
25057
- const match = /\/Date\((-?\d+)[^)]*\)\//.exec(value);
25086
+ const match = /\/Date\((-?\d+)(?:[-+]\d+)?\)\//.exec(value);
25058
25087
  if (!match) return null;
25059
25088
  const ms = parseInt(match[1], 10);
25060
25089
  if (ms <= 0) return null;
@@ -25064,7 +25093,7 @@ function isBlockingIssueType2(issueType) {
25064
25093
  if (!issueType) return true;
25065
25094
  const trimmed = issueType.trim();
25066
25095
  if (!trimmed) return true;
25067
- return trimmed.split(/\s+/).some((flag) => !/^(None|Seo(Issues|Concerns))$/i.test(flag));
25096
+ return trimmed.split(/\s+/).some((flag) => !/^(?:None|Seo(?:Issues|Concerns))$/i.test(flag));
25068
25097
  }
25069
25098
  async function executeBingInspectSitemap(db, runId, projectId, opts) {
25070
25099
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
@@ -25781,7 +25810,7 @@ function parseQueryLines(text, max) {
25781
25810
  if (!line) continue;
25782
25811
  line = line.replace(/^\s*(?:\d+[.)]\s*|[-*•]\s*)/, "").replace(/^["']|["']$/g, "").trim();
25783
25812
  if (!line) continue;
25784
- if (/^(here are|sure|certainly|of course|i['']ve|these are|below are)/i.test(line)) continue;
25813
+ if (/^(?:here are|sure|certainly|of course|i[']ve|these are|below are)/i.test(line)) continue;
25785
25814
  const key = line.toLowerCase();
25786
25815
  if (seen.has(key)) continue;
25787
25816
  seen.add(key);
@@ -26314,7 +26343,7 @@ function readStoredGroundingSources(rawResponse) {
26314
26343
  return result;
26315
26344
  }
26316
26345
  async function backfillInsightsCommand(project, opts) {
26317
- const { IntelligenceService: IntelligenceService2 } = await import("./intelligence-service-YFBVWCK2.js");
26346
+ const { IntelligenceService: IntelligenceService2 } = await import("./intelligence-service-ADZRFCGO.js");
26318
26347
  const config = loadConfig();
26319
26348
  const db = createClient(config.database);
26320
26349
  migrate(db);
@@ -26529,8 +26558,8 @@ var Scheduler = class {
26529
26558
  }
26530
26559
  }
26531
26560
  stopTask(key, task, verb) {
26532
- task.stop();
26533
- task.destroy();
26561
+ void task.stop();
26562
+ void task.destroy();
26534
26563
  log9.info(`task.${verb.toLowerCase()}`, { key });
26535
26564
  }
26536
26565
  registerCronTask(schedule) {
@@ -27093,7 +27122,7 @@ function parseDescription(body) {
27093
27122
  if (end === -1) return "(no description)";
27094
27123
  const block = body.slice(3, end);
27095
27124
  for (const line of block.split("\n")) {
27096
- const match = line.match(/^description:\s*(.+)$/);
27125
+ const match = line.match(/^description:\s*(\S.*)$/);
27097
27126
  if (match) return match[1].trim().replace(/^["']|["']$/g, "");
27098
27127
  }
27099
27128
  return "(no description)";
@@ -28797,7 +28826,7 @@ function mapAuditFactor(factor) {
28797
28826
  };
28798
28827
  }
28799
28828
  function parseJsonObject(input) {
28800
- const fenced = input.match(/```(?:json)?\s*([\s\S]*?)```/i);
28829
+ const fenced = input.match(/```(?:json)?([\s\S]*?)```/i);
28801
28830
  const candidate = fenced?.[1] ?? input;
28802
28831
  const start = candidate.indexOf("{");
28803
28832
  const end = candidate.lastIndexOf("}");
@@ -28864,7 +28893,6 @@ var API_ADAPTERS = [
28864
28893
  var BROWSER_ADAPTERS = [
28865
28894
  cdpChatgptAdapter
28866
28895
  ];
28867
- var ALL_ADAPTERS = [...API_ADAPTERS, ...BROWSER_ADAPTERS];
28868
28896
  var adapterMap = Object.fromEntries(
28869
28897
  API_ADAPTERS.map((a) => [a.name, a])
28870
28898
  );
@@ -29880,7 +29908,7 @@ function parseQueryResponse(raw, count) {
29880
29908
  let cleaned = line.replace(/^\s*(?:\d+[.)]\s*|[-*•]\s*)/, "").trim();
29881
29909
  cleaned = cleaned.replace(/^["']|["']$/g, "").trim();
29882
29910
  if (!cleaned) continue;
29883
- if (/^(here are|sure|certainly|of course|i've|these are|below are)/i.test(cleaned)) continue;
29911
+ if (/^(?:here are|sure|certainly|of course|i['’]ve|these are|below are)/i.test(cleaned)) continue;
29884
29912
  if (cleaned.split(/\s+/).length > 8) continue;
29885
29913
  const key = cleaned.toLowerCase();
29886
29914
  if (seen.has(key)) continue;