@ainyc/canonry 2.4.3 → 2.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1036,7 +1036,22 @@ var MIGRATIONS = [
1036
1036
  // v42: Per-project auto-extract toggle — when a release sync transitions
1037
1037
  // to ready, projects with this flag get a backlink-extract run enqueued.
1038
1038
  // Stored as INTEGER (0/1) to match SQLite boolean convention.
1039
- `ALTER TABLE projects ADD COLUMN auto_extract_backlinks INTEGER NOT NULL DEFAULT 0`
1039
+ `ALTER TABLE projects ADD COLUMN auto_extract_backlinks INTEGER NOT NULL DEFAULT 0`,
1040
+ // v43: Backfill bing_url_inspections.in_index using the new crawl-signal
1041
+ // decision tree. Legacy rows were classified with the retired Bing `InIndex`
1042
+ // flag plus a DocumentSize>0 check, which mis-classifies URLs that modern
1043
+ // Bing returns with DocumentSize=0 but a valid LastCrawledDate. Use a
1044
+ // created_at cutoff so rows written by the new code (which applies a live
1045
+ // GetCrawlIssues demotion that can't be replayed offline) are preserved.
1046
+ `UPDATE bing_url_inspections
1047
+ SET in_index = CASE
1048
+ WHEN document_size IS NOT NULL AND document_size > 0 THEN 1
1049
+ WHEN last_crawled_date IS NOT NULL AND http_code IS NOT NULL AND http_code >= 400 THEN 0
1050
+ WHEN last_crawled_date IS NOT NULL THEN 1
1051
+ WHEN discovery_date IS NOT NULL THEN 0
1052
+ ELSE NULL
1053
+ END
1054
+ WHERE created_at < '2026-04-22T00:00:00Z'`
1040
1055
  ];
1041
1056
  function isDuplicateColumnError(err) {
1042
1057
  if (!(err instanceof Error)) return false;
@@ -30,7 +30,7 @@ import {
30
30
  runs,
31
31
  schedules,
32
32
  usageCounters
33
- } from "./chunk-GZF3YIHY.js";
33
+ } from "./chunk-32YTAZBL.js";
34
34
 
35
35
  // src/config.ts
36
36
  import fs from "fs";
@@ -8148,6 +8148,13 @@ async function getKeywordStats(apiKey, siteUrl) {
8148
8148
  const data = await bingFetch(apiKey, `GetQueryStats?siteUrl=${encodedSite}`);
8149
8149
  return data ?? [];
8150
8150
  }
8151
/**
 * Fetch the crawl issues Bing reports for a site.
 *
 * Both arguments are validated before any request is made; whatever the
 * validators throw propagates to the caller.
 *
 * @param {string} apiKey - Bing API key, forwarded to `bingFetch`.
 * @param {string} siteUrl - Site URL; URI-encoded into the `siteUrl` query parameter.
 * @returns {Promise<*>} The payload returned by `bingFetch`, or an empty
 *   array when that payload is `null`/`undefined`.
 */
async function getCrawlIssues(apiKey, siteUrl) {
  validateApiKey(apiKey);
  validateSiteUrl2(siteUrl);
  const endpoint = `GetCrawlIssues?siteUrl=${encodeURIComponent(siteUrl)}`;
  const issues = await bingFetch(apiKey, endpoint);
  // Normalise a missing payload to an empty list so callers can always iterate.
  if (issues == null) {
    return [];
  }
  return issues;
}
8151
8158
 
8152
8159
  // ../api-routes/src/bing.ts
8153
8160
  function parseBingDate(value) {
@@ -8163,6 +8170,30 @@ function bingLog(level, action, ctx) {
8163
8170
  const stream = level === "error" ? process.stderr : process.stdout;
8164
8171
  stream.write(JSON.stringify(entry) + "\n");
8165
8172
  }
8173
// How long a cached crawl-issues lookup stays fresh, in milliseconds
// (60 000 ms = one minute); compared against `Date.now()` deltas.
var CRAWL_ISSUES_CACHE_TTL_MS = 60 * 1000;
// In-memory cache of crawl-issue lookups. Entries are stored under the domain
// and have the shape `{ blockedUrls, fetchedAt }`.
var crawlIssuesCache = /* @__PURE__ */ new Map();
8175
/**
 * Decide whether a crawl-issue type should count as "blocking".
 *
 * The value is treated as a list of flag names. It is non-blocking only when
 * every flag is one of `None`, `SeoIssues` or `SeoConcerns` (case-insensitive).
 * Anything unknown, empty, or not a string is treated as blocking, matching
 * the conservative handling of `null`/empty input.
 *
 * Flags may be separated by whitespace and/or commas. The comma form is what
 * a .NET `[Flags]` enum produces from `ToString()` ("A, B"); splitting on
 * whitespace alone would leave a trailing comma on the token and misclassify
 * an all-benign list as blocking.
 *
 * @param {unknown} issueType - Raw issue type value; may be `null`.
 * @returns {boolean} `true` when the issue should be treated as blocking.
 */
function isBlockingIssueType(issueType) {
  // Non-string values (null, undefined, numeric flag values, objects) cannot be
  // matched against the benign names; report them as blocking instead of
  // throwing on `.trim()`.
  if (typeof issueType !== "string") return true;
  const flags = issueType.split(/[\s,]+/).filter(Boolean);
  // Blank or separator-only input carries no information: stay conservative.
  if (flags.length === 0) return true;
  const benignFlag = /^(None|Seo(Issues|Concerns))$/i;
  return flags.some((flag) => !benignFlag.test(flag));
}
8181
/**
 * Return the set of URLs that currently have a blocking crawl issue.
 *
 * Results are cached in `crawlIssuesCache` under `domain` and reused while
 * the entry is younger than `CRAWL_ISSUES_CACHE_TTL_MS`. Errors from
 * `getCrawlIssues` propagate to the caller and leave the cache untouched.
 *
 * @param {string} apiKey - Bing API key, forwarded to `getCrawlIssues`.
 * @param {string} siteUrl - Site URL, forwarded to `getCrawlIssues`.
 * @param {string} domain - Cache key for the lookup.
 * @returns {Promise<Set<string>>} URLs whose issue type is blocking.
 */
async function loadBlockingCrawlIssues(apiKey, siteUrl, domain) {
  // Timestamp is taken before the fetch, so the TTL is measured from the
  // start of the lookup rather than from its completion.
  const startedAt = Date.now();
  const entry = crawlIssuesCache.get(domain);
  if (entry && startedAt - entry.fetchedAt < CRAWL_ISSUES_CACHE_TTL_MS) {
    return entry.blockedUrls;
  }

  const reported = await getCrawlIssues(apiKey, siteUrl);
  const blockedUrls = /* @__PURE__ */ new Set();
  for (const issue of reported) {
    // Entries without a URL cannot be matched against an inspected URL.
    if (!issue.Url) continue;
    if (!isBlockingIssueType(issue.IssueType ?? null)) continue;
    blockedUrls.add(issue.Url);
  }

  crawlIssuesCache.set(domain, { blockedUrls, fetchedAt: startedAt });
  return blockedUrls;
}
8166
8197
  async function bingRoutes(app, opts) {
8167
8198
  function requireConnectionStore() {
8168
8199
  if (opts.bingConnectionStore) return opts.bingConnectionStore;
@@ -8411,22 +8442,38 @@ async function bingRoutes(app, opts) {
8411
8442
  domain: project.canonicalDomain,
8412
8443
  url,
8413
8444
  httpStatus: result.HttpStatus ?? result.HttpCode ?? null,
8414
- inIndex: result.InIndex ?? null,
8415
8445
  documentSize: result.DocumentSize ?? null,
8416
- lastCrawledDate: result.LastCrawledDate ?? null
8446
+ lastCrawledDate: result.LastCrawledDate ?? null,
8447
+ discoveryDate: result.DiscoveryDate ?? null
8417
8448
  });
8418
8449
  const now = (/* @__PURE__ */ new Date()).toISOString();
8419
8450
  const id = crypto15.randomUUID();
8420
8451
  const httpCode = result.HttpStatus ?? result.HttpCode ?? null;
8421
- let derivedInIndex = null;
8422
- if (result.InIndex != null) {
8423
- derivedInIndex = result.InIndex;
8424
- } else if (result.DocumentSize != null && result.DocumentSize > 0) {
8425
- derivedInIndex = true;
8426
- }
8427
8452
  const lastCrawledDate = parseBingDate(result.LastCrawledDate);
8428
8453
  const inIndexDate = parseBingDate(result.InIndexDate);
8429
8454
  const discoveryDate = parseBingDate(result.DiscoveryDate);
8455
+ let derivedInIndex = null;
8456
+ if (result.DocumentSize != null && result.DocumentSize > 0) {
8457
+ derivedInIndex = true;
8458
+ } else if (lastCrawledDate != null) {
8459
+ const httpStatus = result.HttpStatus ?? result.HttpCode;
8460
+ derivedInIndex = httpStatus != null && httpStatus >= 400 ? false : true;
8461
+ } else if (discoveryDate != null) {
8462
+ derivedInIndex = false;
8463
+ }
8464
+ if (derivedInIndex === true) {
8465
+ try {
8466
+ const blockedUrls = await loadBlockingCrawlIssues(conn.apiKey, conn.siteUrl, project.canonicalDomain);
8467
+ if (blockedUrls.has(url)) {
8468
+ derivedInIndex = false;
8469
+ }
8470
+ } catch (e) {
8471
+ bingLog("warn", "inspect-url.crawl-issues-lookup-failed", {
8472
+ domain: project.canonicalDomain,
8473
+ error: e instanceof Error ? e.message : String(e)
8474
+ });
8475
+ }
8476
+ }
8430
8477
  app.db.insert(bingUrlInspections).values({
8431
8478
  id,
8432
8479
  projectId: project.id,
package/dist/cli.js CHANGED
@@ -38,7 +38,7 @@ import {
38
38
  showFirstRunNotice,
39
39
  trackEvent,
40
40
  usageError
41
- } from "./chunk-KGOT5OFT.js";
41
+ } from "./chunk-6UY2PETG.js";
42
42
  import {
43
43
  apiKeys,
44
44
  competitors,
@@ -48,7 +48,7 @@ import {
48
48
  projects,
49
49
  querySnapshots,
50
50
  runs
51
- } from "./chunk-GZF3YIHY.js";
51
+ } from "./chunk-32YTAZBL.js";
52
52
 
53
53
  // src/cli.ts
54
54
  import { pathToFileURL } from "url";
@@ -295,7 +295,7 @@ async function backfillAnswerVisibilityCommand(opts) {
295
295
  console.log(` Errors: ${providerErrors}`);
296
296
  }
297
297
  async function backfillInsightsCommand(project, opts) {
298
- const { IntelligenceService } = await import("./intelligence-service-KM64AW7J.js");
298
+ const { IntelligenceService } = await import("./intelligence-service-U7YQ4NXV.js");
299
299
  const config = loadConfig();
300
300
  const db = createClient(config.database);
301
301
  migrate(db);
package/dist/index.js CHANGED
@@ -1,8 +1,8 @@
1
1
  import {
2
2
  createServer,
3
3
  loadConfig
4
- } from "./chunk-KGOT5OFT.js";
5
- import "./chunk-GZF3YIHY.js";
4
+ } from "./chunk-6UY2PETG.js";
5
+ import "./chunk-32YTAZBL.js";
6
6
  export {
7
7
  createServer,
8
8
  loadConfig
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  IntelligenceService
3
- } from "./chunk-GZF3YIHY.js";
3
+ } from "./chunk-32YTAZBL.js";
4
4
  export {
5
5
  IntelligenceService
6
6
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ainyc/canonry",
3
- "version": "2.4.3",
3
+ "version": "2.4.5",
4
4
  "type": "module",
5
5
  "description": "The ultimate open-source AEO monitoring tool - track how answer engines cite your domain",
6
6
  "license": "FSL-1.1-ALv2",
@@ -57,21 +57,21 @@
57
57
  "@types/node-cron": "^3.0.11",
58
58
  "tsup": "^8.5.1",
59
59
  "tsx": "^4.19.0",
60
+ "@ainyc/canonry-db": "0.0.0",
60
61
  "@ainyc/canonry-api-routes": "0.0.0",
61
- "@ainyc/canonry-config": "0.0.0",
62
+ "@ainyc/canonry-intelligence": "0.0.0",
62
63
  "@ainyc/canonry-contracts": "0.0.0",
63
- "@ainyc/canonry-db": "0.0.0",
64
64
  "@ainyc/canonry-integration-bing": "0.0.0",
65
- "@ainyc/canonry-intelligence": "0.0.0",
65
+ "@ainyc/canonry-config": "0.0.0",
66
66
  "@ainyc/canonry-integration-commoncrawl": "0.0.0",
67
67
  "@ainyc/canonry-integration-google": "0.0.0",
68
68
  "@ainyc/canonry-integration-wordpress": "0.0.0",
69
- "@ainyc/canonry-provider-claude": "0.0.0",
70
- "@ainyc/canonry-provider-cdp": "0.0.0",
71
69
  "@ainyc/canonry-provider-gemini": "0.0.0",
72
70
  "@ainyc/canonry-provider-local": "0.0.0",
71
+ "@ainyc/canonry-provider-cdp": "0.0.0",
73
72
  "@ainyc/canonry-provider-openai": "0.0.0",
74
- "@ainyc/canonry-provider-perplexity": "0.0.0"
73
+ "@ainyc/canonry-provider-perplexity": "0.0.0",
74
+ "@ainyc/canonry-provider-claude": "0.0.0"
75
75
  },
76
76
  "scripts": {
77
77
  "build": "tsx scripts/copy-agent-assets.ts && tsup && tsx build-web.ts",