@ainyc/canonry 2.4.3 → 2.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1036,7 +1036,22 @@ var MIGRATIONS = [
|
|
|
1036
1036
|
// v42: Per-project auto-extract toggle — when a release sync transitions
|
|
1037
1037
|
// to ready, projects with this flag get a backlink-extract run enqueued.
|
|
1038
1038
|
// Stored as INTEGER (0/1) to match SQLite boolean convention.
|
|
1039
|
-
`ALTER TABLE projects ADD COLUMN auto_extract_backlinks INTEGER NOT NULL DEFAULT 0`
|
|
1039
|
+
`ALTER TABLE projects ADD COLUMN auto_extract_backlinks INTEGER NOT NULL DEFAULT 0`,
|
|
1040
|
+
// v43: Backfill bing_url_inspections.in_index using the new crawl-signal
|
|
1041
|
+
// decision tree. Legacy rows were classified with the retired Bing `InIndex`
|
|
1042
|
+
// flag plus a DocumentSize>0 check, which mis-classifies URLs that modern
|
|
1043
|
+
// Bing returns with DocumentSize=0 but a valid LastCrawledDate. Use a
|
|
1044
|
+
// created_at cutoff so rows written by the new code (which applies a live
|
|
1045
|
+
// GetCrawlIssues demotion that can't be replayed offline) are preserved.
|
|
1046
|
+
`UPDATE bing_url_inspections
|
|
1047
|
+
SET in_index = CASE
|
|
1048
|
+
WHEN document_size IS NOT NULL AND document_size > 0 THEN 1
|
|
1049
|
+
WHEN last_crawled_date IS NOT NULL AND http_code IS NOT NULL AND http_code >= 400 THEN 0
|
|
1050
|
+
WHEN last_crawled_date IS NOT NULL THEN 1
|
|
1051
|
+
WHEN discovery_date IS NOT NULL THEN 0
|
|
1052
|
+
ELSE NULL
|
|
1053
|
+
END
|
|
1054
|
+
WHERE created_at < '2026-04-22T00:00:00Z'`
|
|
1040
1055
|
];
|
|
1041
1056
|
function isDuplicateColumnError(err) {
|
|
1042
1057
|
if (!(err instanceof Error)) return false;
|
|
@@ -30,7 +30,7 @@ import {
|
|
|
30
30
|
runs,
|
|
31
31
|
schedules,
|
|
32
32
|
usageCounters
|
|
33
|
-
} from "./chunk-
|
|
33
|
+
} from "./chunk-32YTAZBL.js";
|
|
34
34
|
|
|
35
35
|
// src/config.ts
|
|
36
36
|
import fs from "fs";
|
|
@@ -8148,6 +8148,13 @@ async function getKeywordStats(apiKey, siteUrl) {
|
|
|
8148
8148
|
const data = await bingFetch(apiKey, `GetQueryStats?siteUrl=${encodedSite}`);
|
|
8149
8149
|
return data ?? [];
|
|
8150
8150
|
}
|
|
8151
|
+
/**
 * Requests the `GetCrawlIssues` endpoint for a site through `bingFetch`.
 *
 * Both arguments are passed through `validateApiKey` / `validateSiteUrl2`
 * before any request is made (presumably these throw on invalid input —
 * confirm against their definitions).
 *
 * @param apiKey  API key forwarded to `bingFetch`.
 * @param siteUrl Site URL; URI-component-encoded into the `siteUrl` query parameter.
 * @returns Whatever `bingFetch` resolves to, or an empty array when that
 *          value is `null` or `undefined`.
 */
async function getCrawlIssues(apiKey, siteUrl) {
  validateApiKey(apiKey);
  validateSiteUrl2(siteUrl);
  const endpoint = `GetCrawlIssues?siteUrl=${encodeURIComponent(siteUrl)}`;
  const issues = await bingFetch(apiKey, endpoint);
  if (issues === null || issues === undefined) {
    return [];
  }
  return issues;
}
|
|
8151
8158
|
|
|
8152
8159
|
// ../api-routes/src/bing.ts
|
|
8153
8160
|
function parseBingDate(value) {
|
|
@@ -8163,6 +8170,30 @@ function bingLog(level, action, ctx) {
|
|
|
8163
8170
|
const stream = level === "error" ? process.stderr : process.stdout;
|
|
8164
8171
|
stream.write(JSON.stringify(entry) + "\n");
|
|
8165
8172
|
}
|
|
8173
|
+
// Maximum age, in milliseconds (6e4 ms = 60 s), of a cached crawl-issues entry
// before loadBlockingCrawlIssues fetches again.
var CRAWL_ISSUES_CACHE_TTL_MS = 6e4;
|
|
8174
|
+
// In-memory cache keyed by the `domain` argument of loadBlockingCrawlIssues;
// each value is `{ blockedUrls, fetchedAt }`.
var crawlIssuesCache = /* @__PURE__ */ new Map();
|
|
8175
|
+
/**
 * Decides whether a crawl issue's type should be treated as blocking.
 *
 * The value is split on whitespace into flags. The issue is non-blocking only
 * when every flag is one of `None`, `SeoIssues` or `SeoConcerns`
 * (case-insensitive). Anything else counts as blocking.
 *
 * @param issueType Issue-type value taken from an API payload; may be absent.
 * @returns `true` when the issue is blocking, `false` otherwise.
 */
function isBlockingIssueType(issueType) {
  // Missing values and any non-string (e.g. a numeric flag value) are treated
  // as blocking — the same conservative default used for empty strings —
  // instead of throwing a TypeError from `.trim()`.
  if (typeof issueType !== "string") return true;
  const trimmed = issueType.trim();
  if (!trimmed) return true;
  // Blocking as soon as one flag falls outside the non-blocking allow-list.
  return trimmed.split(/\s+/).some((flag) => !/^(None|Seo(Issues|Concerns))$/i.test(flag));
}
|
|
8181
|
+
/**
 * Returns the set of URLs that have at least one blocking crawl issue.
 *
 * Results are cached in `crawlIssuesCache` under `domain`. A cached entry is
 * reused while its age is below `CRAWL_ISSUES_CACHE_TTL_MS`; otherwise the
 * issues are fetched again with `getCrawlIssues` and the entry is replaced.
 *
 * @param apiKey  API key forwarded to `getCrawlIssues`.
 * @param siteUrl Site URL forwarded to `getCrawlIssues`.
 * @param domain  Cache key for the result.
 * @returns A `Set` of the `Url` values of issues judged blocking by
 *          `isBlockingIssueType`.
 */
async function loadBlockingCrawlIssues(apiKey, siteUrl, domain) {
  const now = Date.now();
  const entry = crawlIssuesCache.get(domain);
  if (entry) {
    const age = now - entry.fetchedAt;
    if (age < CRAWL_ISSUES_CACHE_TTL_MS) {
      return entry.blockedUrls;
    }
  }

  const issues = await getCrawlIssues(apiKey, siteUrl);
  const blockedUrls = new Set();
  for (const item of issues) {
    const candidateUrl = item.Url;
    // Entries without a URL cannot be matched against an inspected URL.
    if (!candidateUrl) continue;
    if (!isBlockingIssueType(item.IssueType ?? null)) continue;
    blockedUrls.add(candidateUrl);
  }

  // The timestamp is the one taken before the fetch started, not after it finished.
  crawlIssuesCache.set(domain, { blockedUrls, fetchedAt: now });
  return blockedUrls;
}
|
|
8166
8197
|
async function bingRoutes(app, opts) {
|
|
8167
8198
|
function requireConnectionStore() {
|
|
8168
8199
|
if (opts.bingConnectionStore) return opts.bingConnectionStore;
|
|
@@ -8411,22 +8442,38 @@ async function bingRoutes(app, opts) {
|
|
|
8411
8442
|
domain: project.canonicalDomain,
|
|
8412
8443
|
url,
|
|
8413
8444
|
httpStatus: result.HttpStatus ?? result.HttpCode ?? null,
|
|
8414
|
-
inIndex: result.InIndex ?? null,
|
|
8415
8445
|
documentSize: result.DocumentSize ?? null,
|
|
8416
|
-
lastCrawledDate: result.LastCrawledDate ?? null
|
|
8446
|
+
lastCrawledDate: result.LastCrawledDate ?? null,
|
|
8447
|
+
discoveryDate: result.DiscoveryDate ?? null
|
|
8417
8448
|
});
|
|
8418
8449
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
8419
8450
|
const id = crypto15.randomUUID();
|
|
8420
8451
|
const httpCode = result.HttpStatus ?? result.HttpCode ?? null;
|
|
8421
|
-
let derivedInIndex = null;
|
|
8422
|
-
if (result.InIndex != null) {
|
|
8423
|
-
derivedInIndex = result.InIndex;
|
|
8424
|
-
} else if (result.DocumentSize != null && result.DocumentSize > 0) {
|
|
8425
|
-
derivedInIndex = true;
|
|
8426
|
-
}
|
|
8427
8452
|
const lastCrawledDate = parseBingDate(result.LastCrawledDate);
|
|
8428
8453
|
const inIndexDate = parseBingDate(result.InIndexDate);
|
|
8429
8454
|
const discoveryDate = parseBingDate(result.DiscoveryDate);
|
|
8455
|
+
let derivedInIndex = null;
|
|
8456
|
+
if (result.DocumentSize != null && result.DocumentSize > 0) {
|
|
8457
|
+
derivedInIndex = true;
|
|
8458
|
+
} else if (lastCrawledDate != null) {
|
|
8459
|
+
const httpStatus = result.HttpStatus ?? result.HttpCode;
|
|
8460
|
+
derivedInIndex = httpStatus != null && httpStatus >= 400 ? false : true;
|
|
8461
|
+
} else if (discoveryDate != null) {
|
|
8462
|
+
derivedInIndex = false;
|
|
8463
|
+
}
|
|
8464
|
+
if (derivedInIndex === true) {
|
|
8465
|
+
try {
|
|
8466
|
+
const blockedUrls = await loadBlockingCrawlIssues(conn.apiKey, conn.siteUrl, project.canonicalDomain);
|
|
8467
|
+
if (blockedUrls.has(url)) {
|
|
8468
|
+
derivedInIndex = false;
|
|
8469
|
+
}
|
|
8470
|
+
} catch (e) {
|
|
8471
|
+
bingLog("warn", "inspect-url.crawl-issues-lookup-failed", {
|
|
8472
|
+
domain: project.canonicalDomain,
|
|
8473
|
+
error: e instanceof Error ? e.message : String(e)
|
|
8474
|
+
});
|
|
8475
|
+
}
|
|
8476
|
+
}
|
|
8430
8477
|
app.db.insert(bingUrlInspections).values({
|
|
8431
8478
|
id,
|
|
8432
8479
|
projectId: project.id,
|
package/dist/cli.js
CHANGED
|
@@ -38,7 +38,7 @@ import {
|
|
|
38
38
|
showFirstRunNotice,
|
|
39
39
|
trackEvent,
|
|
40
40
|
usageError
|
|
41
|
-
} from "./chunk-
|
|
41
|
+
} from "./chunk-6UY2PETG.js";
|
|
42
42
|
import {
|
|
43
43
|
apiKeys,
|
|
44
44
|
competitors,
|
|
@@ -48,7 +48,7 @@ import {
|
|
|
48
48
|
projects,
|
|
49
49
|
querySnapshots,
|
|
50
50
|
runs
|
|
51
|
-
} from "./chunk-
|
|
51
|
+
} from "./chunk-32YTAZBL.js";
|
|
52
52
|
|
|
53
53
|
// src/cli.ts
|
|
54
54
|
import { pathToFileURL } from "url";
|
|
@@ -295,7 +295,7 @@ async function backfillAnswerVisibilityCommand(opts) {
|
|
|
295
295
|
console.log(` Errors: ${providerErrors}`);
|
|
296
296
|
}
|
|
297
297
|
async function backfillInsightsCommand(project, opts) {
|
|
298
|
-
const { IntelligenceService } = await import("./intelligence-service-
|
|
298
|
+
const { IntelligenceService } = await import("./intelligence-service-U7YQ4NXV.js");
|
|
299
299
|
const config = loadConfig();
|
|
300
300
|
const db = createClient(config.database);
|
|
301
301
|
migrate(db);
|
package/dist/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ainyc/canonry",
|
|
3
|
-
"version": "2.4.3",
|
|
3
|
+
"version": "2.4.5",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "The ultimate open-source AEO monitoring tool - track how answer engines cite your domain",
|
|
6
6
|
"license": "FSL-1.1-ALv2",
|
|
@@ -57,21 +57,21 @@
|
|
|
57
57
|
"@types/node-cron": "^3.0.11",
|
|
58
58
|
"tsup": "^8.5.1",
|
|
59
59
|
"tsx": "^4.19.0",
|
|
60
|
+
"@ainyc/canonry-db": "0.0.0",
|
|
60
61
|
"@ainyc/canonry-api-routes": "0.0.0",
|
|
61
|
-
"@ainyc/canonry-
|
|
62
|
+
"@ainyc/canonry-intelligence": "0.0.0",
|
|
62
63
|
"@ainyc/canonry-contracts": "0.0.0",
|
|
63
|
-
"@ainyc/canonry-db": "0.0.0",
|
|
64
64
|
"@ainyc/canonry-integration-bing": "0.0.0",
|
|
65
|
-
"@ainyc/canonry-
|
|
65
|
+
"@ainyc/canonry-config": "0.0.0",
|
|
66
66
|
"@ainyc/canonry-integration-commoncrawl": "0.0.0",
|
|
67
67
|
"@ainyc/canonry-integration-google": "0.0.0",
|
|
68
68
|
"@ainyc/canonry-integration-wordpress": "0.0.0",
|
|
69
|
-
"@ainyc/canonry-provider-claude": "0.0.0",
|
|
70
|
-
"@ainyc/canonry-provider-cdp": "0.0.0",
|
|
71
69
|
"@ainyc/canonry-provider-gemini": "0.0.0",
|
|
72
70
|
"@ainyc/canonry-provider-local": "0.0.0",
|
|
71
|
+
"@ainyc/canonry-provider-cdp": "0.0.0",
|
|
73
72
|
"@ainyc/canonry-provider-openai": "0.0.0",
|
|
74
|
-
"@ainyc/canonry-provider-perplexity": "0.0.0"
|
|
73
|
+
"@ainyc/canonry-provider-perplexity": "0.0.0",
|
|
74
|
+
"@ainyc/canonry-provider-claude": "0.0.0"
|
|
75
75
|
},
|
|
76
76
|
"scripts": {
|
|
77
77
|
"build": "tsx scripts/copy-agent-assets.ts && tsup && tsx build-web.ts",
|