@ainyc/canonry 2.4.3 → 2.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -166,7 +166,17 @@ Integration setup guides: [Google Search Console](docs/google-search-console-set
166
166
 
167
167
  ## Skills
168
168
 
169
- Canonry ships a bundled `canonry-setup` skill that documents the CLI commands, provider setup, analysis workflows, and troubleshooting patterns an agent needs to operate the platform. **Claude Code** picks up the skill automatically from `.claude/skills/canonry-setup/` when you open this repo.
169
+ Canonry ships a bundled `canonry-setup` skill that turns Aero (or any Claude-powered agent) into an AEO/SEO operator. **Claude Code** picks it up automatically from `.claude/skills/canonry-setup/` when you open this repo; the same content lives under [`skills/canonry-setup/`](skills/canonry-setup/) for portable use with other harnesses.
170
+
171
+ The skill covers the end-to-end answer-engine optimization loop:
172
+
173
+ - **AEO monitoring.** Running citation sweeps across Gemini, ChatGPT, Claude, and Perplexity via `canonry run` / `canonry evidence` / `canonry status`, including how to interpret per-phrase citation state and regressions.
174
+ - **Technical SEO audits.** Driving the companion [`@ainyc/aeo-audit`](https://www.npmjs.com/package/@ainyc/aeo-audit) CLI for 14-factor scoring — structured data (JSON-LD), content depth, AI-readable files (`llms.txt`, `llms-full.txt`), E-E-A-T signals, FAQ blocks, definition blocks, H1/alt/meta hygiene.
175
+ - **Indexing diagnosis.** Google Search Console and Bing Webmaster Tools coverage, URL inspection, and one-shot submissions via `canonry google request-indexing` / `canonry bing request-indexing`.
176
+ - **Schema & content execution.** Patterns for injecting LocalBusiness/FAQPage JSON-LD, writing `llms.txt` with service-area detail, trimming keyphrase lists to high-intent queries, and handling WordPress/Elementor specifics (REST API, Application Passwords, Elementor Custom Code).
177
+ - **Diagnose → prioritize → execute → monitor → report workflow.** Opinionated defaults for new sites (0 citations), regressions on established sites, and county-level targeting — with guardrails like "never fabricate citation data" and "back up `~/.canonry/config.yaml` before editing".
178
+
179
+ See [`skills/canonry-setup/SKILL.md`](skills/canonry-setup/SKILL.md) plus the reference files under [`skills/canonry-setup/references/`](skills/canonry-setup/references/) (`canonry-cli.md`, `aeo-analysis.md`, `indexing.md`, `wordpress-integration.md`) for the full playbook. Aero loads the same material natively, so anything an external agent can do through the skill, Aero can do from the CLI or dashboard command bar.
170
180
 
171
181
  ## Deployment
172
182
 
@@ -1036,7 +1036,22 @@ var MIGRATIONS = [
1036
1036
  // v42: Per-project auto-extract toggle — when a release sync transitions
1037
1037
  // to ready, projects with this flag get a backlink-extract run enqueued.
1038
1038
  // Stored as INTEGER (0/1) to match SQLite boolean convention.
1039
- `ALTER TABLE projects ADD COLUMN auto_extract_backlinks INTEGER NOT NULL DEFAULT 0`
1039
+ `ALTER TABLE projects ADD COLUMN auto_extract_backlinks INTEGER NOT NULL DEFAULT 0`,
1040
+ // v43: Backfill bing_url_inspections.in_index using the new crawl-signal
1041
+ // decision tree. Legacy rows were classified with the retired Bing `InIndex`
1042
+ // flag plus a DocumentSize>0 check, which mis-classifies URLs that modern
1043
+ // Bing returns with DocumentSize=0 but a valid LastCrawledDate. Use a
1044
+ // created_at cutoff so rows written by the new code (which applies a live
1045
+ // GetCrawlIssues demotion that can't be replayed offline) are preserved.
1046
+ `UPDATE bing_url_inspections
1047
+ SET in_index = CASE
1048
+ WHEN document_size IS NOT NULL AND document_size > 0 THEN 1
1049
+ WHEN last_crawled_date IS NOT NULL AND http_code IS NOT NULL AND http_code >= 400 THEN 0
1050
+ WHEN last_crawled_date IS NOT NULL THEN 1
1051
+ WHEN discovery_date IS NOT NULL THEN 0
1052
+ ELSE NULL
1053
+ END
1054
+ WHERE created_at < '2026-04-22T00:00:00Z'`
1040
1055
  ];
1041
1056
  function isDuplicateColumnError(err) {
1042
1057
  if (!(err instanceof Error)) return false;
@@ -30,7 +30,7 @@ import {
30
30
  runs,
31
31
  schedules,
32
32
  usageCounters
33
- } from "./chunk-GZF3YIHY.js";
33
+ } from "./chunk-32YTAZBL.js";
34
34
 
35
35
  // src/config.ts
36
36
  import fs from "fs";
@@ -8148,6 +8148,13 @@ async function getKeywordStats(apiKey, siteUrl) {
8148
8148
  const data = await bingFetch(apiKey, `GetQueryStats?siteUrl=${encodedSite}`);
8149
8149
  return data ?? [];
8150
8150
  }
/**
 * Request the crawl issues Bing Webmaster Tools reports for a site.
 *
 * Both arguments are validated first (validateApiKey / validateSiteUrl2),
 * then the site URL is URL-encoded into the `GetCrawlIssues` query string.
 *
 * @param {string} apiKey - Bing Webmaster API key.
 * @param {string} siteUrl - Site URL to query.
 * @returns {Promise<*>} The payload resolved by bingFetch (callers iterate it
 *   as a list of issues), or an empty array when that payload is null/undefined.
 */
async function getCrawlIssues(apiKey, siteUrl) {
  validateApiKey(apiKey);
  validateSiteUrl2(siteUrl);
  const endpoint = `GetCrawlIssues?siteUrl=${encodeURIComponent(siteUrl)}`;
  const issues = await bingFetch(apiKey, endpoint);
  if (issues === null || issues === undefined) {
    return [];
  }
  return issues;
}
8151
8158
 
8152
8159
  // ../api-routes/src/bing.ts
8153
8160
  function parseBingDate(value) {
@@ -8163,6 +8170,30 @@ function bingLog(level, action, ctx) {
8163
8170
  const stream = level === "error" ? process.stderr : process.stdout;
8164
8171
  stream.write(JSON.stringify(entry) + "\n");
8165
8172
  }
// How long, in milliseconds, a cached crawl-issue lookup is reused (60 seconds).
var CRAWL_ISSUES_CACHE_TTL_MS = 60 * 1000;
// In-memory cache of crawl-issue lookups. loadBlockingCrawlIssues stores
// `{ blockedUrls, fetchedAt }` entries in it, keyed by domain.
var crawlIssuesCache = /* @__PURE__ */ new Map();
/**
 * Decide whether a crawl-issue type string describes something that should
 * count as blocking.
 *
 * The value is read as a whitespace-separated list of flags. It is
 * non-blocking only when every flag is one of `None`, `SeoIssues` or
 * `SeoConcerns` (case-insensitive). Anything else is blocking, including a
 * missing or blank value.
 *
 * @param {string|null|undefined} issueType - Raw issue type from the crawl
 *   issue record.
 * @returns {boolean} true when the issue should be treated as blocking.
 */
function isBlockingIssueType(issueType) {
  // The value comes from an external API payload. A non-string (for example a
  // numeric flag value) cannot be parsed as flag names, so it is handled like a
  // missing type instead of throwing from `.trim()` and aborting the caller's
  // whole lookup.
  if (typeof issueType !== "string") return true;
  const trimmed = issueType.trim();
  if (trimmed === "") return true;
  const advisoryFlag = /^(None|Seo(Issues|Concerns))$/i;
  // A single flag outside the advisory set makes the whole issue blocking.
  return trimmed.split(/\s+/).some((flag) => !advisoryFlag.test(flag));
}
/**
 * Return the set of URLs that have a blocking crawl issue for a site.
 *
 * Results are memoised in `crawlIssuesCache` under `domain` and reused while
 * they are younger than `CRAWL_ISSUES_CACHE_TTL_MS`. The cache key is the
 * domain alone; `apiKey` and `siteUrl` are only used for the fetch. Errors
 * from `getCrawlIssues` propagate to the caller and leave the cache untouched.
 *
 * @param {string} apiKey - Bing Webmaster API key passed to getCrawlIssues.
 * @param {string} siteUrl - Site URL passed to getCrawlIssues.
 * @param {string} domain - Cache key for this lookup.
 * @returns {Promise<Set<string>>} URLs whose issue type is blocking.
 */
async function loadBlockingCrawlIssues(apiKey, siteUrl, domain) {
  const fetchedAt = Date.now();
  const entry = crawlIssuesCache.get(domain);
  const isFresh = entry && fetchedAt - entry.fetchedAt < CRAWL_ISSUES_CACHE_TTL_MS;
  if (isFresh) {
    return entry.blockedUrls;
  }

  const issues = await getCrawlIssues(apiKey, siteUrl);
  const blockedUrls = new Set();
  for (const issue of issues) {
    // Records without a URL cannot be matched against an inspected URL.
    if (!issue.Url) continue;
    if (!isBlockingIssueType(issue.IssueType ?? null)) continue;
    blockedUrls.add(issue.Url);
  }

  // The timestamp is taken before the fetch, so the TTL runs from request start.
  crawlIssuesCache.set(domain, { blockedUrls, fetchedAt });
  return blockedUrls;
}
8166
8197
  async function bingRoutes(app, opts) {
8167
8198
  function requireConnectionStore() {
8168
8199
  if (opts.bingConnectionStore) return opts.bingConnectionStore;
@@ -8411,22 +8442,38 @@ async function bingRoutes(app, opts) {
8411
8442
  domain: project.canonicalDomain,
8412
8443
  url,
8413
8444
  httpStatus: result.HttpStatus ?? result.HttpCode ?? null,
8414
- inIndex: result.InIndex ?? null,
8415
8445
  documentSize: result.DocumentSize ?? null,
8416
- lastCrawledDate: result.LastCrawledDate ?? null
8446
+ lastCrawledDate: result.LastCrawledDate ?? null,
8447
+ discoveryDate: result.DiscoveryDate ?? null
8417
8448
  });
8418
8449
  const now = (/* @__PURE__ */ new Date()).toISOString();
8419
8450
  const id = crypto15.randomUUID();
8420
8451
  const httpCode = result.HttpStatus ?? result.HttpCode ?? null;
8421
- let derivedInIndex = null;
8422
- if (result.InIndex != null) {
8423
- derivedInIndex = result.InIndex;
8424
- } else if (result.DocumentSize != null && result.DocumentSize > 0) {
8425
- derivedInIndex = true;
8426
- }
8427
8452
  const lastCrawledDate = parseBingDate(result.LastCrawledDate);
8428
8453
  const inIndexDate = parseBingDate(result.InIndexDate);
8429
8454
  const discoveryDate = parseBingDate(result.DiscoveryDate);
8455
+ let derivedInIndex = null;
8456
+ if (result.DocumentSize != null && result.DocumentSize > 0) {
8457
+ derivedInIndex = true;
8458
+ } else if (lastCrawledDate != null) {
8459
+ const httpStatus = result.HttpStatus ?? result.HttpCode;
8460
+ derivedInIndex = httpStatus != null && httpStatus >= 400 ? false : true;
8461
+ } else if (discoveryDate != null) {
8462
+ derivedInIndex = false;
8463
+ }
8464
+ if (derivedInIndex === true) {
8465
+ try {
8466
+ const blockedUrls = await loadBlockingCrawlIssues(conn.apiKey, conn.siteUrl, project.canonicalDomain);
8467
+ if (blockedUrls.has(url)) {
8468
+ derivedInIndex = false;
8469
+ }
8470
+ } catch (e) {
8471
+ bingLog("warn", "inspect-url.crawl-issues-lookup-failed", {
8472
+ domain: project.canonicalDomain,
8473
+ error: e instanceof Error ? e.message : String(e)
8474
+ });
8475
+ }
8476
+ }
8430
8477
  app.db.insert(bingUrlInspections).values({
8431
8478
  id,
8432
8479
  projectId: project.id,
package/dist/cli.js CHANGED
@@ -38,7 +38,7 @@ import {
38
38
  showFirstRunNotice,
39
39
  trackEvent,
40
40
  usageError
41
- } from "./chunk-KGOT5OFT.js";
41
+ } from "./chunk-6UY2PETG.js";
42
42
  import {
43
43
  apiKeys,
44
44
  competitors,
@@ -48,7 +48,7 @@ import {
48
48
  projects,
49
49
  querySnapshots,
50
50
  runs
51
- } from "./chunk-GZF3YIHY.js";
51
+ } from "./chunk-32YTAZBL.js";
52
52
 
53
53
  // src/cli.ts
54
54
  import { pathToFileURL } from "url";
@@ -295,7 +295,7 @@ async function backfillAnswerVisibilityCommand(opts) {
295
295
  console.log(` Errors: ${providerErrors}`);
296
296
  }
297
297
  async function backfillInsightsCommand(project, opts) {
298
- const { IntelligenceService } = await import("./intelligence-service-KM64AW7J.js");
298
+ const { IntelligenceService } = await import("./intelligence-service-U7YQ4NXV.js");
299
299
  const config = loadConfig();
300
300
  const db = createClient(config.database);
301
301
  migrate(db);
@@ -2181,6 +2181,7 @@ var COMPETITOR_CLI_COMMANDS = [
2181
2181
  ];
2182
2182
 
2183
2183
  // src/commands/google.ts
2184
+ var INDEXING_API_SCOPE_NOTICE = "Note: Google's Indexing API officially supports only pages with JobPosting or BroadcastEvent (livestream VideoObject) structured data. For other URL types, submissions are accepted (HTTP 200) but not guaranteed to be prioritized for crawling. For general pages, submit a sitemap and use URL Inspection to monitor status.";
2184
2185
  function getClient6() {
2185
2186
  return createApiClient();
2186
2187
  }
@@ -2630,27 +2631,32 @@ async function googleRequestIndexing(project, opts) {
2630
2631
  details: { command: "google.request-indexing" }
2631
2632
  });
2632
2633
  }
2634
+ if (opts.format !== "json") {
2635
+ console.error(INDEXING_API_SCOPE_NOTICE);
2636
+ console.error();
2637
+ }
2633
2638
  const result = await client.googleRequestIndexing(project, body);
2634
2639
  let indexingConfirmed = false;
2640
+ const lastInspection = /* @__PURE__ */ new Map();
2635
2641
  if (opts.wait && result.results.some((r) => r.status === "success")) {
2636
2642
  const successUrls = result.results.filter((r) => r.status === "success").map((r) => r.url);
2637
- const timeout = 10 * 60 * 1e3;
2643
+ const timeout = opts.waitTimeoutMs ?? 10 * 60 * 1e3;
2644
+ const pollInterval = opts.waitPollIntervalMs ?? 1e4;
2638
2645
  const start = Date.now();
2639
- process.stderr.write("Waiting for indexing confirmation");
2646
+ process.stderr.write("Polling URL Inspection for indexed verdict");
2640
2647
  while (Date.now() - start < timeout) {
2641
- await new Promise((r) => setTimeout(r, 1e4));
2648
+ await new Promise((r) => setTimeout(r, pollInterval));
2642
2649
  process.stderr.write(".");
2643
2650
  let allIndexed = true;
2644
2651
  for (const url of successUrls) {
2645
2652
  try {
2646
2653
  const inspection = await client.gscInspect(project, url);
2647
- if (inspection.indexingState !== "INDEXING_ALLOWED") {
2654
+ lastInspection.set(url, inspection);
2655
+ if (inspection.verdict !== "PASS") {
2648
2656
  allIndexed = false;
2649
- break;
2650
2657
  }
2651
2658
  } catch {
2652
2659
  allIndexed = false;
2653
- break;
2654
2660
  }
2655
2661
  }
2656
2662
  if (allIndexed) {
@@ -2661,13 +2667,23 @@ async function googleRequestIndexing(project, opts) {
2661
2667
  }
2662
2668
  if (!indexingConfirmed) {
2663
2669
  process.stderr.write("\n");
2670
+ const observed = successUrls.map((url) => {
2671
+ const i = lastInspection.get(url);
2672
+ return {
2673
+ url,
2674
+ verdict: i?.verdict ?? null,
2675
+ coverageState: i?.coverageState ?? null,
2676
+ indexingState: i?.indexingState ?? null
2677
+ };
2678
+ });
2664
2679
  throw new CliError({
2665
2680
  code: "GOOGLE_INDEXING_CONFIRMATION_TIMEOUT",
2666
- message: "Timed out waiting for indexing confirmation. URLs may still be processing.",
2667
- displayMessage: "Timed out waiting for indexing confirmation. URLs may still be processing.",
2681
+ message: "Timed out waiting for Google to report verdict=PASS. Google typically takes hours to days to index new URLs, so this is expected and does not mean the submission failed. Re-check later with `canonry google gsc inspect <url>`.",
2682
+ displayMessage: "Timed out waiting for Google to report verdict=PASS. Google typically takes hours to days to index new URLs \u2014 this is not a failure. Re-check later with `canonry google gsc inspect <url>`.",
2668
2683
  details: {
2669
2684
  project,
2670
- urls: successUrls
2685
+ urls: successUrls,
2686
+ lastObserved: observed
2671
2687
  }
2672
2688
  });
2673
2689
  }
@@ -2692,7 +2708,7 @@ async function googleRequestIndexing(project, opts) {
2692
2708
  console.log(`Summary: ${result.summary.succeeded} succeeded, ${result.summary.failed} failed (${result.summary.total} total)`);
2693
2709
  }
2694
2710
  if (indexingConfirmed) {
2695
- console.log("All requested URLs are now indexed.");
2711
+ console.log("URL Inspection now reports verdict=PASS for the requested URLs (indexed in Google Search).");
2696
2712
  }
2697
2713
  }
2698
2714
  async function googleRefresh(project, format) {
package/dist/index.js CHANGED
@@ -1,8 +1,8 @@
1
1
  import {
2
2
  createServer,
3
3
  loadConfig
4
- } from "./chunk-KGOT5OFT.js";
5
- import "./chunk-GZF3YIHY.js";
4
+ } from "./chunk-6UY2PETG.js";
5
+ import "./chunk-32YTAZBL.js";
6
6
  export {
7
7
  createServer,
8
8
  loadConfig
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  IntelligenceService
3
- } from "./chunk-GZF3YIHY.js";
3
+ } from "./chunk-32YTAZBL.js";
4
4
  export {
5
5
  IntelligenceService
6
6
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ainyc/canonry",
3
- "version": "2.4.3",
3
+ "version": "2.4.6",
4
4
  "type": "module",
5
5
  "description": "The ultimate open-source AEO monitoring tool - track how answer engines cite your domain",
6
6
  "license": "FSL-1.1-ALv2",
@@ -60,16 +60,16 @@
60
60
  "@ainyc/canonry-api-routes": "0.0.0",
61
61
  "@ainyc/canonry-config": "0.0.0",
62
62
  "@ainyc/canonry-contracts": "0.0.0",
63
- "@ainyc/canonry-db": "0.0.0",
64
- "@ainyc/canonry-integration-bing": "0.0.0",
65
63
  "@ainyc/canonry-intelligence": "0.0.0",
66
- "@ainyc/canonry-integration-commoncrawl": "0.0.0",
64
+ "@ainyc/canonry-db": "0.0.0",
67
65
  "@ainyc/canonry-integration-google": "0.0.0",
66
+ "@ainyc/canonry-integration-bing": "0.0.0",
67
+ "@ainyc/canonry-provider-cdp": "0.0.0",
68
68
  "@ainyc/canonry-integration-wordpress": "0.0.0",
69
69
  "@ainyc/canonry-provider-claude": "0.0.0",
70
- "@ainyc/canonry-provider-cdp": "0.0.0",
71
- "@ainyc/canonry-provider-gemini": "0.0.0",
70
+ "@ainyc/canonry-integration-commoncrawl": "0.0.0",
72
71
  "@ainyc/canonry-provider-local": "0.0.0",
72
+ "@ainyc/canonry-provider-gemini": "0.0.0",
73
73
  "@ainyc/canonry-provider-openai": "0.0.0",
74
74
  "@ainyc/canonry-provider-perplexity": "0.0.0"
75
75
  },