@xyleapp/cli 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -34,6 +34,18 @@ xyle rewrite --url https://example.com/blog/seo-guide --type title
34
34
  # Crawl a page
35
35
  xyle crawl --url https://example.com/blog/seo-guide
36
36
 
37
+ # Full-site crawl (BFS every internal page, detect site-wide issues)
38
+ xyle site-crawl start https://example.com --max-pages 500
39
+
40
+ # View snapshot history
41
+ xyle history --url https://example.com
42
+
43
+ # View score trends
44
+ xyle trends --site example.com --days 30
45
+
46
+ # Compare two snapshots
47
+ xyle diff --before <snapshot-id> --after <snapshot-id>
48
+
37
49
  # Sync Search Console data
38
50
  xyle sync --site https://example.com
39
51
  ```
@@ -50,6 +62,10 @@ xyle sync --site https://example.com
50
62
  | `analyze` | Analyze page content against competitors |
51
63
  | `rewrite` | Get AI rewrite suggestions |
52
64
  | `crawl` | Crawl a URL and extract SEO metadata |
65
+ | `site-crawl` | Full-site BFS crawl with issue detection, link graph, and site health score |
66
+ | `history` | View snapshot history for a URL or site |
67
+ | `trends` | View score trends over time |
68
+ | `diff` | Compare two snapshots side-by-side |
53
69
  | `sync` | Sync Google Search Console data |
54
70
  | `login` | Authenticate with Google OAuth |
55
71
  | `logout` | Remove stored credentials |
package/bin/xyle.mjs CHANGED
@@ -8,7 +8,7 @@ const program = new Command();
8
8
  program
9
9
  .name("xyle")
10
10
  .description("SEO & AEO Intelligence Engine CLI")
11
- .version("0.8.0");
11
+ .version("0.10.0");
12
12
 
13
13
  registerCommands(program);
14
14
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xyleapp/cli",
3
- "version": "0.8.0",
3
+ "version": "0.10.0",
4
4
  "description": "CLI for the Xyle SEO & AEO Intelligence Engine",
5
5
  "type": "module",
6
6
  "bin": {
package/src/api.mjs CHANGED
@@ -49,7 +49,12 @@ async function request(method, path, { params, body, timeout = 30000, auth = tru
49
49
  let detail;
50
50
  try {
51
51
  const json = await resp.json();
52
- detail = json.detail || resp.statusText;
52
+ const raw = json.detail;
53
+ if (Array.isArray(raw)) {
54
+ detail = raw.map((e) => e.msg || JSON.stringify(e)).join("; ");
55
+ } else {
56
+ detail = raw || resp.statusText;
57
+ }
53
58
  } catch {
54
59
  detail = resp.statusText;
55
60
  }
@@ -65,7 +70,9 @@ export function checkHealth() {
65
70
  }
66
71
 
67
72
  export function getTopQueries(site, limit = 20) {
68
- return request("GET", "/queries", { params: { site, limit } });
73
+ const creds = getCredentials();
74
+ const email = creds?.email || null;
75
+ return request("GET", "/queries", { params: { site, limit, email } });
69
76
  }
70
77
 
71
78
  export function getCompetitors(query) {
@@ -93,7 +100,9 @@ export function crawlPage(url) {
93
100
  }
94
101
 
95
102
  export function syncGsc(site) {
96
- return request("POST", "/admin/sync", { params: { site } });
103
+ const creds = getCredentials();
104
+ const email = creds?.email || null;
105
+ return request("POST", "/admin/sync", { params: { site, email } });
97
106
  }
98
107
 
99
108
  export function listSites() {
@@ -117,4 +126,64 @@ export function getInstructions(tool) {
117
126
  return request("GET", "/seed/instructions", { params: { tool }, timeout: 10000 });
118
127
  }
119
128
 
129
+ export function listSnapshots(url, siteDomain, limit = 20) {
130
+ return request("GET", "/snapshots", { params: { url, site_domain: siteDomain, limit } });
131
+ }
132
+
133
+ export function getSnapshotTrends(url, siteDomain, days = 90) {
134
+ return request("GET", "/snapshots/trends", { params: { url, site_domain: siteDomain, days } });
135
+ }
136
+
137
+ export function diffSnapshots(beforeId, afterId) {
138
+ return request("GET", "/snapshots/diff", { params: { before: beforeId, after: afterId } });
139
+ }
140
+
141
+ export function askKnowledgeBase(question, url, topic, nSources = 5) {
142
+ const body = { question, n_sources: nSources };
143
+ if (url) body.context_url = url;
144
+ if (topic) body.topic = topic;
145
+ return request("POST", "/kb/ask", { body, timeout: 60000 });
146
+ }
147
+
148
+ export function getKbStats() {
149
+ return request("GET", "/kb/stats", { timeout: 10000 });
150
+ }
151
+
152
+ // ---------------------------------------------------------------------------
153
+ // Site Crawl
154
+ // ---------------------------------------------------------------------------
155
+
156
+ export function startSiteCrawl(seedUrl, config = {}) {
157
+ // immediate=1 tells the API to run the crawl in-process via BackgroundTask.
158
+ // The CLI keeps polling /status, which keeps the Cloud Run instance warm
159
+ // for the duration. The web UI uses Trigger.dev instead and omits this.
160
+ return request("POST", "/site-crawl", {
161
+ params: { immediate: 1 },
162
+ body: { seed_url: seedUrl, config },
163
+ timeout: 30000,
164
+ });
165
+ }
166
+
167
+ export function getSiteCrawlStatus(jobId) {
168
+ return request("GET", `/site-crawl/${jobId}`, { timeout: 15000 });
169
+ }
170
+
171
+ export function getSiteCrawlPages(jobId, { limit = 50, offset = 0, filter } = {}) {
172
+ return request("GET", `/site-crawl/${jobId}/pages`, {
173
+ params: { limit, offset, filter },
174
+ timeout: 15000,
175
+ });
176
+ }
177
+
178
+ export function getSiteCrawlIssues(jobId, { severity, category, limit = 200 } = {}) {
179
+ return request("GET", `/site-crawl/${jobId}/issues`, {
180
+ params: { severity, category, limit },
181
+ timeout: 15000,
182
+ });
183
+ }
184
+
185
+ export function listSiteCrawls(limit = 25) {
186
+ return request("GET", "/site-crawl", { params: { limit }, timeout: 15000 });
187
+ }
188
+
120
189
  export { SEO_BASE };
package/src/commands.mjs CHANGED
@@ -3,10 +3,6 @@
3
3
  * Mirrors the Python CLI 1:1.
4
4
  */
5
5
 
6
- import { createRequire } from "node:module";
7
- import { existsSync } from "node:fs";
8
- import { resolve } from "node:path";
9
- import { execSync } from "node:child_process";
10
6
  import { printJson, printTable } from "./formatting.mjs";
11
7
  import {
12
8
  checkHealth,
@@ -18,6 +14,16 @@ import {
18
14
  crawlPage,
19
15
  syncGsc,
20
16
  listSites,
17
+ listSnapshots,
18
+ getSnapshotTrends,
19
+ diffSnapshots,
20
+ askKnowledgeBase,
21
+ getKbStats,
22
+ startSiteCrawl,
23
+ getSiteCrawlStatus,
24
+ getSiteCrawlPages,
25
+ getSiteCrawlIssues,
26
+ listSiteCrawls,
21
27
  SEO_BASE,
22
28
  } from "./api.mjs";
23
29
  import { getCredentials, clearCredentials, runLoginFlow } from "./auth.mjs";
@@ -171,6 +177,12 @@ export function registerCommands(program) {
171
177
  console.log(`${aeoColor}AEO Score: ${Math.round(data.aeo_score * 100)}%\x1b[0m`);
172
178
  }
173
179
 
180
+ // GEO Score
181
+ if (data.geo_score != null) {
182
+ const geoColor = data.geo_score >= 0.7 ? "\x1b[32m" : "\x1b[33m";
183
+ console.log(`${geoColor}GEO Score: ${Math.round(data.geo_score * 100)}%\x1b[0m`);
184
+ }
185
+
174
186
  // Structured Recommendations
175
187
  const structured = data.recommendations || [];
176
188
  if (structured.length) {
@@ -352,6 +364,23 @@ export function registerCommands(program) {
352
364
  }
353
365
  }
354
366
 
367
+ // GEO Signals
368
+ const geo = data.geo_signals;
369
+ if (geo) {
370
+ console.log(`\n\x1b[1mGEO Signals\x1b[0m`);
371
+ console.log(
372
+ ` ${check(geo.has_summary_section)} Summary Section ${check(geo.has_last_reviewed_date)} Reviewed Date ${check(geo.has_methodology_section)} Methodology`
373
+ );
374
+ console.log(
375
+ ` Quotable: ${geo.quotable_statement_count} Statistics: ${geo.statistic_count} Entities: ${geo.named_entity_count} Definitions: ${geo.definition_clarity_count}`
376
+ );
377
+ console.log(
378
+ ` Comparisons: ${geo.comparison_structure_count} Steps: ${geo.step_by_step_count} Sources: ${geo.source_attribution_count} Expertise: ${geo.author_expertise_signals}`
379
+ );
380
+ console.log(scoreBar("Topical Coverage", geo.topical_coverage_score));
381
+ console.log(scoreBar("Content Segmentation", geo.content_segmentation_score));
382
+ }
383
+
355
384
  const wc = data.word_count || 0;
356
385
  if (wc > 0 && wc < 50) {
357
386
  console.log(
@@ -382,6 +411,9 @@ export function registerCommands(program) {
382
411
  console.log(
383
412
  `\x1b[32mSynced ${data.synced_queries || 0} queries for ${data.site || opts.site}\x1b[0m`
384
413
  );
414
+ if (data.warning) {
415
+ console.log(`\x1b[33mWarning: ${data.warning}\x1b[0m`);
416
+ }
385
417
  }
386
418
  } catch (e) {
387
419
  handleError(e);
@@ -594,37 +626,463 @@ export function registerCommands(program) {
594
626
  }
595
627
  });
596
628
 
597
- // --- deploy ---
629
+ // --- history ---
598
630
  program
599
- .command("deploy")
600
- .description("Deploy Xyle services (API, frontend, trigger.dev)")
601
- .option("--api", "Deploy API to Cloud Run")
602
- .option("--frontend", "Deploy frontend to Vercel")
603
- .option("--trigger", "Deploy Trigger.dev tasks")
604
- .option("--dir <path>", "Project root directory", process.cwd())
631
+ .command("history")
632
+ .description("View snapshot history for a URL or site")
633
+ .option("--url <url>", "Page URL to view history for")
634
+ .option("--site <domain>", "Site domain to view history for")
635
+ .option("--limit <n>", "Max snapshots to return", "10")
636
+ .option("--json", "Output as JSON")
605
637
  .action(async (opts) => {
606
- const scriptPath = resolve(opts.dir, "scripts", "deploy.sh");
607
- if (!existsSync(scriptPath)) {
608
- process.stderr.write(
609
- `\x1b[31mDeploy script not found: ${scriptPath}\x1b[0m\n` +
610
- `\x1b[2mRun this command from the project root or use --dir <path>\x1b[0m\n`
611
- );
638
+ if (!opts.url && !opts.site) {
639
+ process.stderr.write("\x1b[31mProvide --url or --site\x1b[0m\n");
640
+ process.exit(1);
641
+ }
642
+ try {
643
+ const data = await listSnapshots(opts.url, opts.site, parseInt(opts.limit, 10));
644
+ if (opts.json) {
645
+ console.log(printJson(data));
646
+ } else if (!data || data.length === 0) {
647
+ console.log("\x1b[33mNo snapshots found.\x1b[0m");
648
+ } else {
649
+ const rows = data.map((s) => {
650
+ const seo = s.seo_score != null ? Math.round(s.seo_score * 100) : null;
651
+ const aeo = s.aeo_score != null ? Math.round(s.aeo_score * 100) : null;
652
+ const seoColor = seo != null ? (seo >= 70 ? "\x1b[32m" : seo >= 40 ? "\x1b[33m" : "\x1b[31m") : "\x1b[2m";
653
+ const aeoColor = aeo != null ? (aeo >= 70 ? "\x1b[32m" : aeo >= 40 ? "\x1b[33m" : "\x1b[31m") : "\x1b[2m";
654
+ return {
655
+ created_at: new Date(s.created_at).toLocaleString(),
656
+ trigger: s.trigger_source || "-",
657
+ seo_score: `${seoColor}${seo != null ? seo + "%" : "-"}\x1b[0m`,
658
+ aeo_score: `${aeoColor}${aeo != null ? aeo + "%" : "-"}\x1b[0m`,
659
+ signals: `${s.passing_signals}/${s.total_signals}`,
660
+ };
661
+ });
662
+ console.log(`\n\x1b[1mSnapshot History\x1b[0m (${data.length} snapshots)\n`);
663
+ console.log(printTable(rows, ["created_at", "trigger", "seo_score", "aeo_score", "signals"]));
664
+ }
665
+ } catch (e) {
666
+ handleError(e);
667
+ }
668
+ });
669
+
670
+ // --- trends ---
671
+ program
672
+ .command("trends")
673
+ .description("View score trends over time")
674
+ .option("--url <url>", "Page URL to view trends for")
675
+ .option("--site <domain>", "Site domain to view trends for")
676
+ .option("--days <n>", "Number of days to look back", "90")
677
+ .option("--json", "Output as JSON")
678
+ .action(async (opts) => {
679
+ if (!opts.url && !opts.site) {
680
+ process.stderr.write("\x1b[31mProvide --url or --site\x1b[0m\n");
612
681
  process.exit(1);
613
682
  }
683
+ try {
684
+ const data = await getSnapshotTrends(opts.url, opts.site, parseInt(opts.days, 10));
685
+ if (opts.json) {
686
+ console.log(printJson(data));
687
+ } else {
688
+ const points = data.points || [];
689
+ if (points.length === 0) {
690
+ console.log("\x1b[33mNo trend data found.\x1b[0m");
691
+ } else {
692
+ const target = data.url || data.site || opts.url || opts.site;
693
+ console.log(`\n\x1b[1mScore Trends\x1b[0m ${target} (${data.period_days} days)\n`);
694
+
695
+ // ASCII sparkline
696
+ const seoVals = points.map((p) => (p.seo_score != null ? Math.round(p.seo_score * 100) : null));
697
+ const aeoVals = points.map((p) => (p.aeo_score != null ? Math.round(p.aeo_score * 100) : null));
698
+ const geoVals = points.map((p) => (p.geo_score != null ? Math.round(p.geo_score * 100) : null));
699
+ const spark = (vals) => {
700
+ const ticks = "\u2581\u2582\u2583\u2584\u2585\u2586\u2587\u2588";
701
+ const valid = vals.filter((v) => v != null);
702
+ if (valid.length === 0) return "-";
703
+ const min = Math.min(...valid);
704
+ const max = Math.max(...valid);
705
+ const range = max - min || 1;
706
+ return vals.map((v) => (v != null ? ticks[Math.min(7, Math.floor(((v - min) / range) * 7))] : " ")).join("");
707
+ };
614
708
 
615
- const flags = [];
616
- if (opts.api) flags.push("--api");
617
- if (opts.frontend) flags.push("--frontend");
618
- if (opts.trigger) flags.push("--trigger");
619
- // No flags = deploy all (script's default behavior)
709
+ console.log(` SEO ${spark(seoVals)} ${seoVals.filter((v) => v != null).slice(-1)[0] ?? "-"}%`);
710
+ console.log(` AEO ${spark(aeoVals)} ${aeoVals.filter((v) => v != null).slice(-1)[0] ?? "-"}%`);
711
+ console.log(` GEO ${spark(geoVals)} ${geoVals.filter((v) => v != null).slice(-1)[0] ?? "-"}%`);
712
+ console.log();
713
+ console.log(printTable(
714
+ points.map((p) => ({
715
+ date: p.date,
716
+ seo: p.seo_score != null ? Math.round(p.seo_score * 100) + "%" : "-",
717
+ aeo: p.aeo_score != null ? Math.round(p.aeo_score * 100) + "%" : "-",
718
+ geo: p.geo_score != null ? Math.round(p.geo_score * 100) + "%" : "-",
719
+ signals: p.passing_signals,
720
+ })),
721
+ ["date", "seo", "aeo", "geo", "signals"]
722
+ ));
723
+ }
724
+ }
725
+ } catch (e) {
726
+ handleError(e);
727
+ }
728
+ });
620
729
 
621
- const cmd = `bash "${scriptPath}" ${flags.join(" ")}`;
622
- console.log(`\x1b[36mRunning:\x1b[0m ${cmd}\n`);
730
+ // --- diff ---
731
+ program
732
+ .command("diff")
733
+ .description("Compare two snapshots")
734
+ .requiredOption("--before <id>", "Before snapshot ID")
735
+ .requiredOption("--after <id>", "After snapshot ID")
736
+ .option("--json", "Output as JSON")
737
+ .action(async (opts) => {
623
738
  try {
624
- execSync(cmd, { stdio: "inherit", cwd: opts.dir });
739
+ const data = await diffSnapshots(opts.before, opts.after);
740
+ if (opts.json) {
741
+ console.log(printJson(data));
742
+ } else {
743
+ console.log(`\n\x1b[1mSnapshot Diff\x1b[0m\n`);
744
+
745
+ const delta = (label, val) => {
746
+ if (val == null) return ` ${label.padEnd(20)} -`;
747
+ const pct = Math.round(val * 100);
748
+ const sign = pct > 0 ? "+" : "";
749
+ const color = pct > 0 ? "\x1b[32m" : pct < 0 ? "\x1b[31m" : "\x1b[2m";
750
+ return ` ${label.padEnd(20)} ${color}${sign}${pct}%\x1b[0m`;
751
+ };
752
+
753
+ console.log(delta("SEO Score", data.seo_delta));
754
+ console.log(delta("AEO Score", data.aeo_delta));
755
+ console.log(delta("GEO Score", data.geo_delta));
756
+
757
+ if (data.breakdown_delta) {
758
+ for (const [key, val] of Object.entries(data.breakdown_delta)) {
759
+ console.log(delta(` ${key}`, val));
760
+ }
761
+ }
762
+
763
+ const sigDelta = data.signals_delta || 0;
764
+ const sigColor = sigDelta > 0 ? "\x1b[32m" : sigDelta < 0 ? "\x1b[31m" : "\x1b[2m";
765
+ const sigSign = sigDelta > 0 ? "+" : "";
766
+ console.log(` ${"Signals".padEnd(20)} ${sigColor}${sigSign}${sigDelta}\x1b[0m`);
767
+
768
+ const resolved = data.resolved_issues || [];
769
+ if (resolved.length) {
770
+ console.log(`\n \x1b[32mResolved:\x1b[0m`);
771
+ for (const issue of resolved) {
772
+ console.log(` \x1b[32m\u2713\x1b[0m ${issue}`);
773
+ }
774
+ }
775
+
776
+ const newIssues = data.new_issues || [];
777
+ if (newIssues.length) {
778
+ console.log(`\n \x1b[31mNew Issues:\x1b[0m`);
779
+ for (const issue of newIssues) {
780
+ console.log(` \x1b[31m\u2717\x1b[0m ${issue}`);
781
+ }
782
+ }
783
+ }
625
784
  } catch (e) {
626
- process.stderr.write(`\x1b[31mDeploy failed.\x1b[0m\n`);
627
- process.exit(e.status || 1);
785
+ handleError(e);
628
786
  }
629
787
  });
788
+
789
+ // --- ask ---
790
+ program
791
+ .command("ask")
792
+ .description("Query the knowledge base for expert SEO/AEO/GEO guidance")
793
+ .requiredOption("--question <text>", "Question to ask the knowledge base")
794
+ .option("--url <url>", "Context URL for the question")
795
+ .option("--topic <topic>", "Topic filter (e.g., schema, technical-seo)")
796
+ .option("--sources <n>", "Number of sources to retrieve", "5")
797
+ .option("--json", "Output as JSON")
798
+ .action(async (opts) => {
799
+ try {
800
+ const data = await askKnowledgeBase(
801
+ opts.question,
802
+ opts.url,
803
+ opts.topic,
804
+ parseInt(opts.sources, 10)
805
+ );
806
+ if (opts.json) {
807
+ console.log(printJson(data));
808
+ } else {
809
+ console.log(`\n\x1b[1mAnswer\x1b[0m${data.grounded ? " \x1b[32m(grounded)\x1b[0m" : ""}\n`);
810
+ console.log(data.answer);
811
+ console.log();
812
+ }
813
+ } catch (e) {
814
+ handleError(e);
815
+ }
816
+ });
817
+
818
+ // --- geo ---
819
+ program
820
+ .command("geo")
821
+ .description("Show GEO (Generative Engine Optimization) signals and score for a URL")
822
+ .requiredOption("--url <url>", "URL to analyze for GEO signals")
823
+ .option("--json", "Output as JSON")
824
+ .action(async (opts) => {
825
+ try {
826
+ const data = await crawlPage(opts.url);
827
+ if (opts.json) {
828
+ console.log(printJson({
829
+ url: data.url,
830
+ geo_signals: data.geo_signals,
831
+ }));
832
+ } else {
833
+ const geo = data.geo_signals;
834
+ if (!geo) {
835
+ console.log("\x1b[33mGEO signals not available for this page.\x1b[0m");
836
+ return;
837
+ }
838
+ const check = (v) => (v ? "\x1b[32m\u2713\x1b[0m" : "\x1b[31m\u2717\x1b[0m");
839
+ const scoreBar = (label, score) => {
840
+ const pct = Math.round((score || 0) * 100);
841
+ const filled = Math.round(pct / 5);
842
+ const bar = "\u2588".repeat(filled) + "\u2591".repeat(20 - filled);
843
+ const color = pct >= 70 ? "\x1b[32m" : pct >= 40 ? "\x1b[33m" : "\x1b[31m";
844
+ return ` ${label.padEnd(28)} ${color}${bar} ${pct}%\x1b[0m`;
845
+ };
846
+
847
+ console.log(`\n\x1b[1mGEO Signals\x1b[0m ${data.url}\n`);
848
+
849
+ console.log(`\x1b[1mCitability (40%)\x1b[0m`);
850
+ console.log(` Quotable statements: ${geo.quotable_statement_count}`);
851
+ console.log(` Statistics: ${geo.statistic_count}`);
852
+ console.log(scoreBar("Unique insight density", geo.unique_insight_density));
853
+ console.log(` Source attributions: ${geo.source_attribution_count}`);
854
+
855
+ console.log(`\n\x1b[1mEntity/Topical (25%)\x1b[0m`);
856
+ console.log(` Named entities: ${geo.named_entity_count}`);
857
+ console.log(scoreBar("Topical coverage", geo.topical_coverage_score));
858
+ console.log(` Clear definitions: ${geo.definition_clarity_count}`);
859
+
860
+ console.log(`\n\x1b[1mStructural (25%)\x1b[0m`);
861
+ console.log(` ${check(geo.has_summary_section)} Summary/TL;DR section`);
862
+ console.log(` Comparison structures: ${geo.comparison_structure_count}`);
863
+ console.log(` Step-by-step content: ${geo.step_by_step_count}`);
864
+ console.log(scoreBar("Content segmentation", geo.content_segmentation_score));
865
+
866
+ console.log(`\n\x1b[1mAuthority (10%)\x1b[0m`);
867
+ console.log(` ${check(geo.has_last_reviewed_date)} Last reviewed date`);
868
+ console.log(` Expertise signals: ${geo.author_expertise_signals}`);
869
+ console.log(` ${check(geo.has_methodology_section)} Methodology section`);
870
+ console.log(scoreBar("Authority link ratio", geo.outbound_authority_ratio));
871
+ }
872
+ } catch (e) {
873
+ handleError(e);
874
+ }
875
+ });
876
+
877
+ // --- site-crawl ---
878
+ const siteCrawlCmd = program
879
+ .command("site-crawl")
880
+ .description("Full-site SEO crawler — Screaming Frog-style audit");
881
+
882
+ // site-crawl start <url> — start a crawl and poll until complete
883
+ siteCrawlCmd
884
+ .command("start")
885
+ .description("Start a full-site crawl")
886
+ .argument("<url>", "Seed URL to crawl")
887
+ .option("--max-pages <n>", "Maximum pages to crawl", "500")
888
+ .option("--max-depth <n>", "Maximum crawl depth", "5")
889
+ .option("--no-robots", "Ignore robots.txt")
890
+ .option("--no-js", "Skip JavaScript rendering")
891
+ .option("--json", "Output as JSON (no live polling)")
892
+ .action(async (url, opts) => {
893
+ try {
894
+ const config = {
895
+ max_pages: parseInt(opts.maxPages, 10),
896
+ max_depth: parseInt(opts.maxDepth, 10),
897
+ respect_robots: opts.robots !== false,
898
+ render_js: opts.js !== false,
899
+ };
900
+ const data = await startSiteCrawl(url, config);
901
+ const jobId = data.job_id;
902
+
903
+ if (opts.json) {
904
+ console.log(printJson(data));
905
+ return;
906
+ }
907
+
908
+ console.log(`\x1b[36mCrawl started:\x1b[0m ${jobId}`);
909
+
910
+ // Poll every 2 seconds
911
+ const POLL_MS = 2000;
912
+ let prev = 0;
913
+ while (true) {
914
+ await new Promise((r) => setTimeout(r, POLL_MS));
915
+ const status = await getSiteCrawlStatus(jobId);
916
+ const crawled = status.pages_crawled || 0;
917
+ const discovered = status.pages_discovered || 0;
918
+ const errors = status.errors_count || 0;
919
+
920
+ if (crawled !== prev) {
921
+ const pct = discovered > 0 ? Math.round((crawled / discovered) * 100) : 0;
922
+ const bar = "\u2588".repeat(Math.round(pct / 5)) + "\u2591".repeat(20 - Math.round(pct / 5));
923
+ process.stdout.write(`\r ${bar} ${pct}% ${crawled}/${discovered} pages ${errors} errors`);
924
+ prev = crawled;
925
+ }
926
+
927
+ if (["completed", "failed", "cancelled"].includes(status.status)) {
928
+ process.stdout.write("\n");
929
+ if (status.status === "completed") {
930
+ const h = status.health_score != null ? Math.round(status.health_score * 100) : null;
931
+ const hColor = h != null ? (h >= 70 ? "\x1b[32m" : h >= 40 ? "\x1b[33m" : "\x1b[31m") : "\x1b[2m";
932
+ console.log(`\x1b[32mCompleted.\x1b[0m Pages: ${crawled} Errors: ${errors}`);
933
+ if (h != null) {
934
+ console.log(`Site Health: ${hColor}${h}%\x1b[0m`);
935
+ }
936
+ console.log(`\nView pages: xyle site-crawl pages ${jobId}`);
937
+ console.log(`View issues: xyle site-crawl issues ${jobId}`);
938
+ } else {
939
+ console.log(`\x1b[31mCrawl ${status.status}\x1b[0m${status.error_message ? ": " + status.error_message : ""}`);
940
+ }
941
+ break;
942
+ }
943
+ }
944
+ } catch (e) {
945
+ handleError(e);
946
+ }
947
+ });
948
+
949
+ // site-crawl status <job_id>
950
+ siteCrawlCmd
951
+ .command("status")
952
+ .description("Check crawl job status")
953
+ .argument("<jobId>", "Crawl job ID")
954
+ .option("--json", "Output as JSON")
955
+ .action(async (jobId, opts) => {
956
+ try {
957
+ const data = await getSiteCrawlStatus(jobId);
958
+ if (opts.json) {
959
+ console.log(printJson(data));
960
+ } else {
961
+ const color = data.status === "completed" ? "\x1b[32m" : data.status === "failed" ? "\x1b[31m" : "\x1b[33m";
962
+ console.log(`Status: ${color}${data.status}\x1b[0m`);
963
+ console.log(`Pages: ${data.pages_crawled}/${data.pages_discovered} Errors: ${data.errors_count}`);
964
+ if (data.health_score != null) {
965
+ console.log(`Health: ${Math.round(data.health_score * 100)}%`);
966
+ }
967
+ }
968
+ } catch (e) {
969
+ handleError(e);
970
+ }
971
+ });
972
+
973
+ // site-crawl pages <job_id>
974
+ siteCrawlCmd
975
+ .command("pages")
976
+ .description("List crawled pages")
977
+ .argument("<jobId>", "Crawl job ID")
978
+ .option("--limit <n>", "Results per page", "50")
979
+ .option("--filter <type>", "Filter: broken, redirect, thin")
980
+ .option("--json", "Output as JSON")
981
+ .action(async (jobId, opts) => {
982
+ try {
983
+ const data = await getSiteCrawlPages(jobId, {
984
+ limit: parseInt(opts.limit, 10),
985
+ filter: opts.filter,
986
+ });
987
+ if (opts.json) {
988
+ console.log(printJson(data));
989
+ } else {
990
+ console.log(`\n\x1b[1mCrawled Pages\x1b[0m (${data.total} total)\n`);
991
+ const rows = (data.pages || []).map((p) => ({
992
+ status: p.http_status || "-",
993
+ depth: p.depth,
994
+ seo: p.seo_score != null ? Math.round(p.seo_score * 100) + "%" : "-",
995
+ aeo: p.aeo_score != null ? Math.round(p.aeo_score * 100) + "%" : "-",
996
+ geo: p.geo_score != null ? Math.round(p.geo_score * 100) + "%" : "-",
997
+ words: p.word_count,
998
+ issues: p.issues_count,
999
+ url: p.url.length > 60 ? p.url.slice(0, 57) + "..." : p.url,
1000
+ }));
1001
+ console.log(printTable(rows, ["status", "depth", "seo", "aeo", "geo", "words", "issues", "url"]));
1002
+ }
1003
+ } catch (e) {
1004
+ handleError(e);
1005
+ }
1006
+ });
1007
+
1008
+ // site-crawl issues <job_id>
1009
+ siteCrawlCmd
1010
+ .command("issues")
1011
+ .description("List crawl issues")
1012
+ .argument("<jobId>", "Crawl job ID")
1013
+ .option("--severity <level>", "Filter: critical, warning, info")
1014
+ .option("--category <cat>", "Filter by category")
1015
+ .option("--json", "Output as JSON")
1016
+ .action(async (jobId, opts) => {
1017
+ try {
1018
+ const data = await getSiteCrawlIssues(jobId, {
1019
+ severity: opts.severity,
1020
+ category: opts.category,
1021
+ });
1022
+ if (opts.json) {
1023
+ console.log(printJson(data));
1024
+ } else {
1025
+ const counts = data.counts_by_severity || {};
1026
+ console.log(`\n\x1b[1mCrawl Issues\x1b[0m \x1b[31m${counts.critical || 0} critical\x1b[0m \x1b[33m${counts.warning || 0} warning\x1b[0m \x1b[2m${counts.info || 0} info\x1b[0m\n`);
1027
+ for (const issue of data.issues || []) {
1028
+ const sevColor = issue.severity === "critical" ? "\x1b[31m" : issue.severity === "warning" ? "\x1b[33m" : "\x1b[2m";
1029
+ console.log(` ${sevColor}[${issue.severity}]\x1b[0m \x1b[36m${issue.category}\x1b[0m ${issue.message}`);
1030
+ }
1031
+ }
1032
+ } catch (e) {
1033
+ handleError(e);
1034
+ }
1035
+ });
1036
+
1037
+ // site-crawl list
1038
+ siteCrawlCmd
1039
+ .command("list")
1040
+ .description("List recent crawl jobs")
1041
+ .option("--json", "Output as JSON")
1042
+ .action(async (opts) => {
1043
+ try {
1044
+ const data = await listSiteCrawls();
1045
+ if (opts.json) {
1046
+ console.log(printJson(data));
1047
+ } else {
1048
+ const jobs = data.jobs || [];
1049
+ if (!jobs.length) {
1050
+ console.log("\x1b[33mNo crawl jobs found.\x1b[0m");
1051
+ return;
1052
+ }
1053
+ const rows = jobs.map((j) => ({
1054
+ id: j.job_id.slice(0, 8),
1055
+ status: j.status,
1056
+ pages: j.pages_crawled,
1057
+ errors: j.errors_count,
1058
+ health: j.health_score != null ? Math.round(j.health_score * 100) + "%" : "-",
1059
+ seed: j.seed_url.length > 40 ? j.seed_url.slice(0, 37) + "..." : j.seed_url,
1060
+ }));
1061
+ console.log(printTable(rows, ["id", "status", "pages", "errors", "health", "seed"]));
1062
+ }
1063
+ } catch (e) {
1064
+ handleError(e);
1065
+ }
1066
+ });
1067
+
1068
+ // --- kb-stats ---
1069
+ program
1070
+ .command("kb-stats")
1071
+ .description("Show knowledge base indexing statistics")
1072
+ .option("--json", "Output as JSON")
1073
+ .action(async (opts) => {
1074
+ try {
1075
+ const data = await getKbStats();
1076
+ if (opts.json) {
1077
+ console.log(printJson(data));
1078
+ } else {
1079
+ console.log(`\n\x1b[1mKnowledge Base\x1b[0m\n`);
1080
+ const ready = data.ready === true || data.status === "indexed";
1081
+ console.log(` Status: ${ready ? "\x1b[32mready\x1b[0m" : "\x1b[31mnot ready\x1b[0m"}`);
1082
+ }
1083
+ } catch (e) {
1084
+ handleError(e);
1085
+ }
1086
+ });
1087
+
630
1088
  }
package/src/seed.mjs CHANGED
@@ -89,6 +89,10 @@ npx @xyleapp/cli <command> [options]
89
89
  | \`xyle sync --site <url> [--json]\` | \`--site\` (required) | Syncs Search Console data; returns synced_queries count |
90
90
  | \`xyle queries --site <domain> [--limit N] [--json]\` | \`--site\` (required), \`--limit\` (default 20) | query, impressions, clicks, ctr, position |
91
91
  | \`xyle crawl --url <url> [--json]\` | \`--url\` (required) | title, meta_desc, word_count, headings |
92
+ | \`xyle site-crawl start <url> [--max-pages N] [--max-depth N] [--no-robots] [--no-js] [--json]\` | \`url\` (required) | Full-site BFS crawl; returns job_id, polls to completion, prints site health score |
93
+ | \`xyle site-crawl status <job_id>\` | \`job_id\` | Crawl job status + progress |
94
+ | \`xyle site-crawl pages <job_id> [--limit N] [--filter broken\\|redirect\\|thin] [--json]\` | \`job_id\` | Per-page SEO/AEO/GEO scores |
95
+ | \`xyle site-crawl issues <job_id> [--severity critical\\|warning] [--json]\` | \`job_id\` | Site-wide issues (broken links, duplicates, orphans, thin content, canonicals) |
92
96
 
93
97
  ### Analysis
94
98
  | Command | Key Flags | Returns |
@@ -112,8 +116,19 @@ Always use \`--json\` when parsing output programmatically.
112
116
 
113
117
  ## Strategic Workflows
114
118
 
115
- ### 1. Full SEO Audit
116
- **When:** User wants a health check on their site's SEO performance.
119
+ ### 1. Full-Site Audit (Screaming Frog replacement)
120
+ **When:** User wants a whole-site health report, not a single-page audit.
121
+ **Goal:** BFS every internal page, detect site-wide issues, deliver a prioritized fix plan with a Site Health Score.
122
+
123
+ 1. \`xyle status --json\` — verify connectivity
124
+ 2. \`xyle site-crawl start https://<domain> --max-pages 500 --json\` — run the crawl; the CLI polls until complete and prints a progress bar
125
+ 3. \`xyle site-crawl issues <job_id> --severity critical --json\` — triage broken links, redirect chains, canonical mismatches, duplicates, orphans, thin content
126
+ 4. \`xyle site-crawl pages <job_id> --filter thin --json\` — find pages that need content work
127
+ 5. \`xyle site-crawl pages <job_id> --limit 50 --json\` — sort by lowest SEO/AEO/GEO scores
128
+ 6. **Deliver a prioritized report**: Site Health Score, critical issue count, top 10 lowest-scoring pages, 30-day fix roadmap
129
+
130
+ ### 2. Single-Page / Query-Driven SEO Audit
131
+ **When:** User wants a deep dive on one page or a performance check driven by Search Console data.
117
132
  **Goal:** Categorize queries by intent, flag striking-distance opportunities, and deliver a prioritized action plan.
118
133
 
119
134
  1. \`xyle status --json\` — verify connectivity
@@ -174,7 +189,8 @@ When the user asks something SEO-related, route to the right workflow:
174
189
 
175
190
  | User Says | Workflow | Why |
176
191
  |-----------|----------|-----|
177
- | "How's my SEO?" / "Audit my site" | Full SEO Audit | Need holistic view before specific fixes |
192
+ | "Audit my whole site" / "Crawl my site" / "Screaming Frog" / "Site health" | Full-Site Audit | Need site-wide view: broken links, duplicates, orphans, thin content, link graph |
193
+ | "How's my SEO?" / "Audit my site" | Single-Page / Query-Driven SEO Audit | Need holistic view before specific fixes |
178
194
  | "Optimize this page" / "Improve rankings for X" | Page Optimization | Specific page needs score-based action |
179
195
  | "Who is my audience?" / "What should I write about?" | ICP Discovery | Need strategy before tactics |
180
196
  | "What content am I missing?" / "Find gaps" | Content Gap Sprint | Ready to create, need briefs |