@schemasentry/cli 0.3.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +65 -0
  2. package/dist/index.js +718 -14
  3. package/package.json +2 -2
package/README.md CHANGED
@@ -56,6 +56,65 @@ pnpm schemasentry audit \
56
56
  --output ./report.html
57
57
  ```
58
58
 
59
+ ### `collect`
60
+
61
+ Collect JSON-LD blocks from built HTML output and emit schema data JSON:
62
+
63
+ ```bash
64
+ pnpm schemasentry collect --root ./out --output ./schema-sentry.data.json
65
+ ```
66
+
67
+ Check collected output against your current data file (CI drift guard):
68
+
69
+ ```bash
70
+ pnpm schemasentry collect --root ./out --check --data ./schema-sentry.data.json
71
+ ```
72
+
73
+ Collect and compare only selected routes, failing if any required route is missing:
74
+
75
+ ```bash
76
+ pnpm schemasentry collect \
77
+ --root ./out \
78
+ --routes / /blog /faq \
79
+ --strict-routes \
80
+ --check \
81
+ --data ./schema-sentry.data.json
82
+ ```
83
+
84
+ ### `scaffold`
85
+
86
+ Auto-generate schema stubs for routes without schema (dry-run by default):
87
+
88
+ ```bash
89
+ pnpm schemasentry scaffold --manifest ./schema-sentry.manifest.json --data ./schema-sentry.data.json
90
+ ```
91
+
92
+ Preview what would be generated without writing files:
93
+
94
+ ```bash
95
+ pnpm schemasentry scaffold
96
+ ```
97
+
98
+ Apply scaffolded schema to your files:
99
+
100
+ ```bash
101
+ pnpm schemasentry scaffold --write
102
+ ```
103
+
104
+ Skip confirmation prompts:
105
+
106
+ ```bash
107
+ pnpm schemasentry scaffold --write --force
108
+ ```
109
+
110
+ **Pattern-based auto-detection** infers schema types from URL patterns:
111
+ - `/blog/*` → BlogPosting
112
+ - `/products/*` → Product
113
+ - `/faq` → FAQPage
114
+ - `/events/*` → Event
115
+ - `/howto/*` → HowTo
116
+ - and more...
117
+
59
118
  ## Options
60
119
 
61
120
  | Option | Description |
@@ -63,6 +122,12 @@ pnpm schemasentry audit \
63
122
  | `--format json\|html` | Output format |
64
123
  | `--annotations none\|github` | CI annotations |
65
124
  | `-o, --output <path>` | Write output to file |
125
+ | `--root <path>` | Root directory to scan (`collect`, `scaffold`; also `init` and `audit` with `--scan`) |
126
+ | `--routes <routes...>` | Collect only specific routes, space- or comma-separated (`collect`) |
127
+ | `--strict-routes` | Fail when any route passed to `--routes` is missing (`collect`) |
128
+ | `--check` | Compare collected output with existing data and fail on drift (`collect`) |
129
+ | `--write` | Apply scaffolded changes to files (`scaffold`) |
130
+ | `--force` | Skip confirmation prompts (`scaffold`) |
66
131
  | `--recommended / --no-recommended` | Enable recommended field checks |
67
132
 
68
133
  ## Documentation
package/dist/index.js CHANGED
@@ -4,8 +4,8 @@
4
4
  import { Command } from "commander";
5
5
  import { mkdir, readFile, writeFile } from "fs/promises";
6
6
  import { readFileSync } from "fs";
7
- import path4 from "path";
8
- import { stableStringify as stableStringify2 } from "@schemasentry/core";
7
+ import path6 from "path";
8
+ import { stableStringify as stableStringify4 } from "@schemasentry/core";
9
9
 
10
10
  // src/report.ts
11
11
  import {
@@ -654,6 +654,458 @@ var emitGitHubAnnotations = (report, commandLabel) => {
654
654
  }
655
655
  };
656
656
 
657
+ // src/collect.ts
658
+ import { promises as fs4 } from "fs";
659
+ import path4 from "path";
660
+ import { stableStringify as stableStringify2 } from "@schemasentry/core";
661
// Directory names the HTML walker never descends into.
var IGNORED_DIRECTORIES = /* @__PURE__ */ new Set([".git", "node_modules", ".pnpm-store"]);
// Matches a whole <script>...</script> element; group 1 is the raw attribute text, group 2 the body.
var SCRIPT_TAG_REGEX = /<script\b([^>]*)>([\s\S]*?)<\/script>/gi;
// Tests attribute text for type=application/ld+json (double-quoted, single-quoted or unquoted).
var JSON_LD_TYPE_REGEX = /\btype\s*=\s*(?:"application\/ld\+json"|'application\/ld\+json'|application\/ld\+json)/i;
664
/**
 * Walk `options.rootDir` for HTML files and gather their JSON-LD nodes per route.
 *
 * @param {{ rootDir: string, routes?: string[] }} options - Directory to scan and an
 *   optional route allowlist (normalized with `normalizeRouteFilter`).
 * @returns {Promise<object>} `{ data: { routes }, stats, warnings, requestedRoutes, missingRoutes }`.
 *   `stats.routes` and `stats.blocks` describe the filtered result, while
 *   `stats.invalidBlocks` and `warnings` cover every scanned file.
 */
var collectSchemaData = async (options) => {
  const rootDir = path4.resolve(options.rootDir);
  const requestedRoutes = normalizeRouteFilter(options.routes ?? []);
  const htmlFiles = await walkHtmlFiles(rootDir);
  // Sorted so files that map to the same route are merged in a stable order.
  htmlFiles.sort((left, right) => left.localeCompare(right));

  const routes = {};
  const warnings = [];
  let invalidBlocks = 0;
  for (const filePath of htmlFiles) {
    const route = filePathToRoute(rootDir, filePath);
    if (!route) {
      continue;
    }
    const html = await fs4.readFile(filePath, "utf8");
    const extracted = extractSchemaNodes(html, filePath);
    if (extracted.nodes.length > 0) {
      routes[route] = (routes[route] ?? []).concat(extracted.nodes);
    }
    invalidBlocks += extracted.invalidBlocks;
    warnings.push(...extracted.warnings);
  }

  const hasFilter = requestedRoutes.length > 0;
  const filteredRoutes = hasFilter ? filterRoutesByAllowlist(routes, requestedRoutes) : routes;
  // A requested route counts as missing when no JSON-LD nodes were collected for it.
  const missingRoutes = hasFilter
    ? requestedRoutes.filter(
        (route) => !Object.prototype.hasOwnProperty.call(filteredRoutes, route)
      )
    : [];

  let blocks = 0;
  for (const nodes of Object.values(filteredRoutes)) {
    blocks += nodes.length;
  }

  return {
    data: {
      routes: sortRoutes(filteredRoutes)
    },
    stats: {
      htmlFiles: htmlFiles.length,
      routes: Object.keys(filteredRoutes).length,
      blocks,
      invalidBlocks
    },
    warnings,
    requestedRoutes,
    missingRoutes
  };
};
714
/**
 * Diff two schema data objects route by route.
 *
 * @param {{ routes?: object }} existing - Previously stored schema data.
 * @param {{ routes?: object }} collected - Freshly collected schema data.
 * @returns {object} `{ hasChanges, addedRoutes, removedRoutes, changedRoutes, changedRouteDetails }`;
 *   route lists are sorted, and a route is "changed" when its stable
 *   serialization differs between the two inputs.
 */
var compareSchemaData = (existing, collected) => {
  const before = existing.routes ?? {};
  const after = collected.routes ?? {};
  const owns = (target, key) => Object.prototype.hasOwnProperty.call(target, key);

  const addedRoutes = [];
  for (const route of Object.keys(after)) {
    if (!owns(before, route)) {
      addedRoutes.push(route);
    }
  }

  const removedRoutes = [];
  const changedRoutes = [];
  for (const route of Object.keys(before)) {
    if (!owns(after, route)) {
      removedRoutes.push(route);
    } else if (stableStringify2(before[route]) !== stableStringify2(after[route])) {
      changedRoutes.push(route);
    }
  }

  addedRoutes.sort();
  removedRoutes.sort();
  changedRoutes.sort();

  const changedRouteDetails = changedRoutes.map(
    (route) => buildRouteDriftDetail(route, before[route] ?? [], after[route] ?? [])
  );
  const hasChanges = addedRoutes.length > 0 || removedRoutes.length > 0 || changedRoutes.length > 0;
  return {
    hasChanges,
    addedRoutes,
    removedRoutes,
    changedRoutes,
    changedRouteDetails
  };
};
735
/**
 * Render a drift result as multi-line text.
 *
 * @param {object} drift - Result of `compareSchemaData`.
 * @param {number} [maxRoutes=5] - Maximum routes listed per section.
 * @returns {string} Summary line plus one preview line per non-empty section,
 *   followed by per-route details for changed routes.
 */
var formatSchemaDataDrift = (drift, maxRoutes = 5) => {
  if (!drift.hasChanges) {
    return "No schema data drift detected.";
  }
  const { addedRoutes, removedRoutes, changedRoutes } = drift;
  const output = [
    `Schema data drift detected: added_routes=${addedRoutes.length} removed_routes=${removedRoutes.length} changed_routes=${changedRoutes.length}`
  ];
  if (addedRoutes.length > 0) {
    output.push(formatRoutePreview("Added routes", addedRoutes, maxRoutes));
  }
  if (removedRoutes.length > 0) {
    output.push(formatRoutePreview("Removed routes", removedRoutes, maxRoutes));
  }
  if (changedRoutes.length > 0) {
    output.push(formatRoutePreview("Changed routes", changedRoutes, maxRoutes));
    const shownDetails = drift.changedRouteDetails.slice(0, maxRoutes);
    if (shownDetails.length > 0) {
      output.push(
        "Changed route details:",
        ...shownDetails.map((detail) => `- ${formatRouteDriftDetail(detail)}`)
      );
    }
  }
  return output.join("\n");
};
760
/**
 * Format `label: a, b, c (+N more)` showing at most `maxRoutes` routes.
 *
 * @param {string} label - Section label.
 * @param {string[]} routes - Routes to list.
 * @param {number} maxRoutes - Number of routes shown before truncating.
 * @returns {string} One preview line.
 */
var formatRoutePreview = (label, routes, maxRoutes) => {
  const shown = routes.slice(0, maxRoutes).join(", ");
  const hiddenCount = routes.length - maxRoutes;
  if (hiddenCount > 0) {
    return `${label}: ${shown} (+${hiddenCount} more)`;
  }
  return `${label}: ${shown}`;
};
765
/**
 * Format one changed-route detail as a single line.
 *
 * @param {{ route: string, beforeBlocks: number, afterBlocks: number, addedTypes: string[], removedTypes: string[] }} detail
 * @returns {string} Route, block counts before/after, and added/removed type lists
 *   ("(none)" when a list is empty).
 */
var formatRouteDriftDetail = (detail) => {
  const listOrNone = (types) => {
    if (types.length > 0) {
      return types.join(",");
    }
    return "(none)";
  };
  const { route, beforeBlocks, afterBlocks } = detail;
  return `${route} blocks ${beforeBlocks}->${afterBlocks} | +types ${listOrNone(detail.addedTypes)} | -types ${listOrNone(detail.removedTypes)}`;
};
770
/**
 * Return a copy of `routes` whose keys are inserted in `localeCompare` order.
 *
 * @param {object} routes - Map of route to schema nodes.
 * @returns {object} New object with the same entries, keys sorted.
 */
var sortRoutes = (routes) => {
  const pairs = Object.entries(routes);
  pairs.sort((left, right) => left[0].localeCompare(right[0]));
  return Object.fromEntries(pairs);
};
773
/**
 * Keep only the routes named in `allowlist`.
 *
 * @param {object} routes - Map of route to schema nodes.
 * @param {string[]} allowlist - Routes to keep.
 * @returns {object} New object with the allowlisted routes that exist in `routes`,
 *   inserted in allowlist order.
 */
var filterRoutesByAllowlist = (routes, allowlist) => {
  const kept = {};
  allowlist.forEach((route) => {
    if (!Object.prototype.hasOwnProperty.call(routes, route)) {
      return;
    }
    kept[route] = routes[route];
  });
  return kept;
};
782
/**
 * Normalize the values passed to `--routes` into a sorted, de-duplicated route list.
 *
 * Each entry may hold several comma-separated routes. Routes are trimmed,
 * given a leading "/" and stripped of trailing slashes so that `blog`,
 * `/blog` and `/blog/` all select the collected route `/blog` (collected
 * routes always start with "/" and never end with one, except the root "/").
 * The function is idempotent, so it is safe to apply more than once.
 *
 * @param {string[]} input2 - Raw route filter values.
 * @returns {string[]} Unique normalized routes in default sort order.
 */
var normalizeRouteFilter = (input2) => {
  const toRoute = (raw) => {
    const trimmed = raw.trim();
    if (trimmed.length === 0) {
      return "";
    }
    const withLeadingSlash = trimmed.startsWith("/") ? trimmed : `/${trimmed}`;
    const withoutTrailingSlash = withLeadingSlash.replace(/\/+$/, "");
    // Stripping trailing slashes from "/" leaves an empty string: that is the root route.
    return withoutTrailingSlash.length > 0 ? withoutTrailingSlash : "/";
  };
  const normalized = input2
    .flatMap((entry) => entry.split(","))
    .map(toRoute)
    .filter((route) => route.length > 0);
  return Array.from(new Set(normalized)).sort();
};
786
/**
 * Recursively list `.html` files under `rootDir`.
 *
 * Directories named in `IGNORED_DIRECTORIES` are skipped. Entries that are
 * neither directories nor regular files are ignored.
 *
 * @param {string} rootDir - Directory to walk.
 * @returns {Promise<string[]>} File paths built with `path.join` from `rootDir`.
 */
var walkHtmlFiles = async (rootDir) => {
  const found = [];
  const dirents = await fs4.readdir(rootDir, { withFileTypes: true });
  for (const dirent of dirents) {
    const fullPath = path4.join(rootDir, dirent.name);
    if (dirent.isDirectory()) {
      if (!IGNORED_DIRECTORIES.has(dirent.name)) {
        const nested = await walkHtmlFiles(fullPath);
        found.push(...nested);
      }
    } else if (dirent.isFile() && dirent.name.endsWith(".html")) {
      found.push(fullPath);
    }
  }
  return found;
};
804
/**
 * Convert an HTML file path into a route relative to `rootDir`.
 *
 * `index.html` maps to "/", `dir/index.html` to "/dir", and `page.html` to "/page".
 *
 * @param {string} rootDir - Scan root.
 * @param {string} filePath - Path of a file under the root.
 * @returns {string|null} The route, or `null` when the file is not `.html`.
 */
var filePathToRoute = (rootDir, filePath) => {
  // Backslashes become "/" so Windows-style relative paths yield the same routes.
  const relativePath = path4.relative(rootDir, filePath).split("\\").join("/");
  if (!relativePath.endsWith(".html")) {
    return null;
  }
  if (relativePath === "index.html") {
    return "/";
  }
  const indexSuffix = "/index.html";
  const suffixLength = relativePath.endsWith(indexSuffix) ? indexSuffix.length : ".html".length;
  return `/${relativePath.slice(0, -suffixLength)}`;
};
817
/**
 * Pull JSON-LD nodes out of an HTML document.
 *
 * Every `<script>` element is counted. Those whose attributes match
 * `JSON_LD_TYPE_REGEX` and whose body is non-empty are parsed as JSON.
 * Unparseable bodies are counted and reported as warnings instead of throwing.
 *
 * @param {string} html - HTML source.
 * @param {string} filePath - Path recorded on warnings.
 * @returns {{ nodes: object[], invalidBlocks: number, warnings: { file: string, message: string }[] }}
 */
var extractSchemaNodes = (html, filePath) => {
  const nodes = [];
  const warnings = [];
  let invalidBlocks = 0;
  const scripts = Array.from(html.matchAll(SCRIPT_TAG_REGEX));
  scripts.forEach((match, position) => {
    // Script numbers in warnings are 1-based and count every <script>, not only JSON-LD ones.
    const scriptNumber = position + 1;
    const attributes = match[1] ?? "";
    if (!JSON_LD_TYPE_REGEX.test(attributes)) {
      return;
    }
    const scriptBody = (match[2] ?? "").trim();
    if (!scriptBody) {
      return;
    }
    let parsed;
    try {
      parsed = JSON.parse(scriptBody);
    } catch {
      invalidBlocks += 1;
      warnings.push({
        file: filePath,
        message: `Invalid JSON-LD block at script #${scriptNumber}`
      });
      return;
    }
    for (const node of normalizeParsedBlock(parsed)) {
      nodes.push(node);
    }
  });
  return { nodes, invalidBlocks, warnings };
};
848
/**
 * Flatten one parsed JSON-LD value into a list of object nodes.
 *
 * Arrays keep their object members, an object with an array `@graph` yields
 * the graph's object members, any other object yields itself, and
 * non-objects yield nothing.
 *
 * @param {unknown} value - Result of `JSON.parse` on a script body.
 * @returns {object[]} Object nodes found in `value`.
 */
var normalizeParsedBlock = (value) => {
  let candidates;
  if (Array.isArray(value)) {
    candidates = value;
  } else if (!isJsonObject(value)) {
    candidates = [];
  } else {
    const graph = value["@graph"];
    if (!Array.isArray(graph)) {
      return [value];
    }
    candidates = graph;
  }
  return candidates.filter(isJsonObject);
};
/** True for non-null, non-array objects. */
var isJsonObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
862
/**
 * Summarize how one route's schema nodes changed.
 *
 * @param {string} route - Route being compared.
 * @param {object[]} beforeNodes - Nodes from the existing data.
 * @param {object[]} afterNodes - Nodes from the collected data.
 * @returns {{ route: string, beforeBlocks: number, afterBlocks: number, addedTypes: string[], removedTypes: string[] }}
 *   Block counts plus the sorted type labels present on only one side.
 */
var buildRouteDriftDetail = (route, beforeNodes, afterNodes) => {
  const beforeTypes = new Set(beforeNodes.map((node) => schemaTypeLabel(node)));
  const afterTypes = new Set(afterNodes.map((node) => schemaTypeLabel(node)));
  const addedTypes = Array.from(afterTypes).filter((type) => !beforeTypes.has(type)).sort();
  const removedTypes = Array.from(beforeTypes).filter((type) => !afterTypes.has(type)).sort();
  return {
    route,
    beforeBlocks: beforeNodes.length,
    afterBlocks: afterNodes.length,
    addedTypes,
    removedTypes
  };
};
/**
 * Label a node by its `@type`.
 *
 * JSON-LD allows `@type` to be a single string or an array of strings. An
 * array is labelled by joining its non-blank string members with "+", so
 * multi-typed nodes are not all collapsed into "(unknown)".
 *
 * @param {object} node - JSON-LD node.
 * @returns {string} The type label, or "(unknown)" when no usable type exists.
 */
var schemaTypeLabel = (node) => {
  const isUsable = (value) => typeof value === "string" && value.trim().length > 0;
  const type = node["@type"];
  if (isUsable(type)) {
    return type;
  }
  if (Array.isArray(type)) {
    const names = type.filter(isUsable);
    if (names.length > 0) {
      return names.join("+");
    }
  }
  return "(unknown)";
};
879
+
880
+ // src/scaffold.ts
881
+ import { promises as fs5 } from "fs";
882
+ import path5 from "path";
883
+ import { stableStringify as stableStringify3 } from "@schemasentry/core";
884
+
885
+ // src/patterns.ts
886
// Built-in route-pattern rules used when the caller supplies no custom patterns.
// Each rule pairs a route pattern with the schema.org type to suggest; when a
// route matches several rules, matchRouteToPatterns orders types by descending priority.
var DEFAULT_PATTERNS = [
  { pattern: "/blog/*", schemaType: "BlogPosting", priority: 10 },
  { pattern: "/blog", schemaType: "WebPage", priority: 5 },
  { pattern: "/products/*", schemaType: "Product", priority: 10 },
  { pattern: "/product/*", schemaType: "Product", priority: 10 },
  { pattern: "/faq", schemaType: "FAQPage", priority: 10 },
  { pattern: "/faqs", schemaType: "FAQPage", priority: 10 },
  { pattern: "/how-to/*", schemaType: "HowTo", priority: 10 },
  { pattern: "/howto/*", schemaType: "HowTo", priority: 10 },
  { pattern: "/events/*", schemaType: "Event", priority: 10 },
  { pattern: "/event/*", schemaType: "Event", priority: 10 },
  { pattern: "/reviews/*", schemaType: "Review", priority: 10 },
  { pattern: "/review/*", schemaType: "Review", priority: 10 },
  { pattern: "/videos/*", schemaType: "VideoObject", priority: 10 },
  { pattern: "/video/*", schemaType: "VideoObject", priority: 10 },
  { pattern: "/images/*", schemaType: "ImageObject", priority: 10 },
  { pattern: "/image/*", schemaType: "ImageObject", priority: 10 },
  { pattern: "/about", schemaType: "WebPage", priority: 10 },
  { pattern: "/contact", schemaType: "WebPage", priority: 10 },
  // Only the root route itself matches this rule (exact match, no wildcard).
  { pattern: "/", schemaType: "WebSite", priority: 1 }
];
907
/**
 * List the schema types suggested for `route`, highest priority first.
 *
 * @param {string} route - Route to classify.
 * @param {{ pattern: string, schemaType: string, priority?: number }[]} [patterns=DEFAULT_PATTERNS]
 *   Rules to test; a rule without a priority counts as 5.
 * @returns {string[]} Unique schema types ordered by descending priority.
 */
var matchRouteToPatterns = (route, patterns = DEFAULT_PATTERNS) => {
  const ranked = patterns
    .filter((rule) => routeMatchesPattern(route, rule.pattern))
    .map((rule) => ({ type: rule.schemaType, priority: rule.priority ?? 5 }));
  ranked.sort((left, right) => right.priority - left.priority);

  const seen = /* @__PURE__ */ new Set();
  const types = [];
  for (const { type } of ranked) {
    if (!seen.has(type)) {
      seen.add(type);
      types.push(type);
    }
  }
  return types;
};
920
/**
 * Test whether `route` matches a route pattern.
 *
 * Supported forms:
 * - an exact string match;
 * - a trailing `/*`, which matches any route starting with the text before the `*`
 *   (`/blog/*` matches `/blog/a` and `/blog/a/b`, but not `/blog`);
 * - `*` elsewhere, matching one non-empty path segment, and `?`, matching one character.
 *
 * Every other character is matched literally: regex metacharacters in the
 * pattern are escaped, so `.` only matches a dot and patterns such as
 * `/c++/*` cannot throw.
 *
 * @param {string} route - Route to test.
 * @param {string} pattern - Pattern to match against.
 * @returns {boolean} Whether the route matches.
 */
var routeMatchesPattern = (route, pattern) => {
  if (pattern === route) {
    return true;
  }
  const isPrefixPattern = pattern.endsWith("/*");
  // For "/blog/*" the head is "/blog/": everything before the trailing "*".
  const head = isPrefixPattern ? pattern.slice(0, -1) : pattern;
  if (isPrefixPattern && !/[*?]/.test(head)) {
    return route.startsWith(head);
  }
  const source = head
    .replace(/[.+^${}()|[\]\\]/g, "\\$&")
    .replace(/\*/g, "[^/]+")
    .replace(/\?/g, ".");
  // A trailing "/*" keeps its prefix meaning even when the head has wildcards of its own.
  const tail = isPrefixPattern ? ".*" : "";
  return new RegExp(`^${source}${tail}$`).test(route);
};
932
/**
 * Infer schema types for each route that matches at least one pattern.
 *
 * @param {Iterable<string>} routes - Routes to classify.
 * @param {object[]} [customPatterns] - Rules to use instead of `DEFAULT_PATTERNS`.
 * @returns {Map<string, string[]>} Route to inferred types; routes with no match are omitted.
 */
var inferSchemaTypes = (routes, customPatterns) => {
  const activePatterns = customPatterns ?? DEFAULT_PATTERNS;
  const pairs = Array.from(
    routes,
    (route) => [route, matchRouteToPatterns(route, activePatterns)]
  );
  return /* @__PURE__ */ new Map(pairs.filter(([, types]) => types.length > 0));
};
943
/**
 * Build manifest entries (route to inferred type names) as a plain object.
 *
 * @param {Iterable<string>} routes - Routes to classify.
 * @param {object[]} [customPatterns] - Rules forwarded to `inferSchemaTypes`.
 * @returns {object} Route to type list, for routes with at least one inferred type.
 */
var generateManifestEntries = (routes, customPatterns) => {
  const entries = {};
  inferSchemaTypes(routes, customPatterns).forEach((types, route) => {
    entries[route] = types;
  });
  return entries;
};
951
+
952
+ // src/scaffold.ts
953
/**
 * Work out which scanned routes lack schema data and generate stubs for them.
 *
 * Nothing is written here; `applyScaffold` persists the result.
 *
 * @param {{ manifestPath: string, dataPath: string, rootDir: string, customPatterns?: object[] }} options
 *   `manifestPath` is accepted for symmetry with `applyScaffold` but is not read:
 *   only the data file decides which routes still need schema.
 * @returns {Promise<{ routesToScaffold: string[], generatedSchemas: Map<string, object[]>, manifestUpdates: object, wouldUpdate: boolean }>}
 *   `routesToScaffold` lists every scanned route with no (or empty) data;
 *   `generatedSchemas` and `manifestUpdates` only cover routes whose type could be inferred.
 */
var scaffoldSchema = async (options) => {
  const data = await loadData(options.dataPath);
  // Tolerate a data file without a `routes` key instead of throwing a TypeError below.
  const existingRoutes = data?.routes ?? {};
  const discoveredRoutes = await scanRoutes({ rootDir: options.rootDir });
  const routesNeedingSchema = discoveredRoutes.filter((route) => {
    const blocks = existingRoutes[route];
    return !blocks || blocks.length === 0;
  });

  // Infer once and derive both outputs from the same result.
  const inferredTypes = inferSchemaTypes(routesNeedingSchema, options.customPatterns);
  const manifestUpdates = {};
  const generatedSchemas = /* @__PURE__ */ new Map();
  for (const [route, types] of inferredTypes) {
    manifestUpdates[route] = [...types];
    generatedSchemas.set(route, types.map((type) => generateSchemaStub(type, route)));
  }

  return {
    routesToScaffold: routesNeedingSchema,
    generatedSchemas,
    manifestUpdates,
    wouldUpdate: routesNeedingSchema.length > 0
  };
};
978
/**
 * Read a JSON file that holds a top-level `routes` object.
 *
 * Only a missing file is treated as "empty". Any other read error, invalid
 * JSON, or a wrong shape is thrown, so that a later write cannot silently
 * replace a file that merely failed to load.
 *
 * @param {string} filePath - File to read.
 * @param {string} label - Human-readable file kind used in error messages.
 * @returns {Promise<object>} The parsed object, with `routes` defaulted to `{}`.
 * @throws {Error} On read errors other than ENOENT, invalid JSON, or a non-object shape.
 */
var readRoutesJson = async (filePath, label) => {
  let raw;
  try {
    raw = await fs5.readFile(filePath, "utf8");
  } catch (error) {
    if (error && error.code === "ENOENT") {
      return { routes: {} };
    }
    throw error;
  }
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    throw new Error(`${label} at ${filePath} is not valid JSON`, { cause: error });
  }
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new Error(`${label} at ${filePath} must be a JSON object`);
  }
  const routes = parsed.routes ?? {};
  if (typeof routes !== "object" || Array.isArray(routes)) {
    throw new Error(`${label} at ${filePath} must contain a 'routes' object`);
  }
  return { ...parsed, routes };
};
/**
 * Load the manifest, or an empty one when the file does not exist.
 *
 * @param {string} manifestPath - Path to the manifest JSON.
 * @returns {Promise<object>} Manifest with a `routes` object.
 */
var loadManifest = async (manifestPath) => readRoutesJson(manifestPath, "Manifest");
/**
 * Load the schema data, or empty data when the file does not exist.
 *
 * @param {string} dataPath - Path to the schema data JSON.
 * @returns {Promise<object>} Schema data with a `routes` object.
 */
var loadData = async (dataPath) => readRoutesJson(dataPath, "Schema data");
994
/**
 * Build a placeholder JSON-LD node of the given schema.org type.
 *
 * Every stub starts with `@context` and `@type`. Known types add placeholder
 * fields, and any other type gets a `name` of "<type> Name". Date fields are
 * filled from the current time.
 *
 * @param {string} type - schema.org type name.
 * @param {string} route - Route the stub is for; used as `url` on some types.
 * @returns {object} The stub node.
 */
var generateSchemaStub = (type, route) => {
  const today = () => (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
  const placeholderAuthor = () => ({
    "@type": "Person",
    name: "Author Name"
  });
  // A Map, not an object literal, so names like "constructor" cannot hit inherited keys.
  const extrasByType = /* @__PURE__ */ new Map([
    ["BlogPosting", () => ({
      headline: "Blog Post Title",
      author: placeholderAuthor(),
      datePublished: today(),
      url: route
    })],
    ["Product", () => ({
      name: "Product Name",
      description: "Product description",
      offers: {
        "@type": "Offer",
        price: "0.00",
        priceCurrency: "USD"
      }
    })],
    ["FAQPage", () => ({ mainEntity: [] })],
    ["HowTo", () => ({ name: "How-To Title", step: [] })],
    ["Event", () => ({
      name: "Event Name",
      startDate: (/* @__PURE__ */ new Date()).toISOString()
    })],
    ["Organization", () => ({ name: "Organization Name", url: route })],
    ["WebSite", () => ({ name: "Website Name", url: route })],
    ["Article", () => ({
      headline: "Article Headline",
      author: placeholderAuthor(),
      datePublished: today()
    })]
  ]);
  const buildExtras = extrasByType.get(type);
  const extras = buildExtras ? buildExtras() : { name: `${type} Name` };
  return {
    "@context": "https://schema.org",
    "@type": type,
    ...extras
  };
};
1068
/**
 * Render a scaffold result as a human-readable preview.
 *
 * @param {{ routesToScaffold: string[], manifestUpdates: object }} result - Output of `scaffoldSchema`.
 * @returns {string} A count line followed by each route and its inferred types
 *   ("None detected" when a route has none), or a fixed message when there is nothing to do.
 */
var formatScaffoldPreview = (result) => {
  const { routesToScaffold, manifestUpdates } = result;
  if (routesToScaffold.length === 0) {
    return "No routes need schema generation.";
  }
  const routeLines = routesToScaffold.flatMap((route) => {
    const types = manifestUpdates[route] || [];
    return [
      ` ${route}`,
      ` Schema types: ${types.join(", ") || "None detected"}`
    ];
  });
  return [`Routes to scaffold: ${routesToScaffold.length}`, "", ...routeLines].join("\n");
};
1083
/**
 * Persist a scaffold result into the manifest and data files.
 *
 * Existing non-empty entries are never overwritten. A route whose current
 * entry is missing or an empty array is filled in, which matches how
 * `scaffoldSchema` selects routes (it treats an empty array as "no schema").
 *
 * @param {{ wouldUpdate: boolean, manifestUpdates: object, generatedSchemas: Map<string, object[]> }} result
 *   Output of `scaffoldSchema`.
 * @param {{ manifestPath: string, dataPath: string }} options - Files to update;
 *   parent directories are created when missing.
 * @returns {Promise<void>}
 */
var applyScaffold = async (result, options) => {
  if (!result.wouldUpdate) {
    return;
  }
  const manifest = await loadManifest(options.manifestPath);
  const data = await loadData(options.dataPath);
  // A loaded file may lack `routes`; create it so the assignments below cannot throw.
  if (!manifest.routes) {
    manifest.routes = {};
  }
  if (!data.routes) {
    data.routes = {};
  }
  const isUnset = (entry) => !entry || (Array.isArray(entry) && entry.length === 0);

  for (const [route, types] of Object.entries(result.manifestUpdates)) {
    if (isUnset(manifest.routes[route])) {
      manifest.routes[route] = types;
    }
  }
  for (const [route, schemas] of result.generatedSchemas) {
    if (isUnset(data.routes[route])) {
      data.routes[route] = schemas;
    }
  }

  await fs5.mkdir(path5.dirname(options.manifestPath), { recursive: true });
  await fs5.mkdir(path5.dirname(options.dataPath), { recursive: true });
  await fs5.writeFile(
    options.manifestPath,
    stableStringify3(manifest),
    "utf8"
  );
  await fs5.writeFile(options.dataPath, stableStringify3(data), "utf8");
};
1108
+
657
1109
  // src/index.ts
658
1110
  import { createInterface } from "readline/promises";
659
1111
  import { stdin as input, stdout as output } from "process";
@@ -672,8 +1124,8 @@ program.command("validate").description("Validate schema coverage and rules").op
672
1124
  const format = resolveOutputFormat(options.format);
673
1125
  const annotationsMode = resolveAnnotationsMode(options.annotations);
674
1126
  const recommended = await resolveRecommendedOption(options.config);
675
- const manifestPath = path4.resolve(process.cwd(), options.manifest);
676
- const dataPath = path4.resolve(process.cwd(), options.data);
1127
+ const manifestPath = path6.resolve(process.cwd(), options.manifest);
1128
+ const dataPath = path6.resolve(process.cwd(), options.data);
677
1129
  let raw;
678
1130
  try {
679
1131
  raw = await readFile(manifestPath, "utf8");
@@ -760,12 +1212,12 @@ program.command("init").description("Interactive setup wizard").option(
760
1212
  "Path to schema data JSON",
761
1213
  "schema-sentry.data.json"
762
1214
  ).option("-y, --yes", "Use defaults and skip prompts").option("-f, --force", "Overwrite existing files").option("--scan", "Scan the filesystem for routes and add WebPage entries").option("--root <path>", "Project root for scanning", ".").action(async (options) => {
763
- const manifestPath = path4.resolve(process.cwd(), options.manifest);
764
- const dataPath = path4.resolve(process.cwd(), options.data);
1215
+ const manifestPath = path6.resolve(process.cwd(), options.manifest);
1216
+ const dataPath = path6.resolve(process.cwd(), options.data);
765
1217
  const force = options.force ?? false;
766
1218
  const useDefaults = options.yes ?? false;
767
1219
  const answers = useDefaults ? getDefaultAnswers() : await promptAnswers();
768
- const scannedRoutes = options.scan ? await scanRoutes({ rootDir: path4.resolve(process.cwd(), options.root ?? ".") }) : [];
1220
+ const scannedRoutes = options.scan ? await scanRoutes({ rootDir: path6.resolve(process.cwd(), options.root ?? ".") }) : [];
769
1221
  if (options.scan && scannedRoutes.length === 0) {
770
1222
  console.error("No routes found during scan.");
771
1223
  }
@@ -794,7 +1246,7 @@ program.command("audit").description("Analyze schema health and report issues").
794
1246
  const format = resolveOutputFormat(options.format);
795
1247
  const annotationsMode = resolveAnnotationsMode(options.annotations);
796
1248
  const recommended = await resolveRecommendedOption(options.config);
797
- const dataPath = path4.resolve(process.cwd(), options.data);
1249
+ const dataPath = path6.resolve(process.cwd(), options.data);
798
1250
  let dataRaw;
799
1251
  try {
800
1252
  dataRaw = await readFile(dataPath, "utf8");
@@ -830,7 +1282,7 @@ program.command("audit").description("Analyze schema health and report issues").
830
1282
  }
831
1283
  let manifest;
832
1284
  if (options.manifest) {
833
- const manifestPath = path4.resolve(process.cwd(), options.manifest);
1285
+ const manifestPath = path6.resolve(process.cwd(), options.manifest);
834
1286
  let manifestRaw;
835
1287
  try {
836
1288
  manifestRaw = await readFile(manifestPath, "utf8");
@@ -864,7 +1316,7 @@ program.command("audit").description("Analyze schema health and report issues").
864
1316
  return;
865
1317
  }
866
1318
  }
867
- const requiredRoutes = options.scan ? await scanRoutes({ rootDir: path4.resolve(process.cwd(), options.root ?? ".") }) : [];
1319
+ const requiredRoutes = options.scan ? await scanRoutes({ rootDir: path6.resolve(process.cwd(), options.root ?? ".") }) : [];
868
1320
  if (options.scan && requiredRoutes.length === 0) {
869
1321
  console.error("No routes found during scan.");
870
1322
  }
@@ -883,6 +1335,184 @@ program.command("audit").description("Analyze schema health and report issues").
883
1335
  printAuditSummary(report, Boolean(manifest), Date.now() - start);
884
1336
  process.exit(report.ok ? 0 : 1);
885
1337
  });
1338
// `collect` command: scans built HTML under --root for JSON-LD, optionally
// compares the result with an existing data file (--check), and writes the
// collected data to --output or stdout. Exits 1 on scan failure, when no HTML
// is found, on missing routes under --strict-routes, on unreadable/invalid
// data for --check, on write failure, or when drift is detected; otherwise 0.
program.command("collect").description("Collect JSON-LD blocks from built HTML output").option("--root <path>", "Root directory to scan for HTML files", ".").option("--routes <routes...>", "Only collect specific routes (repeat or comma-separated)").option("--strict-routes", "Fail when any route passed via --routes is missing").option("--format <format>", "Output format (json)", "json").option("-o, --output <path>", "Write collected schema data to file").option("--check", "Compare collected output with an existing schema data file").option(
  "-d, --data <path>",
  "Path to existing schema data JSON for --check",
  "schema-sentry.data.json"
).action(async (options) => {
  const start = Date.now();
  const format = resolveCollectOutputFormat(options.format);
  const rootDir = path6.resolve(process.cwd(), options.root ?? ".");
  const check = options.check ?? false;
  const requestedRoutes = normalizeRouteFilter(options.routes ?? []);
  const strictRoutes = options.strictRoutes ?? false;
  let collected;
  try {
    collected = await collectSchemaData({ rootDir, routes: requestedRoutes });
  } catch (error) {
    const reason = error instanceof Error && error.message.length > 0 ? error.message : "Unknown file system error";
    printCliError(
      "collect.scan_failed",
      `Could not scan HTML output at ${rootDir}: ${reason}`,
      "Point --root to a directory containing built HTML output."
    );
    process.exit(1);
    return;
  }
  // stats.htmlFiles counts every HTML file found, regardless of the route filter.
  if (collected.stats.htmlFiles === 0) {
    printCliError(
      "collect.no_html",
      `No HTML files found under ${rootDir}`,
      "Point --root to a static output directory (for example ./out)."
    );
    process.exit(1);
    return;
  }
  // Missing requested routes are fatal only with --strict-routes; otherwise
  // they are just reported in the summary.
  if (strictRoutes && collected.missingRoutes.length > 0) {
    printCliError(
      "collect.missing_required_routes",
      `Required routes were not found in collected HTML: ${collected.missingRoutes.join(", ")}`,
      "Rebuild output, adjust --root, or update --routes."
    );
    process.exit(1);
    return;
  }
  let driftDetected = false;
  if (check) {
    const existingPath = path6.resolve(process.cwd(), options.data);
    let existingRaw;
    try {
      existingRaw = await readFile(existingPath, "utf8");
    } catch (error) {
      printCliError(
        "data.not_found",
        `Schema data not found at ${existingPath}`,
        "Run `schemasentry collect --output ./schema-sentry.data.json` to generate it."
      );
      process.exit(1);
      return;
    }
    let existingData;
    try {
      existingData = JSON.parse(existingRaw);
    } catch (error) {
      printCliError(
        "data.invalid_json",
        "Schema data is not valid JSON",
        "Check the JSON syntax or regenerate with `schemasentry collect --output`."
      );
      process.exit(1);
      return;
    }
    if (!isSchemaData(existingData)) {
      printCliError(
        "data.invalid_shape",
        "Schema data must contain a 'routes' object with array values",
        "Ensure each route maps to an array of JSON-LD blocks."
      );
      process.exit(1);
      return;
    }
    // With a route filter, compare only those routes so unrelated routes in
    // the existing file are not reported as removed.
    const existingDataForCompare = requestedRoutes.length > 0 ? filterSchemaDataByRoutes(existingData, requestedRoutes) : existingData;
    const drift = compareSchemaData(existingDataForCompare, collected.data);
    driftDetected = drift.hasChanges;
    if (driftDetected) {
      console.error(formatSchemaDataDrift(drift));
    } else {
      console.error("collect | No schema data drift detected.");
    }
  }
  const content = formatCollectOutput(collected.data, format);
  if (options.output) {
    const resolvedPath = path6.resolve(process.cwd(), options.output);
    try {
      await mkdir(path6.dirname(resolvedPath), { recursive: true });
      // The template literal ends with a newline so the file ends with one.
      await writeFile(resolvedPath, `${content}
`, "utf8");
      console.error(`Collected data written to ${resolvedPath}`);
    } catch (error) {
      const reason = error instanceof Error && error.message.length > 0 ? error.message : "Unknown file system error";
      printCliError(
        "output.write_failed",
        `Could not write collected data to ${resolvedPath}: ${reason}`
      );
      process.exit(1);
      return;
    }
  } else if (!check) {
    // Data goes to stdout only when neither --output nor --check was given.
    console.log(content);
  }
  printCollectWarnings(collected.warnings);
  printCollectSummary({
    stats: collected.stats,
    durationMs: Date.now() - start,
    checked: check,
    driftDetected,
    requestedRoutes: collected.requestedRoutes,
    missingRoutes: collected.missingRoutes,
    strictRoutes
  });
  process.exit(driftDetected ? 1 : 0);
});
1457
// `scaffold` command: previews schema stubs for routes that have none and,
// with --write, saves them to the manifest and data files.
//
// Without --force the command lists the files it will touch and, when stdin is
// a terminal, asks for confirmation before writing. When stdin is not a
// terminal no prompt is possible, so the command proceeds as it always has,
// keeping existing scripted usage working. Failures while preparing or applying
// the scaffold are reported through printCliError and exit with code 1.
program.command("scaffold").description("Generate schema stubs for routes without schema (dry-run by default)").option(
  "-m, --manifest <path>",
  "Path to manifest JSON",
  "schema-sentry.manifest.json"
).option(
  "-d, --data <path>",
  "Path to schema data JSON",
  "schema-sentry.data.json"
).option("--root <path>", "Project root for scanning", ".").option("--write", "Apply changes (default is dry-run)").option("-f, --force", "Skip confirmation prompts").action(async (options) => {
  const start = Date.now();
  const manifestPath = path6.resolve(process.cwd(), options.manifest);
  const dataPath = path6.resolve(process.cwd(), options.data);
  const rootDir = path6.resolve(process.cwd(), options.root ?? ".");
  const dryRun = !(options.write ?? false);
  const force = options.force ?? false;
  const scaffoldOptions = {
    manifestPath,
    dataPath,
    rootDir,
    dryRun,
    force
  };
  let result;
  try {
    result = await scaffoldSchema(scaffoldOptions);
  } catch (error) {
    const message = error instanceof Error ? error.message : "Unknown error";
    printCliError(
      "scaffold.prepare_failed",
      `Failed to prepare scaffold: ${message}`,
      "Check the --root, --manifest and --data paths."
    );
    process.exit(1);
    return;
  }
  console.error(formatScaffoldPreview(result));
  if (!result.wouldUpdate) {
    process.exit(0);
    return;
  }
  if (dryRun) {
    console.error("\nDry run complete. Use --write to apply changes.");
    process.exit(0);
    return;
  }
  if (!force) {
    console.error("\nScaffolding will update:");
    console.error(` - ${manifestPath}`);
    console.error(` - ${dataPath}`);
    console.error("\nUse --force to skip this confirmation.");
    if (input.isTTY) {
      const rl = createInterface({ input, output });
      let answer;
      try {
        answer = await rl.question("Proceed? (y/N) ");
      } finally {
        rl.close();
      }
      if (!/^y(es)?$/i.test(answer.trim())) {
        console.error("Scaffold cancelled. No files were changed.");
        process.exit(0);
        return;
      }
    }
  }
  try {
    await applyScaffold(result, scaffoldOptions);
    console.error(`\nScaffold complete in ${Date.now() - start}ms`);
    process.exit(0);
  } catch (error) {
    const message = error instanceof Error ? error.message : "Unknown error";
    printCliError(
      "scaffold.apply_failed",
      `Failed to apply scaffold: ${message}`,
      "Check file permissions or disk space."
    );
    process.exit(1);
  }
});
886
1516
  function isManifest(value) {
887
1517
  if (!value || typeof value !== "object") {
888
1518
  return false;
@@ -939,11 +1569,30 @@ function resolveAnnotationsMode(value) {
939
1569
  process.exit(1);
940
1570
  return "none";
941
1571
  }
1572
/**
 * Validate the `--format` value for `collect`.
 *
 * @param {string} [value] - Raw option value; defaults to "json".
 * @returns {string} "json". Any other value prints a CLI error and exits with code 1.
 */
function resolveCollectOutputFormat(value) {
  const normalized = (value ?? "json").trim().toLowerCase();
  if (normalized !== "json") {
    printCliError(
      "output.invalid_format",
      `Unsupported collect output format '${value ?? ""}'`,
      "Use --format json."
    );
    process.exit(1);
    return "json";
  }
  return normalized;
}
942
1585
/**
 * Serialize a report in the requested output format.
 *
 * @param {object} report - Report to serialize.
 * @param {string} format - "html" renders an HTML page; anything else yields stable JSON.
 * @param {string} title - Title passed to the HTML renderer.
 * @returns {string} Serialized report.
 */
function formatReportOutput(report, format, title) {
  return format === "html" ? renderHtmlReport(report, { title }) : stableStringify4(report);
}
1591
/**
 * Serialize collected schema data.
 *
 * @param {object} data - Collected schema data.
 * @param {string} format - Output format; every value currently produces stable JSON.
 * @returns {string} Stable JSON for `data`.
 */
function formatCollectOutput(data, format) {
  // `format` is kept in the signature; JSON is the only output produced today.
  return stableStringify4(data);
}
948
1597
  async function emitReport(options) {
949
1598
  const { report, format, outputPath, title } = options;
@@ -952,9 +1601,9 @@ async function emitReport(options) {
952
1601
  console.log(content);
953
1602
  return;
954
1603
  }
955
- const resolvedPath = path4.resolve(process.cwd(), outputPath);
1604
+ const resolvedPath = path6.resolve(process.cwd(), outputPath);
956
1605
  try {
957
- await mkdir(path4.dirname(resolvedPath), { recursive: true });
1606
+ await mkdir(path6.dirname(resolvedPath), { recursive: true });
958
1607
  await writeFile(resolvedPath, content, "utf8");
959
1608
  console.error(`Report written to ${resolvedPath}`);
960
1609
  } catch (error) {
@@ -974,7 +1623,7 @@ function emitAnnotations(report, mode, commandLabel) {
974
1623
  }
975
1624
  function printCliError(code, message, suggestion) {
976
1625
  console.error(
977
- stableStringify2({
1626
+ stableStringify4({
978
1627
  ok: false,
979
1628
  errors: [
980
1629
  {
@@ -1040,6 +1689,61 @@ function printAuditSummary(report, coverageEnabled, durationMs) {
1040
1689
  console.error("Coverage checks skipped (no manifest provided).");
1041
1690
  }
1042
1691
  }
1692
/**
 * Print collect warnings to stderr: a count line, at most ten warnings, and a
 * trailing line with the number left out. Prints nothing for an empty list.
 *
 * @param {{ file: string, message: string }[]} warnings - Warnings to report.
 * @returns {void}
 */
function printCollectWarnings(warnings) {
  const total = warnings.length;
  if (total === 0) {
    return;
  }
  const maxPrinted = 10;
  console.error(`collect | Warnings: ${total}`);
  warnings.slice(0, maxPrinted).forEach(({ file, message }) => {
    console.error(`- ${file}: ${message}`);
  });
  const omitted = total - maxPrinted;
  if (omitted > 0) {
    console.error(`- ... ${omitted} more warning(s)`);
  }
}
1705
/**
 * Print the one-line `collect` summary to stderr.
 *
 * @param {object} options - `stats`, `durationMs`, `checked`, `driftDetected`,
 *   `requestedRoutes`, `missingRoutes` and `strictRoutes`.
 * @returns {void}
 */
function printCollectSummary(options) {
  const { stats, requestedRoutes, missingRoutes } = options;
  const parts = [
    `HTML files: ${stats.htmlFiles}`,
    `Routes: ${stats.routes}`,
    `Blocks: ${stats.blocks}`,
    `Invalid blocks: ${stats.invalidBlocks}`,
    `Duration: ${formatDuration(options.durationMs)}`,
    // Optional segments appear only when the matching option or condition applies.
    ...(options.checked ? [`Check: ${options.driftDetected ? "drift_detected" : "clean"}`] : []),
    ...(requestedRoutes.length > 0 ? [`Route filter: ${requestedRoutes.length}`] : []),
    ...(missingRoutes.length > 0 ? [`Missing filtered routes: ${missingRoutes.length}`] : []),
    ...(options.strictRoutes ? ["Strict routes: enabled"] : [])
  ];
  console.error(`collect | ${parts.join(" | ")}`);
}
1736
/**
 * Restrict schema data to the given routes.
 *
 * @param {{ routes: object }} data - Schema data to filter.
 * @param {string[]} routes - Routes to keep.
 * @returns {{ routes: object }} New data object holding only the listed routes
 *   that exist in `data.routes`, in the order given.
 */
function filterSchemaDataByRoutes(data, routes) {
  const present = routes.filter(
    (route) => Object.prototype.hasOwnProperty.call(data.routes, route)
  );
  const filteredRoutes = {};
  for (const route of present) {
    filteredRoutes[route] = data.routes[route];
  }
  return { routes: filteredRoutes };
}
1043
1747
  async function promptAnswers() {
1044
1748
  const defaults = getDefaultAnswers();
1045
1749
  const rl = createInterface({ input, output });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@schemasentry/cli",
3
- "version": "0.3.2",
3
+ "version": "0.5.0",
4
4
  "description": "CLI for Schema Sentry validation and reporting.",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -33,7 +33,7 @@
33
33
  },
34
34
  "dependencies": {
35
35
  "commander": "^12.0.0",
36
- "@schemasentry/core": "0.3.2"
36
+ "@schemasentry/core": "0.5.0"
37
37
  },
38
38
  "scripts": {
39
39
  "build": "tsup src/index.ts --format esm --dts --clean --tsconfig tsconfig.build.json",