milieu-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/LICENSE +200 -0
  2. package/README.md +153 -0
  3. package/dist/bridges/index.d.ts +5 -0
  4. package/dist/bridges/index.d.ts.map +1 -0
  5. package/dist/bridges/index.js +6 -0
  6. package/dist/bridges/index.js.map +1 -0
  7. package/dist/bridges/reachability/crawler-policy.d.ts +36 -0
  8. package/dist/bridges/reachability/crawler-policy.d.ts.map +1 -0
  9. package/dist/bridges/reachability/crawler-policy.js +110 -0
  10. package/dist/bridges/reachability/crawler-policy.js.map +1 -0
  11. package/dist/bridges/reachability/http-status.d.ts +7 -0
  12. package/dist/bridges/reachability/http-status.d.ts.map +1 -0
  13. package/dist/bridges/reachability/http-status.js +74 -0
  14. package/dist/bridges/reachability/http-status.js.map +1 -0
  15. package/dist/bridges/reachability/https-check.d.ts +14 -0
  16. package/dist/bridges/reachability/https-check.d.ts.map +1 -0
  17. package/dist/bridges/reachability/https-check.js +38 -0
  18. package/dist/bridges/reachability/https-check.js.map +1 -0
  19. package/dist/bridges/reachability/index.d.ts +13 -0
  20. package/dist/bridges/reachability/index.d.ts.map +1 -0
  21. package/dist/bridges/reachability/index.js +115 -0
  22. package/dist/bridges/reachability/index.js.map +1 -0
  23. package/dist/bridges/reachability/meta-robots.d.ts +16 -0
  24. package/dist/bridges/reachability/meta-robots.d.ts.map +1 -0
  25. package/dist/bridges/reachability/meta-robots.js +119 -0
  26. package/dist/bridges/reachability/meta-robots.js.map +1 -0
  27. package/dist/bridges/reachability/robots-parser.d.ts +26 -0
  28. package/dist/bridges/reachability/robots-parser.d.ts.map +1 -0
  29. package/dist/bridges/reachability/robots-parser.js +105 -0
  30. package/dist/bridges/reachability/robots-parser.js.map +1 -0
  31. package/dist/bridges/reachability/robots-txt.d.ts +14 -0
  32. package/dist/bridges/reachability/robots-txt.d.ts.map +1 -0
  33. package/dist/bridges/reachability/robots-txt.js +80 -0
  34. package/dist/bridges/reachability/robots-txt.js.map +1 -0
  35. package/dist/bridges/separation/api-presence.d.ts +14 -0
  36. package/dist/bridges/separation/api-presence.d.ts.map +1 -0
  37. package/dist/bridges/separation/api-presence.js +96 -0
  38. package/dist/bridges/separation/api-presence.js.map +1 -0
  39. package/dist/bridges/separation/developer-docs.d.ts +21 -0
  40. package/dist/bridges/separation/developer-docs.d.ts.map +1 -0
  41. package/dist/bridges/separation/developer-docs.js +81 -0
  42. package/dist/bridges/separation/developer-docs.js.map +1 -0
  43. package/dist/bridges/separation/index.d.ts +20 -0
  44. package/dist/bridges/separation/index.d.ts.map +1 -0
  45. package/dist/bridges/separation/index.js +63 -0
  46. package/dist/bridges/separation/index.js.map +1 -0
  47. package/dist/bridges/separation/sdk-references.d.ts +12 -0
  48. package/dist/bridges/separation/sdk-references.d.ts.map +1 -0
  49. package/dist/bridges/separation/sdk-references.js +93 -0
  50. package/dist/bridges/separation/sdk-references.js.map +1 -0
  51. package/dist/bridges/separation/webhook-support.d.ts +19 -0
  52. package/dist/bridges/separation/webhook-support.d.ts.map +1 -0
  53. package/dist/bridges/separation/webhook-support.js +94 -0
  54. package/dist/bridges/separation/webhook-support.js.map +1 -0
  55. package/dist/bridges/standards/index.d.ts +13 -0
  56. package/dist/bridges/standards/index.d.ts.map +1 -0
  57. package/dist/bridges/standards/index.js +79 -0
  58. package/dist/bridges/standards/index.js.map +1 -0
  59. package/dist/bridges/standards/json-ld.d.ts +16 -0
  60. package/dist/bridges/standards/json-ld.d.ts.map +1 -0
  61. package/dist/bridges/standards/json-ld.js +63 -0
  62. package/dist/bridges/standards/json-ld.js.map +1 -0
  63. package/dist/bridges/standards/llms-txt.d.ts +19 -0
  64. package/dist/bridges/standards/llms-txt.d.ts.map +1 -0
  65. package/dist/bridges/standards/llms-txt.js +64 -0
  66. package/dist/bridges/standards/llms-txt.js.map +1 -0
  67. package/dist/bridges/standards/mcp.d.ts +13 -0
  68. package/dist/bridges/standards/mcp.d.ts.map +1 -0
  69. package/dist/bridges/standards/mcp.js +72 -0
  70. package/dist/bridges/standards/mcp.js.map +1 -0
  71. package/dist/bridges/standards/openapi.d.ts +14 -0
  72. package/dist/bridges/standards/openapi.d.ts.map +1 -0
  73. package/dist/bridges/standards/openapi.js +424 -0
  74. package/dist/bridges/standards/openapi.js.map +1 -0
  75. package/dist/bridges/standards/schema-org.d.ts +12 -0
  76. package/dist/bridges/standards/schema-org.d.ts.map +1 -0
  77. package/dist/bridges/standards/schema-org.js +101 -0
  78. package/dist/bridges/standards/schema-org.js.map +1 -0
  79. package/dist/bridges/standards/well-known.d.ts +16 -0
  80. package/dist/bridges/standards/well-known.d.ts.map +1 -0
  81. package/dist/bridges/standards/well-known.js +77 -0
  82. package/dist/bridges/standards/well-known.js.map +1 -0
  83. package/dist/bridges/stubs.d.ts +4 -0
  84. package/dist/bridges/stubs.d.ts.map +1 -0
  85. package/dist/bridges/stubs.js +25 -0
  86. package/dist/bridges/stubs.js.map +1 -0
  87. package/dist/cli/index.d.ts +4 -0
  88. package/dist/cli/index.d.ts.map +1 -0
  89. package/dist/cli/index.js +83 -0
  90. package/dist/cli/index.js.map +1 -0
  91. package/dist/core/explanations.d.ts +11 -0
  92. package/dist/core/explanations.d.ts.map +1 -0
  93. package/dist/core/explanations.js +128 -0
  94. package/dist/core/explanations.js.map +1 -0
  95. package/dist/core/index.d.ts +6 -0
  96. package/dist/core/index.d.ts.map +1 -0
  97. package/dist/core/index.js +6 -0
  98. package/dist/core/index.js.map +1 -0
  99. package/dist/core/scan.d.ts +3 -0
  100. package/dist/core/scan.d.ts.map +1 -0
  101. package/dist/core/scan.js +89 -0
  102. package/dist/core/scan.js.map +1 -0
  103. package/dist/core/types.d.ts +119 -0
  104. package/dist/core/types.d.ts.map +1 -0
  105. package/dist/core/types.js +3 -0
  106. package/dist/core/types.js.map +1 -0
  107. package/dist/core/version.d.ts +2 -0
  108. package/dist/core/version.d.ts.map +1 -0
  109. package/dist/core/version.js +7 -0
  110. package/dist/core/version.js.map +1 -0
  111. package/dist/index.d.ts +2 -0
  112. package/dist/index.d.ts.map +1 -0
  113. package/dist/index.js +4 -0
  114. package/dist/index.js.map +1 -0
  115. package/dist/render/colors.d.ts +7 -0
  116. package/dist/render/colors.d.ts.map +1 -0
  117. package/dist/render/colors.js +28 -0
  118. package/dist/render/colors.js.map +1 -0
  119. package/dist/render/format-bridge.d.ts +3 -0
  120. package/dist/render/format-bridge.d.ts.map +1 -0
  121. package/dist/render/format-bridge.js +39 -0
  122. package/dist/render/format-bridge.js.map +1 -0
  123. package/dist/render/format-scan.d.ts +3 -0
  124. package/dist/render/format-scan.d.ts.map +1 -0
  125. package/dist/render/format-scan.js +44 -0
  126. package/dist/render/format-scan.js.map +1 -0
  127. package/dist/render/format-verbose.d.ts +3 -0
  128. package/dist/render/format-verbose.d.ts.map +1 -0
  129. package/dist/render/format-verbose.js +14 -0
  130. package/dist/render/format-verbose.js.map +1 -0
  131. package/dist/render/index.d.ts +7 -0
  132. package/dist/render/index.d.ts.map +1 -0
  133. package/dist/render/index.js +8 -0
  134. package/dist/render/index.js.map +1 -0
  135. package/dist/render/progress-bar.d.ts +10 -0
  136. package/dist/render/progress-bar.d.ts.map +1 -0
  137. package/dist/render/progress-bar.js +21 -0
  138. package/dist/render/progress-bar.js.map +1 -0
  139. package/dist/render/symbols.d.ts +10 -0
  140. package/dist/render/symbols.d.ts.map +1 -0
  141. package/dist/render/symbols.js +21 -0
  142. package/dist/render/symbols.js.map +1 -0
  143. package/dist/utils/http-client.d.ts +25 -0
  144. package/dist/utils/http-client.d.ts.map +1 -0
  145. package/dist/utils/http-client.js +235 -0
  146. package/dist/utils/http-client.js.map +1 -0
  147. package/dist/utils/index.d.ts +6 -0
  148. package/dist/utils/index.d.ts.map +1 -0
  149. package/dist/utils/index.js +7 -0
  150. package/dist/utils/index.js.map +1 -0
  151. package/dist/utils/ssrf.d.ts +29 -0
  152. package/dist/utils/ssrf.d.ts.map +1 -0
  153. package/dist/utils/ssrf.js +134 -0
  154. package/dist/utils/ssrf.js.map +1 -0
  155. package/dist/utils/url.d.ts +53 -0
  156. package/dist/utils/url.d.ts.map +1 -0
  157. package/dist/utils/url.js +64 -0
  158. package/dist/utils/url.js.map +1 -0
  159. package/package.json +74 -0
@@ -0,0 +1,38 @@
1
+ import { httpGet } from "../../utils/http-client.js";
2
+ const ABORT_ERRORS = new Set(["dns", "connection_refused", "ssl_error"]);
3
/**
 * Probe whether `https://<domain>` is reachable by issuing a HEAD request
 * through `httpGet`.
 *
 * - When the result is ok, a passing "https_available" check is returned and
 *   no abort is requested.
 * - Otherwise a failing check carrying the error message is returned, and an
 *   abort is requested when the error kind is listed in ABORT_ERRORS
 *   (dns, connection_refused, ssl_error).
 *
 * @param domain  Host name to probe; appended to "https://" as-is.
 * @param timeout Timeout value forwarded unchanged to `httpGet`.
 * @returns `{ check, abort }` on success, or `{ check, abort, abortReason }`
 *          on failure, where `abortReason` is the error kind when aborting
 *          and `undefined` otherwise.
 */
export async function checkHttps(domain, timeout) {
    const url = "https://" + domain;
    const response = await httpGet(url, { method: "HEAD", timeout });
    if (!response.ok) {
        // Failure path: decide whether the error is severe enough to stop the scan.
        const { kind, message } = response.error;
        const shouldAbort = ABORT_ERRORS.has(kind);
        const failedCheck = {
            id: "https_available",
            label: "HTTPS Available",
            status: "fail",
            detail: message,
        };
        return {
            check: failedCheck,
            abort: shouldAbort,
            abortReason: shouldAbort ? kind : undefined,
        };
    }
    const passedCheck = {
        id: "https_available",
        label: "HTTPS Available",
        status: "pass",
        detail: "HTTPS connection successful",
    };
    return { check: passedCheck, abort: false };
}
38
+ //# sourceMappingURL=https-check.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"https-check.js","sourceRoot":"","sources":["../../../src/bridges/reachability/https-check.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAQrD,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,KAAK,EAAE,oBAAoB,EAAE,WAAW,CAAC,CAAC,CAAC;AAEzE;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,MAAc,EACd,OAAgB;IAEhB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,UAAU,GAAG,MAAM,EAAE;QAChD,MAAM,EAAE,MAAM;QACd,OAAO;KACR,CAAC,CAAC;IAEH,IAAI,MAAM,CAAC,EAAE,EAAE,CAAC;QACd,OAAO;YACL,KAAK,EAAE;gBACL,EAAE,EAAE,iBAAiB;gBACrB,KAAK,EAAE,iBAAiB;gBACxB,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,6BAA6B;aACtC;YACD,KAAK,EAAE,KAAK;SACb,CAAC;IACJ,CAAC;IAED,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,MAAM,CAAC,KAAK,CAAC;IACvC,MAAM,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAErC,OAAO;QACL,KAAK,EAAE;YACL,EAAE,EAAE,iBAAiB;YACrB,KAAK,EAAE,iBAAiB;YACxB,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,OAAO;SAChB;QACD,KAAK;QACL,WAAW,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;KACtC,CAAC;AACJ,CAAC"}
@@ -0,0 +1,13 @@
1
+ import type { BridgeResult, ScanContext } from "../../core/types.js";
2
+ /**
3
+ * Run Bridge 1: Reachability.
4
+ *
5
+ * Makes 3 HTTP calls:
6
+ * 1. HEAD https://<domain> (HTTPS availability)
7
+ * 2. GET <baseUrl> (page content for meta robots)
8
+ * 3. GET https://<domain>/robots.txt (robots.txt fetch)
9
+ *
10
+ * Aborts on dns/connection_refused/ssl_error from HTTPS check.
11
+ */
12
+ export declare function runReachabilityBridge(ctx: ScanContext): Promise<BridgeResult>;
13
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bridges/reachability/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAS,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAyC5E;;;;;;;;;GASG;AACH,wBAAsB,qBAAqB,CACzC,GAAG,EAAE,WAAW,GACf,OAAO,CAAC,YAAY,CAAC,CAwFvB"}
@@ -0,0 +1,115 @@
1
+ import { httpGet } from "../../utils/http-client.js";
2
+ import { checkHttps } from "./https-check.js";
3
+ import { checkHttpStatus } from "./http-status.js";
4
+ import { checkRobotsTxt } from "./robots-txt.js";
5
+ import { evaluateCrawlerPolicies } from "./crawler-policy.js";
6
+ import { checkMetaRobots, checkXRobotsTag } from "./meta-robots.js";
7
/**
 * Turn a list of check results into a 0-100 bridge score plus a label.
 *
 * Scoring rules:
 * - "pass" earns 1 point, "partial" earns 0.5, anything else earns 0.
 * - A check whose `data.policy` is "skip" is left out of both the earned
 *   points and the maximum.
 * - With nothing left to score, the score is 0.
 *
 * Label thresholds: >= 80 "pass", >= 40 "partial", otherwise "fail".
 *
 * @param checks Array of check objects (`status`, optional `data`).
 * @returns `{ score, scoreLabel }`.
 */
function calculateScore(checks) {
    // Drop checks that opted out of scoring (e.g. crawler policy with no robots.txt).
    const scored = checks.filter((entry) => !(entry.data && entry.data.policy === "skip"));
    let earned = 0;
    scored.forEach((entry) => {
        if (entry.status === "pass") {
            earned += 1;
        }
        else if (entry.status === "partial") {
            earned += 0.5;
        }
    });
    const possible = scored.length;
    let score = 0;
    if (possible !== 0) {
        score = Math.round((earned / possible) * 100);
    }
    let scoreLabel = "fail";
    if (score >= 80) {
        scoreLabel = "pass";
    }
    else if (score >= 40) {
        scoreLabel = "partial";
    }
    return { score, scoreLabel };
}
32
/**
 * Execute Bridge 1 ("Reachability") for the scan described by `ctx`.
 *
 * Network calls are made strictly in this order:
 *   1. HEAD https://<domain>            (via checkHttps)
 *   2. GET  <baseUrl>                   (page body/headers)
 *   3. robots.txt fetch                 (via checkRobotsTxt)
 *
 * If the HTTPS check requests an abort, the bridge returns immediately with
 * score 0 and only that check. Otherwise the page body and headers are
 * stored on `ctx.shared` when the page GET succeeded, and the HTTP status,
 * robots.txt, crawler-policy, meta-robots and X-Robots-Tag checks are
 * collected and scored.
 *
 * @param ctx Scan context; reads `domain`, `baseUrl`, `options.timeout` and
 *            writes `shared.pageBody` / `shared.pageHeaders`.
 * @returns The bridge result (id 1, name "Reachability").
 */
export async function runReachabilityBridge(ctx) {
    const startedAt = performance.now();
    const elapsedMs = () => Math.round(performance.now() - startedAt);
    // Step 1: HTTPS availability. Fatal errors end the bridge right here.
    const https = await checkHttps(ctx.domain, ctx.options.timeout);
    if (https.abort) {
        return {
            id: 1,
            name: "Reachability",
            status: "evaluated",
            score: 0,
            scoreLabel: "fail",
            checks: [https.check],
            durationMs: elapsedMs(),
            abort: true,
            abortReason: https.abortReason,
        };
    }
    // Step 2: fetch the page itself (normalized base URL).
    const page = await httpGet(ctx.baseUrl, { timeout: ctx.options.timeout });
    const pageOk = page.ok;
    if (pageOk) {
        // Kept for later bridges (JSON-LD / Schema.org extraction).
        ctx.shared.pageBody = page.body;
        ctx.shared.pageHeaders = page.headers;
    }
    // Step 3: HTTP status is derived from the response already in hand.
    const statusCheck = checkHttpStatus(page);
    // Step 4: robots.txt fetch and parse.
    const robots = await checkRobotsTxt(ctx.domain, ctx.options.timeout);
    // Step 5: evaluate crawler policies against the path being scanned,
    // falling back to "/" when the base URL cannot be parsed.
    let scannedPath = "/";
    try {
        scannedPath = new URL(ctx.baseUrl).pathname;
    }
    catch {
        scannedPath = "/";
    }
    const policyChecks = evaluateCrawlerPolicies(robots.parsed, scannedPath);
    // Step 6: robots directives in markup and headers. When the page is
    // unavailable, empty inputs are used (absence of restrictive tags = pass).
    const metaCheck = checkMetaRobots(pageOk ? page.body : "");
    const headerCheck = checkXRobotsTag(pageOk ? page.headers : {});
    // Step 7: assemble the checks in their reporting order and score them.
    const checks = [https.check, statusCheck, robots.check]
        .concat(policyChecks, [metaCheck, headerCheck]);
    const summary = calculateScore(checks);
    return {
        id: 1,
        name: "Reachability",
        status: "evaluated",
        score: summary.score,
        scoreLabel: summary.scoreLabel,
        checks,
        durationMs: elapsedMs(),
    };
}
115
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/bridges/reachability/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC9C,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAEpE;;;;GAIG;AACH,SAAS,cAAc,CAAC,MAAe;IAIrC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,8EAA8E;QAC9E,IACE,KAAK,CAAC,IAAI;YACT,KAAK,CAAC,IAAgC,CAAC,MAAM,KAAK,MAAM,EACzD,CAAC;YACD,SAAS;QACX,CAAC;QAED,SAAS,IAAI,CAAC,CAAC;QACf,IAAI,KAAK,CAAC,MAAM,KAAK,MAAM;YAAE,MAAM,IAAI,CAAC,CAAC;aACpC,IAAI,KAAK,CAAC,MAAM,KAAK,SAAS;YAAE,MAAM,IAAI,GAAG,CAAC;QACnD,4BAA4B;IAC9B,CAAC;IAED,MAAM,KAAK,GAAG,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,SAAS,CAAC,GAAG,GAAG,CAAC,CAAC;IAC3E,MAAM,UAAU,GACd,KAAK,IAAI,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC;IAC1D,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC;AAC/B,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,GAAgB;IAEhB,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEhC,gCAAgC;IAChC,MAAM,WAAW,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAEtE,8BAA8B;IAC9B,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC;QACtB,OAAO;YACL,EAAE,EAAE,CAAC;YACL,IAAI,EAAE,cAAc;YACpB,MAAM,EAAE,WAAW;YACnB,KAAK,EAAE,CAAC;YACR,UAAU,EAAE,MAAM;YAClB,MAAM,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC;YAC3B,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YACjD,KAAK,EAAE,IAAI;YACX,WAAW,EAAE,WAAW,CAAC,WAAW;SACrC,CAAC;IACJ,CAAC;IAED,wCAAwC;IACxC,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE;QAC9C,OAAO,EAAE,GAAG,CAAC,OAAO,CAAC,OAAO;KAC7B,CAAC,CAAC;IAEH,gEAAgE;IAChE,IAAI,YAAY,CAAC,EAAE,EAAE,CAAC;QACpB,GAAG,CAAC,MAAM,CAAC,QAAQ,GAAG,YAAY,CAAC,IAAI,CAAC;QACxC,GAAG,CAAC,MAAM,CAAC,WAAW,GAAG,YAAY,CAAC,OAAO,CAAC;IAChD,CAAC;IAED,2DA
A2D;IAC3D,MAAM,eAAe,GAAG,eAAe,CAAC,YAAY,CAAC,CAAC;IAEtD,8BAA8B;IAC9B,MAAM,YAAY,GAAG,MAAM,cAAc,CACvC,GAAG,CAAC,MAAM,EACV,GAAG,CAAC,OAAO,CAAC,OAAO,CACpB,CAAC;IAEF,oDAAoD;IACpD,IAAI,UAAkB,CAAC;IACvB,IAAI,CAAC;QACH,UAAU,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC;IAC7C,CAAC;IAAC,MAAM,CAAC;QACP,UAAU,GAAG,GAAG,CAAC;IACnB,CAAC;IACD,MAAM,aAAa,GAAG,uBAAuB,CAC3C,YAAY,CAAC,MAAM,EACnB,UAAU,CACX,CAAC;IAEF,sCAAsC;IACtC,IAAI,eAAsB,CAAC;IAC3B,IAAI,YAAmB,CAAC;IAExB,IAAI,YAAY,CAAC,EAAE,EAAE,CAAC;QACpB,eAAe,GAAG,eAAe,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QACrD,YAAY,GAAG,eAAe,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;IACvD,CAAC;SAAM,CAAC;QACN,8EAA8E;QAC9E,eAAe,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;QACtC,YAAY,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;IACrC,CAAC;IAED,wBAAwB;IACxB,MAAM,MAAM,GAAY;QACtB,WAAW,CAAC,KAAK;QACjB,eAAe;QACf,YAAY,CAAC,KAAK;QAClB,GAAG,aAAa;QAChB,eAAe;QACf,YAAY;KACb,CAAC;IAEF,qBAAqB;IACrB,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;IAErD,OAAO;QACL,EAAE,EAAE,CAAC;QACL,IAAI,EAAE,cAAc;QACpB,MAAM,EAAE,WAAW;QACnB,KAAK;QACL,UAAU;QACV,MAAM;QACN,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;KAClD,CAAC;AACJ,CAAC"}
@@ -0,0 +1,16 @@
1
+ import type { Check } from "../../core/types.js";
2
+ /**
3
+ * Check for restrictive meta robots tags in the HTML <head> section.
4
+ * Uses regex only -- no HTML parser dependency.
5
+ *
6
+ * Scans for: robots, googlebot, bingbot name attributes.
7
+ * Handles both attribute orders, single/double quotes, self-closing tags, case insensitivity.
8
+ */
9
+ export declare function checkMetaRobots(html: string): Check;
10
+ /**
11
+ * Check for X-Robots-Tag HTTP header directives.
12
+ *
13
+ * Headers object is expected to have lowercase keys (from Phase 2 httpGet).
14
+ */
15
+ export declare function checkXRobotsTag(headers: Record<string, string>): Check;
16
+ //# sourceMappingURL=meta-robots.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"meta-robots.d.ts","sourceRoot":"","sources":["../../../src/bridges/reachability/meta-robots.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,qBAAqB,CAAC;AAEjD;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,KAAK,CA2EnD;AAED;;;;GAIG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,KAAK,CAoDtE"}
@@ -0,0 +1,119 @@
1
/**
 * Check for restrictive meta robots tags in the HTML <head> section.
 * Uses regex only -- no HTML parser dependency.
 *
 * Scans <meta> tags whose name is robots, googlebot or bingbot, in either
 * attribute order, with single or double quotes, self-closing or not,
 * case-insensitively. The comma-separated `content` values are trimmed,
 * lower-cased and collected (name-first matches first, then content-first).
 *
 * Grading:
 * - "fail"    when `noindex` or `none` is present (`none` is shorthand for
 *             `noindex, nofollow`)
 * - "partial" when `nofollow` is present
 * - "pass"    otherwise, including when no <head>...</head> section exists
 *
 * @param html Raw HTML document text.
 * @returns A check object with id "meta_robots" and `data.directives`.
 */
export function checkMetaRobots(html) {
    const id = "meta_robots";
    const label = "Meta Robots Tags";
    // Extract head content (case-insensitive). The tag name must be followed by
    // whitespace or ">" so that <header ...> is not mistaken for <head>.
    const headMatch = html.match(/<head(?:\s[^>]*)?>([\s\S]*?)<\/head>/i);
    if (!headMatch) {
        return {
            id,
            label,
            status: "pass",
            detail: "No restrictive meta robots tags found",
            data: { directives: [] },
        };
    }
    const headContent = headMatch[1];
    const directives = [];
    // Run one pattern over the head and collect the non-empty directives found
    // in the given capture group.
    const collect = (pattern, contentGroup) => {
        let match;
        while ((match = pattern.exec(headContent)) !== null) {
            for (const part of match[contentGroup].split(",")) {
                const directive = part.trim().toLowerCase();
                if (directive.length > 0) {
                    directives.push(directive);
                }
            }
        }
    };
    // Pattern A: name first, then content
    collect(/<meta\s+[^>]*name\s*=\s*["'](robots|googlebot|bingbot)["'][^>]*content\s*=\s*["']([^"']*)["'][^>]*\/?>/gi, 2);
    // Pattern B: content first, then name
    collect(/<meta\s+[^>]*content\s*=\s*["']([^"']*)["'][^>]*name\s*=\s*["'](robots|googlebot|bingbot)["'][^>]*\/?>/gi, 1);
    // `none` blocks indexing just like `noindex`, so both fail the check.
    if (directives.some((d) => d === "noindex" || d === "none")) {
        return {
            id,
            label,
            status: "fail",
            detail: `Restrictive meta robots directives found: ${directives.join(", ")}`,
            data: { directives },
        };
    }
    if (directives.includes("nofollow")) {
        return {
            id,
            label,
            status: "partial",
            detail: `Restrictive meta robots directives found: ${directives.join(", ")}`,
            data: { directives },
        };
    }
    return {
        id,
        label,
        status: "pass",
        detail: "No restrictive meta robots tags found",
        data: { directives },
    };
}
71
/**
 * Check for X-Robots-Tag HTTP header directives.
 *
 * Reads `headers["x-robots-tag"]` (the key is looked up in lowercase only),
 * splits the value on commas, and trims and lower-cases each directive.
 *
 * Grading:
 * - "pass"    when the header is absent/empty, or carries no restrictive directive
 * - "fail"    when any directive contains `noindex`, or is `none` (optionally
 *             scoped to a user agent, e.g. `googlebot: none`); `none` is
 *             shorthand for `noindex, nofollow`
 * - "partial" when any directive contains `nofollow` or `noarchive`
 *
 * @param headers Response headers keyed by lowercase header name.
 * @returns A check object with id "x_robots_tag"; `data` holds the parsed
 *          `directives` and the `raw` header value ("" when absent).
 */
export function checkXRobotsTag(headers) {
    const id = "x_robots_tag";
    const label = "X-Robots-Tag Header";
    const headerValue = headers["x-robots-tag"];
    if (!headerValue) {
        return {
            id,
            label,
            status: "pass",
            detail: "No X-Robots-Tag header",
            data: { directives: [], raw: "" },
        };
    }
    const directives = headerValue
        .split(",")
        .map((d) => d.trim().toLowerCase())
        .filter((d) => d.length > 0);
    // Matches a bare "none" or an agent-scoped "<agent>: none", but not words
    // that merely contain the letters (e.g. "nonexistent").
    const isNone = (d) => /(?:^|:\s*)none$/.test(d);
    if (directives.some((d) => d.includes("noindex") || isNone(d))) {
        return {
            id,
            label,
            status: "fail",
            detail: `X-Robots-Tag contains restrictive directives: ${directives.join(", ")}`,
            data: { directives, raw: headerValue },
        };
    }
    if (directives.some((d) => d.includes("nofollow") || d.includes("noarchive"))) {
        return {
            id,
            label,
            status: "partial",
            detail: `X-Robots-Tag contains directives: ${directives.join(", ")}`,
            data: { directives, raw: headerValue },
        };
    }
    return {
        id,
        label,
        status: "pass",
        detail: `X-Robots-Tag header present with no restrictive directives`,
        data: { directives, raw: headerValue },
    };
}
119
+ //# sourceMappingURL=meta-robots.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"meta-robots.js","sourceRoot":"","sources":["../../../src/bridges/reachability/meta-robots.ts"],"names":[],"mappings":"AAEA;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,EAAE,GAAG,aAAa,CAAC;IACzB,MAAM,KAAK,GAAG,kBAAkB,CAAC;IAEjC,0CAA0C;IAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;IAC/D,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO;YACL,EAAE;YACF,KAAK;YACL,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,uCAAuC;YAC/C,IAAI,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;SACzB,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;IACjC,MAAM,aAAa,GAAa,EAAE,CAAC;IAEnC,sCAAsC;IACtC,MAAM,QAAQ,GACZ,0GAA0G,CAAC;IAE7G,sCAAsC;IACtC,MAAM,QAAQ,GACZ,0GAA0G,CAAC;IAE7G,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,MAAM,YAAY,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,UAAU,GAAG,YAAY;aAC5B,KAAK,CAAC,GAAG,CAAC;aACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;QACtC,aAAa,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;IACpC,CAAC;IAED,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,MAAM,YAAY,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,UAAU,GAAG,YAAY;aAC5B,KAAK,CAAC,GAAG,CAAC;aACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;QACtC,aAAa,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;IACpC,CAAC;IAED,2BAA2B;IAC3B,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE3D,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,SAAS,CAAC,EAAE,CAAC;QAC1C,OAAO;YACL,EAAE;YACF,KAAK;YACL,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,6CAA6C,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;YAC1E,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,EAAE;SAC/B,CAAC;IACJ,CAAC;IAED,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,UAAU,CAAC,EAAE,CAAC;QAC3C,OAAO;YACL,EAAE;YACF,KAAK;YACL,MAAM,EAAE,SAAS;YACjB,MAAM,EAAE,6CAA6C,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;YAC1E,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,EAAE;SAC/B,CAAC;IACJ,CAAC;IAED,OAAO;QACL,EAAE;QACF,KAAK;QACL,MAAM,EAAE,MAAM;QACd,MAA
M,EAAE,uCAAuC;QAC/C,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,EAAE;KAC/B,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAAC,OAA+B;IAC7D,MAAM,EAAE,GAAG,cAAc,CAAC;IAC1B,MAAM,KAAK,GAAG,qBAAqB,CAAC;IAEpC,MAAM,WAAW,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;IAC5C,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,OAAO;YACL,EAAE;YACF,KAAK;YACL,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,wBAAwB;YAChC,IAAI,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE;SAClC,CAAC;IACJ,CAAC;IAED,MAAM,UAAU,GAAG,WAAW;SAC3B,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;SAClC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE/B,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC;QAClD,OAAO;YACL,EAAE;YACF,KAAK;YACL,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,iDAAiD,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;YAChF,IAAI,EAAE,EAAE,UAAU,EAAE,GAAG,EAAE,WAAW,EAAE;SACvC,CAAC;IACJ,CAAC;IAED,IACE,UAAU,CAAC,IAAI,CACb,CAAC,CAAC,EAAE,EAAE,CACJ,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,MAAM,CACpE,EACD,CAAC;QACD,OAAO;YACL,EAAE;YACF,KAAK;YACL,MAAM,EAAE,SAAS;YACjB,MAAM,EAAE,qCAAqC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;YACpE,IAAI,EAAE,EAAE,UAAU,EAAE,GAAG,EAAE,WAAW,EAAE;SACvC,CAAC;IACJ,CAAC;IAED,OAAO;QACL,EAAE;QACF,KAAK;QACL,MAAM,EAAE,MAAM;QACd,MAAM,EAAE,4DAA4D;QACpE,IAAI,EAAE,EAAE,UAAU,EAAE,GAAG,EAAE,WAAW,EAAE;KACvC,CAAC;AACJ,CAAC"}
@@ -0,0 +1,26 @@
1
+ export interface RobotsTxtResult { // Structured form of a robots.txt document, as returned by parseRobotsTxt.
2
+ parseable: boolean; // NOTE(review): the parseRobotsTxt implementation in this package always sets this to true.
3
+ ruleCount: number; // Total number of allow/disallow rules summed across all groups.
4
+ groups: RobotsGroup[]; // Rule groups in the order they were encountered in the file.
5
+ sitemaps: string[]; // Values of the Sitemap directives; these are not tied to any group.
6
+ }
7
+ export interface RobotsGroup { // One group of rules together with the user agents it applies to.
8
+ userAgents: string[]; // Lower-cased User-agent values; empty when rules appeared before any User-agent line.
9
+ rules: RobotsRule[]; // Allow/disallow rules in file order.
10
+ }
11
+ export interface RobotsRule { // A single Allow or Disallow line.
12
+ type: "allow" | "disallow"; // Lower-cased directive name.
13
+ path: string; // Trimmed text after the first colon; case is preserved and the value may be empty.
14
+ }
15
+ /**
16
+ * Parse raw robots.txt content into structured data.
17
+ * Handles RFC 9309 edge cases: BOM, CRLF/CR/LF, comments,
18
+ * case-insensitive directives, case-sensitive paths, group boundaries.
19
+ */
20
+ export declare function parseRobotsTxt(content: string): RobotsTxtResult;
21
+ /**
22
+ * Check whether a robots.txt path pattern matches a given URL path.
23
+ * Supports * wildcards and $ end anchor per RFC 9309.
24
+ */
25
+ export declare function matchesPath(pattern: string, path: string): boolean;
26
+ //# sourceMappingURL=robots-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"robots-parser.d.ts","sourceRoot":"","sources":["../../../src/bridges/reachability/robots-parser.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,OAAO,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,KAAK,EAAE,UAAU,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,OAAO,GAAG,UAAU,CAAC;IAC3B,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,eAAe,CAmF/D;AAED;;;GAGG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAyBlE"}
@@ -0,0 +1,105 @@
1
+ // RFC 9309 robots.txt parser and path matcher
2
+ // Pure logic, no I/O
3
/**
 * Parse raw robots.txt content into structured data.
 *
 * Handling, in order:
 * - a leading BOM is stripped (either U+FEFF or the raw bytes EF BB BF
 *   decoded one character per byte)
 * - lines are split on CRLF, CR or LF
 * - everything from `#` to end of line is a comment; blank lines are skipped
 * - each line is split on its first `:`; the directive name is
 *   case-insensitive, the value keeps its case (paths are case-sensitive)
 * - consecutive `User-agent` lines share one group; a `User-agent` line that
 *   follows at least one rule starts a new group
 * - `Allow` / `Disallow` lines seen before any `User-agent` line are kept in
 *   a group with an empty `userAgents` list
 * - `Sitemap` values are collected independently of groups; a `Sitemap:`
 *   line with an empty value is ignored so that it does not register as a
 *   declared sitemap
 * - unknown directives and lines without `:` are ignored
 *
 * @param content Raw robots.txt text.
 * @returns `{ parseable, ruleCount, groups, sitemaps }`; `parseable` is
 *          always true and `ruleCount` is the total number of rules.
 */
export function parseRobotsTxt(content) {
    const groups = [];
    const sitemaps = [];
    // 1. Strip a leading BOM (byte-decoded form first, then U+FEFF).
    const text = content.replace(/^\xEF\xBB\xBF/, "").replace(/^\uFEFF/, "");
    // 2. Split on CRLF, CR, or LF
    const lines = text.split(/\r\n|\r|\n/);
    let currentGroup = null;
    for (const rawLine of lines) {
        // 3. Strip comments and trim
        const commentIdx = rawLine.indexOf("#");
        const line = (commentIdx >= 0 ? rawLine.substring(0, commentIdx) : rawLine).trim();
        if (line === "")
            continue;
        // 4. Parse directive: split on first ':'
        const colonIdx = line.indexOf(":");
        if (colonIdx < 0)
            continue;
        const directive = line.substring(0, colonIdx).trim().toLowerCase();
        const value = line.substring(colonIdx + 1).trim();
        switch (directive) {
            case "user-agent": {
                const agent = value.toLowerCase();
                if (currentGroup && currentGroup.rules.length === 0) {
                    // Consecutive User-agent lines = same group
                    currentGroup.userAgents.push(agent);
                }
                else {
                    // First group, or the previous group already has rules:
                    // close the previous one (if any) and start a new group.
                    if (currentGroup) {
                        groups.push(currentGroup);
                    }
                    currentGroup = { userAgents: [agent], rules: [] };
                }
                break;
            }
            case "allow":
            case "disallow": {
                // Rules before any User-agent line go into an agent-less group.
                if (!currentGroup) {
                    currentGroup = { userAgents: [], rules: [] };
                }
                currentGroup.rules.push({ type: directive, path: value });
                break;
            }
            case "sitemap": {
                // Sitemaps are not tied to any group; an empty value carries
                // no URL and is therefore not recorded.
                if (value !== "") {
                    sitemaps.push(value);
                }
                break;
            }
            default:
                // Unknown directives: ignore
                break;
        }
    }
    // Push final group
    if (currentGroup) {
        groups.push(currentGroup);
    }
    const ruleCount = groups.reduce((sum, g) => sum + g.rules.length, 0);
    return {
        parseable: true,
        ruleCount,
        groups,
        sitemaps,
    };
}
79
/**
 * Check whether a robots.txt path pattern matches a given URL path.
 *
 * Pattern semantics (RFC 9309):
 * - an empty pattern matches every path
 * - `*` matches any run of characters, including none
 * - a trailing `$` anchors the match to the end of the path; a `$` anywhere
 *   else is an ordinary character
 * - without a trailing `$` the pattern is a prefix match
 * - every other character is compared literally and case-sensitively
 *
 * The match is done by walking the literal segments between `*` wildcards
 * instead of compiling a regular expression. Patterns come from untrusted
 * robots.txt files, and chained wildcards compiled to `.*.*...` can backtrack
 * catastrophically.
 *
 * @param pattern Path pattern taken from an Allow/Disallow rule.
 * @param path    URL path to test.
 * @returns true when the pattern matches the path.
 */
export function matchesPath(pattern, path) {
    // Empty pattern matches everything
    if (pattern === "")
        return true;
    // Only a trailing "$" is an end anchor.
    const anchorEnd = pattern.endsWith("$");
    const body = anchorEnd ? pattern.slice(0, -1) : pattern;
    // Literal pieces separated by "*" wildcards.
    const segments = body.split("*");
    const first = segments[0];
    // The pattern is always anchored at the start of the path.
    if (!path.startsWith(first))
        return false;
    if (segments.length === 1) {
        // No wildcard at all: exact match when anchored, prefix match otherwise.
        return anchorEnd ? path.length === first.length : true;
    }
    // Place each middle segment at its leftmost possible position; this leaves
    // the most room for the segments that follow.
    let pos = first.length;
    for (let i = 1; i < segments.length - 1; i++) {
        const found = path.indexOf(segments[i], pos);
        if (found < 0)
            return false;
        pos = found + segments[i].length;
    }
    const last = segments[segments.length - 1];
    if (anchorEnd) {
        // The final segment must end the path without overlapping what has
        // already been consumed.
        return path.length - last.length >= pos && path.endsWith(last);
    }
    return path.indexOf(last, pos) >= 0;
}
105
+ //# sourceMappingURL=robots-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"robots-parser.js","sourceRoot":"","sources":["../../../src/bridges/reachability/robots-parser.ts"],"names":[],"mappings":"AAAA,8CAA8C;AAC9C,qBAAqB;AAmBrB;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,OAAe;IAC5C,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,qBAAqB;IACrB,IAAI,IAAI,GAAG,OAAO,CAAC;IACnB,IAAI,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,MAAM,IAAI,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;QACrE,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAClE,CAAC;IAED,8BAA8B;IAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IAEvC,IAAI,YAAY,GACd,IAAI,CAAC;IAEP,KAAK,MAAM,OAAO,IAAI,KAAK,EAAE,CAAC;QAC5B,6BAA6B;QAC7B,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,IAAI,GAAG,CACX,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,OAAO,CAC7D,CAAC,IAAI,EAAE,CAAC;QAET,IAAI,IAAI,KAAK,EAAE;YAAE,SAAS;QAE1B,yCAAyC;QACzC,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACnC,IAAI,QAAQ,GAAG,CAAC;YAAE,SAAS;QAE3B,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACnE,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAElD,QAAQ,SAAS,EAAE,CAAC;YAClB,KAAK,YAAY,CAAC,CAAC,CAAC;gBAClB,kDAAkD;gBAClD,IAAI,YAAY,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAClD,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;oBAC1B,YAAY,GAAG,EAAE,UAAU,EAAE,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;gBAClE,CAAC;qBAAM,IAAI,YAAY,EAAE,CAAC;oBACxB,4CAA4C;oBAC5C,YAAY,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;gBACpD,CAAC;qBAAM,CAAC;oBACN,YAAY,GAAG,EAAE,UAAU,EAAE,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;gBAClE,CAAC;gBACD,MAAM;YACR,CAAC;YAED,KAAK,OAAO,CAAC;YACb,KAAK,UAAU,CAAC,CAAC,CAAC;gBAChB,0DAA0D;gBAC1D,IAAI,CAAC,YAAY,EAAE,CAAC;oBAClB,YAAY,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;gBAC/C,CAAC;gBACD,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;gBAC
1D,MAAM;YACR,CAAC;YAED,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,wCAAwC;gBACxC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACrB,MAAM;YACR,CAAC;YAED;gBACE,gCAAgC;gBAChC,MAAM;QACV,CAAC;IACH,CAAC;IAED,uBAAuB;IACvB,IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC5B,CAAC;IAED,0BAA0B;IAC1B,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAErE,OAAO;QACL,SAAS,EAAE,IAAI;QACf,SAAS;QACT,MAAM;QACN,QAAQ;KACT,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,WAAW,CAAC,OAAe,EAAE,IAAY;IACvD,sCAAsC;IACtC,IAAI,OAAO,KAAK,EAAE;QAAE,OAAO,IAAI,CAAC;IAEhC,4BAA4B;IAC5B,IAAI,SAAS,GAAG,KAAK,CAAC;IACtB,IAAI,GAAG,GAAG,OAAO,CAAC;IAClB,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACtB,SAAS,GAAG,IAAI,CAAC;QACjB,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IAED,wEAAwE;IACxE,oCAAoC;IACpC,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,gCAAgC,EAAE,MAAM,CAAC,CAAC;IAEtE,uBAAuB;IACvB,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IAE9C,+CAA+C;IAC/C,MAAM,SAAS,GAAG,SAAS;QACzB,CAAC,CAAC,IAAI,MAAM,CAAC,GAAG,GAAG,QAAQ,GAAG,GAAG,CAAC;QAClC,CAAC,CAAC,IAAI,MAAM,CAAC,GAAG,GAAG,QAAQ,CAAC,CAAC;IAE/B,OAAO,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC9B,CAAC"}
@@ -0,0 +1,14 @@
1
+ import type { Check } from "../../core/types.js";
2
+ import { type RobotsTxtResult } from "./robots-parser.js";
3
/**
 * Value resolved by `checkRobotsTxt`: the check entry together with the
 * parsed robots.txt data.
 */
export interface RobotsTxtCheckResult {
    /** Check entry describing the outcome of the robots.txt probe. */
    check: Check;
    /** Parsed robots.txt contents, or `null` when no parse result is available. */
    parsed: RobotsTxtResult | null;
}
7
/**
 * Fetch the robots.txt file of `domain` and parse it.
 *
 * The resolved value carries both the Check result and the parsed data, so
 * that crawler policy evaluation can reuse the parse downstream.
 *
 * @param domain Domain whose robots.txt is fetched.
 * @param timeout Optional request timeout.
 * @returns The check entry plus the parsed robots.txt (or `null`).
 */
export declare function checkRobotsTxt(
    domain: string,
    timeout?: number,
): Promise<RobotsTxtCheckResult>;
14
+ //# sourceMappingURL=robots-txt.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"robots-txt.d.ts","sourceRoot":"","sources":["../../../src/bridges/reachability/robots-txt.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,qBAAqB,CAAC;AAEjD,OAAO,EAAkB,KAAK,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAE1E,MAAM,WAAW,oBAAoB;IACnC,KAAK,EAAE,KAAK,CAAC;IACb,MAAM,EAAE,eAAe,GAAG,IAAI,CAAC;CAChC;AAID;;;;;GAKG;AACH,wBAAsB,cAAc,CAClC,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,oBAAoB,CAAC,CA+E/B"}
@@ -0,0 +1,80 @@
1
+ import { httpGet } from "../../utils/http-client.js";
2
+ import { parseRobotsTxt } from "./robots-parser.js";
3
const DIRECTIVE_PATTERN = /\b(user-agent|disallow|allow)\s*:/i;
/**
 * Retrieve `https://<domain>/robots.txt` and turn the response into a check
 * result, alongside the parsed file for downstream crawler policy evaluation.
 *
 * Outcomes, in evaluation order:
 *   - HTTP 404                                   -> "partial", parsed = null
 *   - any other request failure                  -> "fail" with the error message, parsed = null
 *   - response is not text/plain AND its body
 *     contains no robots directive               -> "fail", parsed = null
 *   - parse result with zero rules and no groups -> "partial", parsed returned
 *   - anything else                              -> "pass", with rule count and sitemaps in `data`
 *
 * @param domain Domain whose robots.txt is requested.
 * @param timeout Timeout option forwarded to `httpGet`.
 * @returns A promise for `{ check, parsed }`.
 */
export async function checkRobotsTxt(domain, timeout) {
    const id = "robots_txt";
    const label = "robots.txt";
    // Builds the `{ check, parsed }` envelope shared by every return path.
    const outcome = (status, detail, parsed) => ({
        check: { id, label, status, detail },
        parsed,
    });
    const response = await httpGet("https://" + domain + "/robots.txt", {
        timeout,
    });
    if (!response.ok) {
        const failure = response.error;
        // A 404 only means the site publishes no robots.txt; anything else is a real failure.
        const notFound = failure.kind === "http_error" && failure.statusCode === 404;
        return notFound
            ? outcome("partial", "No robots.txt found", null)
            : outcome("fail", failure.message, null);
    }
    // Accept the body when it is served as text/plain or at least looks like robots.txt.
    const contentType = response.headers["content-type"] ?? "";
    const servedAsText = contentType.toLowerCase().includes("text/plain");
    const looksLikeRobots = DIRECTIVE_PATTERN.test(response.body);
    if (!(servedAsText || looksLikeRobots)) {
        return outcome("fail", "robots.txt is not a text file", null);
    }
    const parsed = parseRobotsTxt(response.body);
    if (parsed.ruleCount === 0 && parsed.groups.length === 0) {
        return outcome("partial", "robots.txt exists but has no rules", parsed);
    }
    const passed = outcome("pass", `robots.txt found with ${parsed.ruleCount} rules`, parsed);
    passed.check.data = { ruleCount: parsed.ruleCount, sitemaps: parsed.sitemaps };
    return passed;
}
80
+ //# sourceMappingURL=robots-txt.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"robots-txt.js","sourceRoot":"","sources":["../../../src/bridges/reachability/robots-txt.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AACrD,OAAO,EAAE,cAAc,EAAwB,MAAM,oBAAoB,CAAC;AAO1E,MAAM,iBAAiB,GAAG,oCAAoC,CAAC;AAE/D;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,MAAc,EACd,OAAgB;IAEhB,MAAM,EAAE,GAAG,YAAY,CAAC;IACxB,MAAM,KAAK,GAAG,YAAY,CAAC;IAE3B,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,UAAU,GAAG,MAAM,GAAG,aAAa,EAAE;QAChE,OAAO;KACR,CAAC,CAAC;IAEH,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;QACf,sBAAsB;QACtB,IACE,MAAM,CAAC,KAAK,CAAC,IAAI,KAAK,YAAY;YAClC,MAAM,CAAC,KAAK,CAAC,UAAU,KAAK,GAAG,EAC/B,CAAC;YACD,OAAO;gBACL,KAAK,EAAE;oBACL,EAAE;oBACF,KAAK;oBACL,MAAM,EAAE,SAAS;oBACjB,MAAM,EAAE,qBAAqB;iBAC9B;gBACD,MAAM,EAAE,IAAI;aACb,CAAC;QACJ,CAAC;QAED,eAAe;QACf,OAAO;YACL,KAAK,EAAE;gBACL,EAAE;gBACF,KAAK;gBACL,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,OAAO;aAC7B;YACD,MAAM,EAAE,IAAI;SACb,CAAC;IACJ,CAAC;IAED,mCAAmC;IACnC,MAAM,WAAW,GAAG,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;IACzD,MAAM,WAAW,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;IACrE,MAAM,aAAa,GAAG,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAE1D,IAAI,CAAC,WAAW,IAAI,CAAC,aAAa,EAAE,CAAC;QACnC,OAAO;YACL,KAAK,EAAE;gBACL,EAAE;gBACF,KAAK;gBACL,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,+BAA+B;aACxC;YACD,MAAM,EAAE,IAAI;SACb,CAAC;IACJ,CAAC;IAED,QAAQ;IACR,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAE3C,IAAI,MAAM,CAAC,SAAS,KAAK,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzD,OAAO;YACL,KAAK,EAAE;gBACL,EAAE;gBACF,KAAK;gBACL,MAAM,EAAE,SAAS;gBACjB,MAAM,EAAE,oCAAoC;aAC7C;YACD,MAAM;SACP,CAAC;IACJ,CAAC;IAED,OAAO;QACL,KAAK,EAAE;YACL,EAAE;YACF,KAAK;YACL,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,yBAAyB,MAAM,CAAC,SAAS,QAAQ;YACzD,IAAI,EAAE,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE;SACjE;QACD,MAAM;KACP,CAAC;AACJ,CAAC"}
@@ -0,0 +1,14 @@
1
+ import type { Check, ContentSource } from "../../core/types.js";
2
/**
 * Detect whether a site exposes an API by combining several signals.
 *
 * Signals considered:
 *   1. An OpenAPI spec already detected by Bridge 2 (the boolean from
 *      `ctx.shared.openApiDetected`).
 *   2. API-related response headers (X-RateLimit-*, X-Request-Id, etc.).
 *   3. HTML links containing `/api/` paths, scanned from all content sources.
 *   4. Markdown links containing `/api/` paths, scanned from all content sources.
 *
 * Pure function -- no HTTP calls.
 *
 * @param openApiDetected Whether an OpenAPI spec was detected (signal 1).
 * @param sources Content sources scanned for `/api/` links (signals 3 and 4).
 * @param headers Response headers inspected for API-related entries (signal 2).
 * @returns The resulting Check.
 */
export declare function checkApiPresence(
    openApiDetected: boolean,
    sources: ContentSource[],
    headers: Record<string, string>,
): Check;
14
+ //# sourceMappingURL=api-presence.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"api-presence.d.ts","sourceRoot":"","sources":["../../../src/bridges/separation/api-presence.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AA8ChE;;;;;;;;;;GAUG;AACH,wBAAgB,gBAAgB,CAC9B,eAAe,EAAE,OAAO,EACxB,OAAO,EAAE,aAAa,EAAE,EACxB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAC9B,KAAK,CA0CP"}