@pseolint/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +53 -0
  3. package/dist/algorithms/entity-mask.d.ts +3 -0
  4. package/dist/algorithms/entity-mask.d.ts.map +1 -0
  5. package/dist/algorithms/entity-mask.js +8 -0
  6. package/dist/algorithms/entity-mask.js.map +1 -0
  7. package/dist/algorithms/entity-mask.test.d.ts +2 -0
  8. package/dist/algorithms/entity-mask.test.d.ts.map +1 -0
  9. package/dist/algorithms/entity-mask.test.js +23 -0
  10. package/dist/algorithms/entity-mask.test.js.map +1 -0
  11. package/dist/algorithms/simhash.d.ts +4 -0
  12. package/dist/algorithms/simhash.d.ts.map +1 -0
  13. package/dist/algorithms/simhash.js +64 -0
  14. package/dist/algorithms/simhash.js.map +1 -0
  15. package/dist/algorithms/simhash.test.d.ts +2 -0
  16. package/dist/algorithms/simhash.test.d.ts.map +1 -0
  17. package/dist/algorithms/simhash.test.js +23 -0
  18. package/dist/algorithms/simhash.test.js.map +1 -0
  19. package/dist/algorithms/tf-idf.d.ts +8 -0
  20. package/dist/algorithms/tf-idf.d.ts.map +1 -0
  21. package/dist/algorithms/tf-idf.js +55 -0
  22. package/dist/algorithms/tf-idf.js.map +1 -0
  23. package/dist/auditor.d.ts +3 -0
  24. package/dist/auditor.d.ts.map +1 -0
  25. package/dist/auditor.js +730 -0
  26. package/dist/auditor.js.map +1 -0
  27. package/dist/auditor.test.d.ts +2 -0
  28. package/dist/auditor.test.d.ts.map +1 -0
  29. package/dist/auditor.test.js +134 -0
  30. package/dist/auditor.test.js.map +1 -0
  31. package/dist/enrich-findings.d.ts +9 -0
  32. package/dist/enrich-findings.d.ts.map +1 -0
  33. package/dist/enrich-findings.js +436 -0
  34. package/dist/enrich-findings.js.map +1 -0
  35. package/dist/formatters/console.d.ts +6 -0
  36. package/dist/formatters/console.d.ts.map +1 -0
  37. package/dist/formatters/console.js +237 -0
  38. package/dist/formatters/console.js.map +1 -0
  39. package/dist/formatters/html.d.ts +3 -0
  40. package/dist/formatters/html.d.ts.map +1 -0
  41. package/dist/formatters/html.js +170 -0
  42. package/dist/formatters/html.js.map +1 -0
  43. package/dist/formatters/index.d.ts +6 -0
  44. package/dist/formatters/index.d.ts.map +1 -0
  45. package/dist/formatters/index.js +5 -0
  46. package/dist/formatters/index.js.map +1 -0
  47. package/dist/formatters/json.d.ts +3 -0
  48. package/dist/formatters/json.d.ts.map +1 -0
  49. package/dist/formatters/json.js +4 -0
  50. package/dist/formatters/json.js.map +1 -0
  51. package/dist/formatters/markdown.d.ts +3 -0
  52. package/dist/formatters/markdown.d.ts.map +1 -0
  53. package/dist/formatters/markdown.js +93 -0
  54. package/dist/formatters/markdown.js.map +1 -0
  55. package/dist/index.d.ts +45 -0
  56. package/dist/index.d.ts.map +1 -0
  57. package/dist/index.js +45 -0
  58. package/dist/index.js.map +1 -0
  59. package/dist/page-classifier.d.ts +4 -0
  60. package/dist/page-classifier.d.ts.map +1 -0
  61. package/dist/page-classifier.js +133 -0
  62. package/dist/page-classifier.js.map +1 -0
  63. package/dist/parser.d.ts +3 -0
  64. package/dist/parser.d.ts.map +1 -0
  65. package/dist/parser.js +131 -0
  66. package/dist/parser.js.map +1 -0
  67. package/dist/parser.test.d.ts +2 -0
  68. package/dist/parser.test.d.ts.map +1 -0
  69. package/dist/parser.test.js +37 -0
  70. package/dist/parser.test.js.map +1 -0
  71. package/dist/renderer.d.ts +15 -0
  72. package/dist/renderer.d.ts.map +1 -0
  73. package/dist/renderer.js +124 -0
  74. package/dist/renderer.js.map +1 -0
  75. package/dist/rule-references.d.ts +2 -0
  76. package/dist/rule-references.d.ts.map +1 -0
  77. package/dist/rule-references.js +35 -0
  78. package/dist/rule-references.js.map +1 -0
  79. package/dist/rules/cannibal/keyword-collision.d.ts +3 -0
  80. package/dist/rules/cannibal/keyword-collision.d.ts.map +1 -0
  81. package/dist/rules/cannibal/keyword-collision.js +25 -0
  82. package/dist/rules/cannibal/keyword-collision.js.map +1 -0
  83. package/dist/rules/cannibal/title-overlap.d.ts +3 -0
  84. package/dist/rules/cannibal/title-overlap.d.ts.map +1 -0
  85. package/dist/rules/cannibal/title-overlap.js +43 -0
  86. package/dist/rules/cannibal/title-overlap.js.map +1 -0
  87. package/dist/rules/cannibal/url-pattern.d.ts +3 -0
  88. package/dist/rules/cannibal/url-pattern.d.ts.map +1 -0
  89. package/dist/rules/cannibal/url-pattern.js +48 -0
  90. package/dist/rules/cannibal/url-pattern.js.map +1 -0
  91. package/dist/rules/content/eeat-signals.d.ts +3 -0
  92. package/dist/rules/content/eeat-signals.d.ts.map +1 -0
  93. package/dist/rules/content/eeat-signals.js +46 -0
  94. package/dist/rules/content/eeat-signals.js.map +1 -0
  95. package/dist/rules/content/heading-uniqueness.d.ts +3 -0
  96. package/dist/rules/content/heading-uniqueness.d.ts.map +1 -0
  97. package/dist/rules/content/heading-uniqueness.js +56 -0
  98. package/dist/rules/content/heading-uniqueness.js.map +1 -0
  99. package/dist/rules/content/meta-uniqueness.d.ts +3 -0
  100. package/dist/rules/content/meta-uniqueness.d.ts.map +1 -0
  101. package/dist/rules/content/meta-uniqueness.js +28 -0
  102. package/dist/rules/content/meta-uniqueness.js.map +1 -0
  103. package/dist/rules/content/missing-author.d.ts +3 -0
  104. package/dist/rules/content/missing-author.d.ts.map +1 -0
  105. package/dist/rules/content/missing-author.js +26 -0
  106. package/dist/rules/content/missing-author.js.map +1 -0
  107. package/dist/rules/content/unique-value.d.ts +3 -0
  108. package/dist/rules/content/unique-value.d.ts.map +1 -0
  109. package/dist/rules/content/unique-value.js +26 -0
  110. package/dist/rules/content/unique-value.js.map +1 -0
  111. package/dist/rules/links/cluster-connectivity.d.ts +7 -0
  112. package/dist/rules/links/cluster-connectivity.d.ts.map +1 -0
  113. package/dist/rules/links/cluster-connectivity.js +73 -0
  114. package/dist/rules/links/cluster-connectivity.js.map +1 -0
  115. package/dist/rules/links/cluster-key.d.ts +3 -0
  116. package/dist/rules/links/cluster-key.d.ts.map +1 -0
  117. package/dist/rules/links/cluster-key.js +22 -0
  118. package/dist/rules/links/cluster-key.js.map +1 -0
  119. package/dist/rules/links/dead-ends.d.ts +3 -0
  120. package/dist/rules/links/dead-ends.d.ts.map +1 -0
  121. package/dist/rules/links/dead-ends.js +13 -0
  122. package/dist/rules/links/dead-ends.js.map +1 -0
  123. package/dist/rules/links/hub-pages.d.ts +7 -0
  124. package/dist/rules/links/hub-pages.d.ts.map +1 -0
  125. package/dist/rules/links/hub-pages.js +73 -0
  126. package/dist/rules/links/hub-pages.js.map +1 -0
  127. package/dist/rules/links/link-depth.d.ts +3 -0
  128. package/dist/rules/links/link-depth.d.ts.map +1 -0
  129. package/dist/rules/links/link-depth.js +46 -0
  130. package/dist/rules/links/link-depth.js.map +1 -0
  131. package/dist/rules/links/orphan-pages.d.ts +3 -0
  132. package/dist/rules/links/orphan-pages.d.ts.map +1 -0
  133. package/dist/rules/links/orphan-pages.js +19 -0
  134. package/dist/rules/links/orphan-pages.js.map +1 -0
  135. package/dist/rules/schema/consistency.d.ts +3 -0
  136. package/dist/rules/schema/consistency.d.ts.map +1 -0
  137. package/dist/rules/schema/consistency.js +44 -0
  138. package/dist/rules/schema/consistency.js.map +1 -0
  139. package/dist/rules/schema/json-ld-valid.d.ts +3 -0
  140. package/dist/rules/schema/json-ld-valid.d.ts.map +1 -0
  141. package/dist/rules/schema/json-ld-valid.js +47 -0
  142. package/dist/rules/schema/json-ld-valid.js.map +1 -0
  143. package/dist/rules/schema/required-fields.d.ts +3 -0
  144. package/dist/rules/schema/required-fields.d.ts.map +1 -0
  145. package/dist/rules/schema/required-fields.js +60 -0
  146. package/dist/rules/schema/required-fields.js.map +1 -0
  147. package/dist/rules/spam/boilerplate-ratio.d.ts +3 -0
  148. package/dist/rules/spam/boilerplate-ratio.d.ts.map +1 -0
  149. package/dist/rules/spam/boilerplate-ratio.js +50 -0
  150. package/dist/rules/spam/boilerplate-ratio.js.map +1 -0
  151. package/dist/rules/spam/doorway-pattern.d.ts +4 -0
  152. package/dist/rules/spam/doorway-pattern.d.ts.map +1 -0
  153. package/dist/rules/spam/doorway-pattern.js +47 -0
  154. package/dist/rules/spam/doorway-pattern.js.map +1 -0
  155. package/dist/rules/spam/entity-swap.d.ts +7 -0
  156. package/dist/rules/spam/entity-swap.d.ts.map +1 -0
  157. package/dist/rules/spam/entity-swap.js +26 -0
  158. package/dist/rules/spam/entity-swap.js.map +1 -0
  159. package/dist/rules/spam/near-duplicate.d.ts +11 -0
  160. package/dist/rules/spam/near-duplicate.d.ts.map +1 -0
  161. package/dist/rules/spam/near-duplicate.js +25 -0
  162. package/dist/rules/spam/near-duplicate.js.map +1 -0
  163. package/dist/rules/spam/publication-velocity.d.ts +3 -0
  164. package/dist/rules/spam/publication-velocity.d.ts.map +1 -0
  165. package/dist/rules/spam/publication-velocity.js +25 -0
  166. package/dist/rules/spam/publication-velocity.js.map +1 -0
  167. package/dist/rules/spam/template-coverage.d.ts +3 -0
  168. package/dist/rules/spam/template-coverage.d.ts.map +1 -0
  169. package/dist/rules/spam/template-coverage.js +87 -0
  170. package/dist/rules/spam/template-coverage.js.map +1 -0
  171. package/dist/rules/spam/template-diversity.d.ts +3 -0
  172. package/dist/rules/spam/template-diversity.d.ts.map +1 -0
  173. package/dist/rules/spam/template-diversity.js +19 -0
  174. package/dist/rules/spam/template-diversity.js.map +1 -0
  175. package/dist/rules/spam/thin-content.d.ts +6 -0
  176. package/dist/rules/spam/thin-content.d.ts.map +1 -0
  177. package/dist/rules/spam/thin-content.js +22 -0
  178. package/dist/rules/spam/thin-content.js.map +1 -0
  179. package/dist/rules/tech/canonical-consistency.d.ts +4 -0
  180. package/dist/rules/tech/canonical-consistency.d.ts.map +1 -0
  181. package/dist/rules/tech/canonical-consistency.js +78 -0
  182. package/dist/rules/tech/canonical-consistency.js.map +1 -0
  183. package/dist/rules/tech/canonical-noindex-conflict.d.ts +3 -0
  184. package/dist/rules/tech/canonical-noindex-conflict.d.ts.map +1 -0
  185. package/dist/rules/tech/canonical-noindex-conflict.js +27 -0
  186. package/dist/rules/tech/canonical-noindex-conflict.js.map +1 -0
  187. package/dist/rules/tech/hreflang-consistency.d.ts +3 -0
  188. package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -0
  189. package/dist/rules/tech/hreflang-consistency.js +99 -0
  190. package/dist/rules/tech/hreflang-consistency.js.map +1 -0
  191. package/dist/rules/tech/og-completeness.d.ts +3 -0
  192. package/dist/rules/tech/og-completeness.d.ts.map +1 -0
  193. package/dist/rules/tech/og-completeness.js +35 -0
  194. package/dist/rules/tech/og-completeness.js.map +1 -0
  195. package/dist/rules/tech/redirect-chain.d.ts +3 -0
  196. package/dist/rules/tech/redirect-chain.d.ts.map +1 -0
  197. package/dist/rules/tech/redirect-chain.js +20 -0
  198. package/dist/rules/tech/redirect-chain.js.map +1 -0
  199. package/dist/rules/tech/robots-noindex-conflict.d.ts +3 -0
  200. package/dist/rules/tech/robots-noindex-conflict.d.ts.map +1 -0
  201. package/dist/rules/tech/robots-noindex-conflict.js +30 -0
  202. package/dist/rules/tech/robots-noindex-conflict.js.map +1 -0
  203. package/dist/rules/tech/robots-sitemap-presence.d.ts +3 -0
  204. package/dist/rules/tech/robots-sitemap-presence.d.ts.map +1 -0
  205. package/dist/rules/tech/robots-sitemap-presence.js +61 -0
  206. package/dist/rules/tech/robots-sitemap-presence.js.map +1 -0
  207. package/dist/rules/tech/sitemap-completeness.d.ts +3 -0
  208. package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -0
  209. package/dist/rules/tech/sitemap-completeness.js +40 -0
  210. package/dist/rules/tech/sitemap-completeness.js.map +1 -0
  211. package/dist/rules/tech/soft-404.d.ts +3 -0
  212. package/dist/rules/tech/soft-404.d.ts.map +1 -0
  213. package/dist/rules/tech/soft-404.js +24 -0
  214. package/dist/rules/tech/soft-404.js.map +1 -0
  215. package/dist/types.d.ts +170 -0
  216. package/dist/types.d.ts.map +1 -0
  217. package/dist/types.js +2 -0
  218. package/dist/types.js.map +1 -0
  219. package/dist/url-normalize.d.ts +10 -0
  220. package/dist/url-normalize.d.ts.map +1 -0
  221. package/dist/url-normalize.js +52 -0
  222. package/dist/url-normalize.js.map +1 -0
  223. package/package.json +46 -0
package/dist/index.js ADDED
@@ -0,0 +1,45 @@
1
+ export * from "./types.js";
2
+ export * from "./auditor.js";
3
+ export * from "./parser.js";
4
+ export * from "./url-normalize.js";
5
+ export * from "./algorithms/simhash.js";
6
+ export * from "./algorithms/entity-mask.js";
7
+ export * from "./rules/spam/near-duplicate.js";
8
+ export * from "./rules/spam/entity-swap.js";
9
+ export * from "./rules/spam/thin-content.js";
10
+ export * from "./rules/spam/boilerplate-ratio.js";
11
+ export * from "./rules/spam/template-diversity.js";
12
+ export * from "./rules/spam/publication-velocity.js";
13
+ export * from "./rules/spam/doorway-pattern.js";
14
+ export * from "./rules/spam/template-coverage.js";
15
+ export * from "./rules/content/unique-value.js";
16
+ export * from "./rules/content/heading-uniqueness.js";
17
+ export * from "./rules/content/meta-uniqueness.js";
18
+ export * from "./rules/content/missing-author.js";
19
+ export * from "./rules/content/eeat-signals.js";
20
+ export * from "./rules/links/orphan-pages.js";
21
+ export * from "./rules/links/dead-ends.js";
22
+ export * from "./rules/links/link-depth.js";
23
+ export * from "./rules/links/cluster-connectivity.js";
24
+ export * from "./rules/links/hub-pages.js";
25
+ export * from "./rules/tech/canonical-consistency.js";
26
+ export * from "./rules/tech/canonical-noindex-conflict.js";
27
+ export * from "./rules/tech/robots-noindex-conflict.js";
28
+ export * from "./rules/tech/sitemap-completeness.js";
29
+ export * from "./rules/tech/redirect-chain.js";
30
+ export * from "./rules/tech/soft-404.js";
31
+ export * from "./rules/tech/og-completeness.js";
32
+ export * from "./rules/tech/hreflang-consistency.js";
33
+ export * from "./rules/schema/json-ld-valid.js";
34
+ export * from "./rules/schema/required-fields.js";
35
+ export * from "./rules/schema/consistency.js";
36
+ export * from "./algorithms/tf-idf.js";
37
+ export * from "./rules/cannibal/title-overlap.js";
38
+ export * from "./rules/cannibal/keyword-collision.js";
39
+ export * from "./rules/cannibal/url-pattern.js";
40
+ export * from "./rule-references.js";
41
+ export * from "./page-classifier.js";
42
+ export * from "./formatters/index.js";
43
+ export * from "./renderer.js";
44
+ export * from "./enrich-findings.js";
45
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,oBAAoB,CAAC;AACnC,cAAc,yBAAyB,CAAC;AACxC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,gCAAgC,CAAC;AAC/C,cAAc,6BAA6B,CAAC;AAC5C,cAAc,8BAA8B,CAAC;AAC7C,cAAc,mCAAmC,CAAC;AAClD,cAAc,oCAAoC,CAAC;AACnD,cAAc,sCAAsC,CAAC;AACrD,cAAc,iCAAiC,CAAC;AAChD,cAAc,mCAAmC,CAAC;AAClD,cAAc,iCAAiC,CAAC;AAChD,cAAc,uCAAuC,CAAC;AACtD,cAAc,oCAAoC,CAAC;AACnD,cAAc,mCAAmC,CAAC;AAClD,cAAc,iCAAiC,CAAC;AAChD,cAAc,+BAA+B,CAAC;AAC9C,cAAc,4BAA4B,CAAC;AAC3C,cAAc,6BAA6B,CAAC;AAC5C,cAAc,uCAAuC,CAAC;AACtD,cAAc,4BAA4B,CAAC;AAC3C,cAAc,uCAAuC,CAAC;AACtD,cAAc,4CAA4C,CAAC;AAC3D,cAAc,yCAAyC,CAAC;AACxD,cAAc,sCAAsC,CAAC;AACrD,cAAc,gCAAgC,CAAC;AAC/C,cAAc,0BAA0B,CAAC;AACzC,cAAc,iCAAiC,CAAC;AAChD,cAAc,sCAAsC,CAAC;AACrD,cAAc,iCAAiC,CAAC;AAChD,cAAc,mCAAmC,CAAC;AAClD,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,mCAAmC,CAAC;AAClD,cAAc,uCAAuC,CAAC;AACtD,cAAc,iCAAiC,CAAC;AAChD,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,sBAAsB,CAAC"}
@@ -0,0 +1,4 @@
1
+ import type { PageGroupConfig, ParsedPage } from "./types.js";
2
+ export declare function classifyPages(pages: ParsedPage[], groups: Record<string, PageGroupConfig> | undefined): Map<string, ParsedPage[]>;
3
+ export declare function isRuleEnabled(ruleId: string, rules: string[] | undefined): boolean;
4
+ //# sourceMappingURL=page-classifier.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"page-classifier.d.ts","sourceRoot":"","sources":["../src/page-classifier.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAoG9D,wBAAgB,aAAa,CAC3B,KAAK,EAAE,UAAU,EAAE,EACnB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,GAAG,SAAS,GAClD,GAAG,CAAC,MAAM,EAAE,UAAU,EAAE,CAAC,CA8B3B;AAED,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,SAAS,GAAG,OAAO,CAWlF"}
@@ -0,0 +1,133 @@
1
// Splits a glob pattern into an ordered token list:
//   { kind: "literal", value }  a run of ordinary characters
//   { kind: "star" }            a single "*"
//   { kind: "globstar" }        a "**" pair
// Three or more consecutive asterisks are read left to right in pairs,
// so "***" yields a globstar followed by a star.
function tokenize(pattern) {
    const tokens = [];
    let literal = "";
    let i = 0;
    while (i < pattern.length) {
        if (pattern[i] !== "*") {
            literal += pattern[i];
            i += 1;
            continue;
        }
        // A wildcard ends the literal run collected so far.
        if (literal.length > 0) {
            tokens.push({ kind: "literal", value: literal });
            literal = "";
        }
        if (pattern[i + 1] === "*") {
            tokens.push({ kind: "globstar" });
            i += 2;
        }
        else {
            tokens.push({ kind: "star" });
            i += 1;
        }
    }
    if (literal.length > 0) {
        tokens.push({ kind: "literal", value: literal });
    }
    return tokens;
}
// Glob matcher with no RegExp construction from the pattern (immune to ReDoS).
// Supports * (any run of non-slash chars) and ** (any chars including slash).
// Backslashes in `value` are treated as forward slashes.
// The match is anchored to the end of `value`; it may begin at index 0 or
// at any "/" inside the string.
//
// Each wildcard token is processed in a single pass over `value`; a literal
// token costs one startsWith probe per live start position.
//
// Returns true when the whole pattern can be consumed ending exactly at the
// end of `value`.
function matchGlob(pattern, value) {
    const normalized = value.replace(/\\/g, "/");
    const tokens = tokenize(pattern);
    const n = normalized.length;
    // pos[j] === 1 means the tokens consumed so far can end at index j.
    // Seed index 0 and every index that sits ON a slash so that patterns
    // starting with "/" (e.g. "/about") match at segment boundaries.
    let pos = new Uint8Array(n + 1);
    pos[0] = 1;
    for (let j = 0; j < n; j++) {
        if (normalized[j] === "/")
            pos[j] = 1;
    }
    for (const tok of tokens) {
        const next = new Uint8Array(n + 1);
        if (tok.kind === "literal") {
            const lit = tok.value;
            const ll = lit.length;
            for (let j = 0; j <= n - ll; j++) {
                if (pos[j] && normalized.startsWith(lit, j)) {
                    next[j + ll] = 1;
                }
            }
        }
        else if (tok.kind === "star") {
            // Zero or more non-slash characters. Index k is reachable when it
            // was already live (zero characters consumed), or when k - 1 is
            // reachable and the character stepped over is not a slash.
            for (let k = 0; k <= n; k++) {
                if (pos[k] || (k > 0 && next[k - 1] && normalized[k - 1] !== "/")) {
                    next[k] = 1;
                }
            }
        }
        else {
            // globstar: everything at or after the first live index is reachable.
            const first = pos.indexOf(1);
            if (first !== -1) {
                next.fill(1, first);
            }
        }
        pos = next;
    }
    return pos[n] === 1;
}
88
// Reports whether `url` satisfies at least one glob in `config.match`,
// which may be a single pattern or an array of patterns.
function matchesGroup(url, config) {
    const { match } = config;
    const candidates = Array.isArray(match) ? match : [match];
    for (const glob of candidates) {
        if (matchGlob(glob, url)) {
            return true;
        }
    }
    return false;
}
92
/**
 * Buckets pages by the first configured group whose patterns match the
 * page URL.
 *
 * The returned Map has one entry per group name (in configuration order)
 * followed by "__default", which collects every page no group claimed.
 * When `groups` is missing or empty, all pages go into "__default".
 */
export function classifyPages(pages, groups) {
    const buckets = new Map();
    const groupEntries = groups ? Object.entries(groups) : [];
    if (groupEntries.length === 0) {
        buckets.set("__default", [...pages]);
        return buckets;
    }
    for (const [groupName] of groupEntries) {
        buckets.set(groupName, []);
    }
    buckets.set("__default", []);
    for (const page of pages) {
        // First matching group wins; later groups are not consulted.
        const winner = groupEntries.find(([, config]) => matchesGroup(page.url, config));
        const bucketName = winner ? winner[0] : "__default";
        buckets.get(bucketName).push(page);
    }
    return buckets;
}
118
/**
 * Decides whether `ruleId` is switched on by the `rules` allow-list.
 *
 * - `rules === undefined`: no allow-list, so every rule is enabled.
 * - Otherwise the rule is enabled when some entry equals `ruleId` exactly,
 *   or is a "<prefix>/*" wildcard and `ruleId` starts with "<prefix>/".
 *   An empty list therefore enables nothing.
 */
export function isRuleEnabled(ruleId, rules) {
    if (rules === undefined) {
        return true;
    }
    const allows = (pattern) => pattern === ruleId ||
        (pattern.endsWith("/*") && ruleId.startsWith(pattern.slice(0, -2) + "/"));
    return rules.some(allows);
}
133
+ //# sourceMappingURL=page-classifier.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"page-classifier.js","sourceRoot":"","sources":["../src/page-classifier.ts"],"names":[],"mappings":"AAQA,0DAA0D;AAC1D,SAAS,QAAQ,CAAC,OAAe;IAC/B,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,OAAO,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QAC1B,IAAI,OAAO,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;YACvB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;gBACjD,OAAO,GAAG,EAAE,CAAC;YACf,CAAC;YACD,IAAI,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;gBAC3B,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;gBAClC,CAAC,IAAI,CAAC,CAAC;YACT,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;gBAC9B,CAAC,IAAI,CAAC,CAAC;YACT,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC;YACtB,CAAC,IAAI,CAAC,CAAC;QACT,CAAC;IACH,CAAC;IACD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;IACnD,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,iEAAiE;AACjE,uEAAuE;AACvE,yEAAyE;AACzE,sCAAsC;AACtC,SAAS,SAAS,CAAC,OAAe,EAAE,KAAa;IAC/C,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IAC7C,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;IAEjC,MAAM,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;IAC5B,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IAExB,iEAAiE;IACjE,kEAAkE;IAClE,0EAA0E;IAC1E,IAAI,GAAG,GAAG,IAAI,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAChC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACX,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3B,IAAI,UAAU,CAAC,CAAC,CAAC,KAAK,GAAG;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACxC,CAAC;IAED,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAEnC,IAAI,GAAG,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;YAC3B,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC;YACtB,MAAM,EAAE,GAAG,GAAG,CAAC,MAAM,CAAC;YACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI
,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjC,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC;oBAC5C,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC;gBACnB,CAAC;YACH,CAAC;QACH,CAAC;aAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAC/B,6CAA6C;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC5B,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBAAE,SAAS;gBACtB,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACZ,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;oBAChC,IAAI,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG;wBAAE,MAAM;oBACrC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACd,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,+DAA+D;YAC/D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC5B,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBAAE,SAAS;gBACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACd,CAAC;YACH,CAAC;QACH,CAAC;QAED,GAAG,GAAG,IAAI,CAAC;IACb,CAAC;IAED,OAAO,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;AACtB,CAAC;AAED,SAAS,YAAY,CAAC,GAAW,EAAE,MAAuB;IACxD,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7E,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;AAC7D,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,KAAmB,EACnB,MAAmD;IAEnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAwB,CAAC;IAE/C,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChD,MAAM,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC;QACpC,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,MAAM,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5C,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAC5B,CAAC;IACD,MAAM,CAAC,GAAG,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IAE5B,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAE5C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,OAAO,GAAG,KAAK,CAAC;QACpB,KAAK,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;YAC1C,IAAI,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE
,MAAM,CAAC,EAAE,CAAC;gBACnC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC7B,OAAO,GAAG,IAAI,CAAC;gBACf,MAAM;YACR,CAAC;QACH,CAAC;QACD,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,CAAC,GAAG,CAAC,WAAW,CAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAAc,EAAE,KAA2B;IACvE,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC;IACrC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACrC,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE;QAC5B,IAAI,OAAO,KAAK,MAAM;YAAE,OAAO,IAAI,CAAC;QACpC,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3B,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACpC,OAAO,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;QACzC,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { ParseHtmlOptions, ParsedPage } from "./types.js";
2
+ export declare function parseHtmlPage(html: string, url: string, options?: ParseHtmlOptions): ParsedPage;
3
+ //# sourceMappingURL=parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAiE/D,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,UAAU,CA2F/F"}
package/dist/parser.js ADDED
@@ -0,0 +1,131 @@
1
+ import { load } from "cheerio";
2
+ import { dirname, resolve } from "node:path";
3
+ import { mergeNormalizeUrlOptions, normalizeAuditUrl } from "./url-normalize.js";
4
// Strips leading/trailing whitespace and collapses every internal
// whitespace run into a single space.
function normalizedText(input) {
    const trimmed = input.trim();
    return trimmed.replace(/\s+/g, " ");
}
7
// Builds a fingerprint of the markup: the count of each opening tag name
// (case-insensitive), rendered as "tag:count" pairs sorted by tag name and
// joined with "|". Only tags whose name is immediately followed by
// whitespace or ">" are counted.
function buildStructureSignature(html) {
    const tally = new Map();
    for (const match of html.toLowerCase().matchAll(/<([a-z0-9-]+)(\s|>)/g)) {
        const name = match[1];
        tally.set(name, (tally.get(name) ?? 0) + 1);
    }
    const names = Array.from(tally.keys());
    names.sort((left, right) => left.localeCompare(right));
    const parts = [];
    for (const name of names) {
        parts.push(`${name}:${tally.get(name)}`);
    }
    return parts.join("|");
}
18
// Turns one raw href into a normalized absolute target, or null.
//   - blank href                      -> null
//   - href that is already http(s)    -> normalized as-is
//   - page URL is http(s)             -> href resolved against it; null when
//                                        resolution/normalization throws or the
//                                        result is not http(s)
//   - otherwise (non-web `pageUrl`)   -> href resolved as a path relative to
//                                        the directory of `pageUrl`
function resolveHref(href, pageUrl, normalizeOpts) {
    const target = href.trim();
    if (target === "") {
        return null;
    }
    const isWebUrl = (candidate) => /^https?:\/\//i.test(candidate);
    if (isWebUrl(target)) {
        return normalizeAuditUrl(target, normalizeOpts);
    }
    if (!isWebUrl(pageUrl)) {
        const baseDir = dirname(pageUrl);
        return normalizeAuditUrl(resolve(baseDir, target), normalizeOpts);
    }
    try {
        const absolute = new URL(target, pageUrl);
        const isHttp = absolute.protocol === "http:" || absolute.protocol === "https:";
        return isHttp ? normalizeAuditUrl(absolute.href, normalizeOpts) : null;
    }
    catch {
        return null;
    }
}
41
/**
 * Resolves a list of raw hrefs to de-duplicated targets, in first-seen order.
 * Blank hrefs, pure fragments ("#..."), and mailto:/tel:/javascript:/data:
 * links are skipped, as is anything `resolveHref` maps to null.
 */
function resolveAbsoluteHrefs(hrefs, pageUrl, normalizeOpts) {
    const seen = new Set();
    for (const raw of hrefs) {
        const href = raw.trim();
        if (!href || href.startsWith("#")) {
            continue;
        }
        if (/^mailto:|^tel:|^javascript:|^data:/i.test(href)) {
            continue;
        }
        const target = resolveHref(href, pageUrl, normalizeOpts);
        if (target !== null) {
            seen.add(target);
        }
    }
    return Array.from(seen);
}
52
/**
 * Parses one HTML document into the page record used by the audit.
 *
 * Extracts title, description, canonical, robots, Open Graph fields,
 * hreflang alternates, a published date, h1/h2 texts, outgoing links,
 * JSON-LD payloads and author signals, then computes the body text with
 * header/footer/nav/script/style/noscript removed.
 *
 * `url` is stored as given and used as the base for link resolution;
 * `options.normalizeUrl` is forwarded to the URL normalizer.
 */
export function parseHtmlPage(html, url, options) {
    const normalizeOpts = mergeNormalizeUrlOptions(options?.normalizeUrl);
    const $ = load(html);
    // Small readers shared by the extractions below.
    const attrText = (selector, attribute) => normalizedText($(selector).attr(attribute) ?? "");
    const headingTexts = (selector) => $(selector)
        .get()
        .map((node) => normalizedText($(node).text()))
        .filter(Boolean);
    const title = normalizedText($("title").first().text());
    const metaDescription = attrText('meta[name="description"]', "content");
    const canonical = attrText('link[rel="canonical"]', "href");
    const robotsMeta = attrText('meta[name="robots"]', "content");
    const og = {
        title: attrText('meta[property="og:title"]', "content"),
        description: attrText('meta[property="og:description"]', "content"),
        image: attrText('meta[property="og:image"]', "content")
    };
    // Alternates with an empty hreflang value are dropped.
    const hreflangs = $('link[rel="alternate"][hreflang]')
        .get()
        .map((node) => ({
        lang: normalizedText(String($(node).attr("hreflang") ?? "")),
        href: normalizedText(String($(node).attr("href") ?? ""))
    }))
        .filter((alternate) => alternate.lang.length > 0);
    // First available source wins: article meta, datePublished meta, first <time>.
    const publishedDate = normalizedText($('meta[property="article:published_time"]').attr("content") ??
        $('meta[name="datePublished"]').attr("content") ??
        $("time[datetime]").first().attr("datetime") ??
        "");
    const h1 = headingTexts("h1");
    const h2 = headingTexts("h2");
    const rawHrefs = $("a[href]")
        .get()
        .map((node) => String($(node).attr("href") ?? ""));
    const resolvedHrefs = resolveAbsoluteHrefs(rawHrefs, url, normalizeOpts);
    // Unparseable JSON-LD is kept as a marker object rather than discarded.
    const jsonLd = [];
    for (const node of $('script[type="application/ld+json"]').get()) {
        const raw = $(node).html() ?? "";
        try {
            jsonLd.push(JSON.parse(raw));
        }
        catch {
            jsonLd.push({ __parseError: true, __raw: raw });
        }
    }
    const authorSignals = {
        metaAuthor: attrText('meta[name="author"]', "content"),
        // Only a top-level "author" key on a JSON-LD object counts.
        schemaAuthor: jsonLd.some((ld) => typeof ld === "object" && ld !== null && "author" in ld),
        bylineElement: $("[class*='author'], [class*='byline'], [rel='author']").length > 0,
        relAuthorLink: $('a[rel="author"], link[rel="author"]').length > 0
    };
    // Must stay last: this mutates the document, so every selector-based
    // extraction above has to run before the chrome is stripped.
    $("header, footer, nav, script, style, noscript").remove();
    const contentText = normalizedText($("body").text());
    return {
        url,
        title,
        metaDescription,
        canonical,
        robotsMeta,
        og,
        hreflangs,
        publishedDate: publishedDate || undefined,
        headings: { h1, h2 },
        jsonLd,
        authorSignals,
        resolvedHrefs,
        structureSignature: buildStructureSignature(html),
        contentText,
        html
    };
}
131
+ //# sourceMappingURL=parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.js","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAC/B,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAE7C,OAAO,EAAE,wBAAwB,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAEjF,SAAS,cAAc,CAAC,KAAa;IACnC,OAAO,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC3C,CAAC;AAED,SAAS,uBAAuB,CAAC,IAAY;IAC3C,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,sBAAsB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC9F,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;SAChC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;SACtC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,KAAK,EAAE,CAAC;SACxC,IAAI,CAAC,GAAG,CAAC,CAAC;AACf,CAAC;AAED,SAAS,WAAW,CAClB,IAAY,EACZ,OAAe,EACf,aAA0D;IAE1D,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,OAAO,iBAAiB,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IACnD,CAAC;IACD,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAChD,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;YAC5B,IAAI,CAAC,CAAC,QAAQ,KAAK,OAAO,IAAI,CAAC,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;gBACtD,OAAO,IAAI,CAAC;YACd,CAAC;YACD,OAAO,iBAAiB,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,OAAO,CAAC,EAAE,aAAa,CAAC,CAAC;AAC9E,CAAC;AAED,8EAA8E;AAC9E,SAAS,oBAAoB,CAC3B,KAAe,EACf,OAAe,EACf,aAA0D;IAE1D,MAAM,QAAQ,GAAG,KAAK;SACnB,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,OAAO,CAAC;SACf,MAA
M,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;SACvC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,qCAAqC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SACnE,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,EAAE,OAAO,EAAE,aAAa,CAAC,CAAC;SACxD,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IAE1C,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,GAAW,EAAE,OAA0B;IACjF,MAAM,aAAa,GAAG,wBAAwB,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;IACtE,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IAErB,MAAM,KAAK,GAAG,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC;IACxD,MAAM,eAAe,GAAG,cAAc,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IAC5F,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;IAChF,MAAM,UAAU,GAAG,cAAc,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IAClF,MAAM,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IACrF,MAAM,aAAa,GAAG,cAAc,CAAC,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IACjG,MAAM,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IACrF,MAAM,SAAS,GAAG,CAAC,CAAC,iCAAiC,CAAC;SACnD,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC;QACpB,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC;QAC5D,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;KACzD,CAAC,CAAC;SACF,GAAG,EAAE;SACL,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC5C,MAAM,aAAa,GAAG,cAAc,CAClC,CAAC,CAAC,yCAAyC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;QAC1D,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;QAC/C,CAAC,CAAC,gBAAgB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC;QAC5C,EAAE,CACL,CAAC;IAEF,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC;SACf,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC
;SACnD,GAAG,EAAE;SACL,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC;SACf,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;SACnD,GAAG,EAAE;SACL,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,MAAM,aAAa,GAAG,oBAAoB,CACxC,CAAC,CAAC,SAAS,CAAC;SACT,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;SACvD,GAAG,EAAE,EACR,GAAG,EACH,aAAa,CACd,CAAC;IAEF,MAAM,MAAM,GAAc,EAAE,CAAC;IAC7B,CAAC,CAAC,oCAAoC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE;QAC1D,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;YAChD,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACtB,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,CAAC,IAAI,CAAC,EAAE,YAAY,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC;QACnE,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG,cAAc,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IAClF,MAAM,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE;QACtC,IAAI,OAAO,EAAE,KAAK,QAAQ,IAAI,EAAE,KAAK,IAAI;YAAE,OAAO,KAAK,CAAC;QACxD,OAAO,QAAQ,IAAI,EAAE,CAAC;IACxB,CAAC,CAAC,CAAC;IACH,MAAM,aAAa,GACjB,CAAC,CAAC,sDAAsD,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IACvE,MAAM,aAAa,GAAG,CAAC,CAAC,qCAAqC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAE1E,CAAC,CAAC,8CAA8C,CAAC,CAAC,MAAM,EAAE,CAAC;IAC3D,MAAM,WAAW,GAAG,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAErD,OAAO;QACL,GAAG;QACH,KAAK;QACL,eAAe;QACf,SAAS;QACT,UAAU;QACV,EAAE,EAAE;YACF,KAAK,EAAE,OAAO;YACd,WAAW,EAAE,aAAa;YAC1B,KAAK,EAAE,OAAO;SACf;QACD,SAAS;QACT,aAAa,EAAE,aAAa,IAAI,SAAS;QACzC,QAAQ,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE;QACpB,MAAM;QACN,aAAa,EAAE;YACb,UAAU;YACV,YAAY;YACZ,aAAa;YACb,aAAa;SACd;QACD,aAAa;QACb,kBAAkB,EAAE,uBAAuB,CAAC,IAAI,CAAC;QACjD,WAAW;QACX,IAAI;KACL,CAAC;AACJ,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=parser.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.test.d.ts","sourceRoot":"","sources":["../src/parser.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,37 @@
1
+ import { describe, expect, test } from "vitest";
2
+ import { parseHtmlPage } from "./parser.js";
3
// Smoke test for parseHtmlPage: one small HTML document goes in, and the
// extracted metadata, headings, structure signature and body text are checked.
describe("parseHtmlPage", () => {
  test("extracts title, metadata, headings, and cleaned content text", () => {
    const pageUrl = "https://example.dev/templates/california-llc";
    // Fixture with <head> metadata, a <main> section, and a <header>/<footer>
    // that carry chrome text rather than page content.
    const fixture = `
<html>
<head>
<title>California LLC Template</title>
<meta name="description" content="A practical filing guide." />
<link rel="canonical" href="https://example.dev/templates/california-llc" />
<meta property="article:published_time" content="2026-04-01" />
</head>
<body>
<header>Global nav should be removed</header>
<main>
<h1>California LLC Template</h1>
<h2>Filing Requirements</h2>
<p>California has a publication rule in some counties.</p>
</main>
<footer>Footer should be removed</footer>
</body>
</html>
`;
    const page = parseHtmlPage(fixture, pageUrl);

    // Metadata read from <head>; the URL is echoed back from the second argument.
    expect(page.url).toBe(pageUrl);
    expect(page.title).toBe("California LLC Template");
    expect(page.metaDescription).toBe("A practical filing guide.");
    expect(page.canonical).toBe(pageUrl);
    expect(page.publishedDate).toBe("2026-04-01");

    // Heading text is collected per level.
    expect(page.headings.h1).toEqual(["California LLC Template"]);
    expect(page.headings.h2).toEqual(["Filing Requirements"]);
    expect(page.structureSignature).toContain("h1:1");

    // Body text keeps the <main> copy and drops the <header> navigation text.
    expect(page.contentText).toContain("California has a publication rule");
    expect(page.contentText).not.toContain("Global nav should be removed");
  });
});
37
+ //# sourceMappingURL=parser.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.test.js","sourceRoot":"","sources":["../src/parser.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,IAAI,CAAC,8DAA8D,EAAE,GAAG,EAAE;QACxE,MAAM,IAAI,GAAG;;;;;;;;;;;;;;;;;;KAkBZ,CAAC;QAEF,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,8CAA8C,CAAC,CAAC;QAEnF,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;QACxE,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;QACrD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;QAC9E,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAChD,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,yBAAyB,CAAC,CAAC,CAAC;QAChE,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACpD,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,SAAS,CAAC,mCAAmC,CAAC,CAAC;QAC1E,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,8BAA8B,CAAC,CAAC;IAC3E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,15 @@
1
/**
 * Options accepted by `renderPages`.
 */
export interface RenderOptions {
    /**
     * WebSocket (CDP) endpoint of a browser to connect to. When omitted,
     * `renderPages` falls back to the `PSEOLINT_BROWSER_WS` environment
     * variable and, failing that, launches a headless Chromium itself.
     */
    browserWsEndpoint?: string;
    /** Upper bound on the number of pages rendered in parallel. */
    concurrency: number;
    /** Timeout handed to each page navigation (`page.goto`), in milliseconds. */
    timeoutMs: number;
}
6
/**
 * One successfully rendered page as returned by `renderPages`.
 */
interface RenderedPage {
    /** The `url` of the input entry this result belongs to. */
    url: string;
    /** Serialized HTML of the page, read after navigation completed. */
    html: string;
}
10
/**
 * Renders each page in a browser and returns the resulting HTML.
 *
 * @param pages - Pages to render. When an entry has a `localPath` and
 *   `sourceDir` is set, the page is loaded from a temporary local static
 *   server instead of from `url`.
 * @param sourceDir - Directory served by the temporary static server, or
 *   `null` to always navigate to `url`.
 * @param options - Browser connection, parallelism and timeout settings.
 * @returns The pages that rendered successfully; pages whose navigation
 *   fails are left out of the result.
 */
export declare function renderPages(
    pages: Array<{
        url: string;
        localPath?: string;
    }>,
    sourceDir: string | null,
    options: RenderOptions,
): Promise<RenderedPage[]>;
14
+ export {};
15
+ //# sourceMappingURL=renderer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"renderer.d.ts","sourceRoot":"","sources":["../src/renderer.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,aAAa;IAC5B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,UAAU,YAAY;IACpB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;CACd;AA0ED,wBAAsB,WAAW,CAC/B,KAAK,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,EACjD,SAAS,EAAE,MAAM,GAAG,IAAI,EACxB,OAAO,EAAE,aAAa,GACrB,OAAO,CAAC,YAAY,EAAE,CAAC,CA8DzB"}
@@ -0,0 +1,124 @@
1
+ import { createServer } from "node:http";
2
+ import { readFile } from "node:fs/promises";
3
+ import { join, extname } from "node:path";
4
// Content-Type values used by the local static server, keyed by file
// extension (including the leading dot). Extensions missing from this table
// are served as "application/octet-stream" by the request handler.
const MIME_TYPES = {
  // Documents
  ".html": "text/html",
  ".htm": "text/html",
  // Scripts, styles and data
  ".js": "application/javascript",
  ".css": "text/css",
  ".json": "application/json",
  // Images
  ".png": "image/png",
  ".jpg": "image/jpeg",
  ".svg": "image/svg+xml",
};
14
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
15
+ async function loadPlaywright() {
16
+ try {
17
+ // @ts-ignore -- playwright-core is an optional peer dependency
18
+ return await import("playwright-core");
19
+ }
20
+ catch {
21
+ throw new Error("--render requires a browser connection.\n" +
22
+ " Option 1: Set PSEOLINT_BROWSER_WS to your CDP endpoint (wss://...)\n" +
23
+ " Option 2: Install playwright-core and Chromium:\n" +
24
+ " npm install playwright-core\n" +
25
+ " npx playwright install chromium");
26
+ }
27
+ }
28
/**
 * Reports whether an endpoint URL points at the local machine.
 *
 * Recognised hosts are `localhost`, the IPv4 loopback `127.0.0.1` and the
 * IPv6 loopback `[::1]` (the URL parser reports IPv6 literals with their
 * brackets).
 *
 * @param endpoint - Endpoint URL, e.g. `ws://127.0.0.1:9222/devtools/browser/x`.
 * @returns `true` for a loopback host; `false` for any other host or when
 *   `endpoint` is not a parseable URL.
 */
function isLocalhost(endpoint) {
  const loopbackHosts = ["localhost", "127.0.0.1", "[::1]"];
  try {
    return loopbackHosts.includes(new URL(endpoint).hostname);
  } catch {
    // Unparseable input cannot be shown to be local, so treat it as remote.
    return false;
  }
}
37
+ const WS_SECURE = "wss://";
38
+ const WS_PLAIN = "ws" + "://";
39
+ function validateWsEndpoint(endpoint) {
40
+ if (endpoint.startsWith(WS_SECURE))
41
+ return;
42
+ if (endpoint.startsWith(WS_PLAIN) && isLocalhost(endpoint))
43
+ return;
44
+ throw new Error(`Insecure WebSocket endpoint: ${endpoint}. ` +
45
+ `Remote endpoints must use ${WS_SECURE}. ` +
46
+ `Unencrypted ${WS_PLAIN} is only allowed for localhost.`);
47
+ }
48
+ async function startStaticServer(rootDir) {
49
+ return new Promise((resolve) => {
50
+ const server = createServer(async (req, res) => {
51
+ const urlPath = decodeURIComponent(req.url ?? "/");
52
+ const filePath = join(rootDir, urlPath);
53
+ try {
54
+ const content = await readFile(filePath);
55
+ const ext = extname(filePath);
56
+ res.writeHead(200, { "Content-Type": MIME_TYPES[ext] ?? "application/octet-stream" });
57
+ res.end(content);
58
+ }
59
+ catch {
60
+ res.writeHead(404);
61
+ res.end("Not found");
62
+ }
63
+ });
64
+ server.listen(0, "127.0.0.1", () => {
65
+ const addr = server.address();
66
+ const port = typeof addr === "object" && addr ? addr.port : 0;
67
+ resolve({ port, close: () => server.close() });
68
+ });
69
+ });
70
+ }
71
+ export async function renderPages(pages, sourceDir, options) {
72
+ const pw = await loadPlaywright();
73
+ const endpoint = options.browserWsEndpoint
74
+ ?? process.env.PSEOLINT_BROWSER_WS
75
+ ?? null;
76
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
77
+ let browser;
78
+ if (endpoint) {
79
+ validateWsEndpoint(endpoint);
80
+ browser = await pw.chromium.connectOverCDP(endpoint);
81
+ }
82
+ else {
83
+ browser = await pw.chromium.launch({ headless: true });
84
+ }
85
+ let server = null;
86
+ if (sourceDir) {
87
+ server = await startStaticServer(sourceDir);
88
+ }
89
+ const results = [];
90
+ let index = 0;
91
+ async function processNext() {
92
+ while (index < pages.length) {
93
+ const current = index;
94
+ index += 1;
95
+ const entry = pages[current];
96
+ const page = await browser.newPage();
97
+ let navigateUrl = entry.url;
98
+ if (entry.localPath && server) {
99
+ const relativePath = entry.localPath.replace(/\\/g, "/");
100
+ navigateUrl = `http://127.0.0.1:${server.port}/${relativePath}`;
101
+ }
102
+ try {
103
+ await page.goto(navigateUrl, {
104
+ waitUntil: "networkidle",
105
+ timeout: options.timeoutMs,
106
+ });
107
+ const html = await page.content();
108
+ results.push({ url: entry.url, html });
109
+ }
110
+ catch {
111
+ // Skip pages that fail to render
112
+ }
113
+ finally {
114
+ await page.close();
115
+ }
116
+ }
117
+ }
118
+ const workers = Array.from({ length: Math.min(options.concurrency, pages.length) }, () => processNext());
119
+ await Promise.all(workers);
120
+ server?.close();
121
+ await browser.close();
122
+ return results;
123
+ }
124
+ //# sourceMappingURL=renderer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"renderer.js","sourceRoot":"","sources":["../src/renderer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AACzC,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAa1C,MAAM,UAAU,GAA2B;IACzC,OAAO,EAAE,WAAW;IACpB,MAAM,EAAE,WAAW;IACnB,KAAK,EAAE,wBAAwB;IAC/B,MAAM,EAAE,UAAU;IAClB,OAAO,EAAE,kBAAkB;IAC3B,MAAM,EAAE,WAAW;IACnB,MAAM,EAAE,YAAY;IACpB,MAAM,EAAE,eAAe;CACxB,CAAC;AAEF,8DAA8D;AAC9D,KAAK,UAAU,cAAc;IAC3B,IAAI,CAAC;QACH,+DAA+D;QAC/D,OAAO,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,2CAA2C;YAC3C,wEAAwE;YACxE,qDAAqD;YACrD,mCAAmC;YACnC,qCAAqC,CACtC,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAC,QAAgB;IACnC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;QACjC,OAAO,MAAM,CAAC,QAAQ,KAAK,WAAW,IAAI,MAAM,CAAC,QAAQ,KAAK,WAAW,CAAC;IAC5E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,MAAM,SAAS,GAAG,QAAQ,CAAC;AAC3B,MAAM,QAAQ,GAAG,IAAI,GAAG,KAAK,CAAC;AAE9B,SAAS,kBAAkB,CAAC,QAAgB;IAC1C,IAAI,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO;IAC3C,IAAI,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,WAAW,CAAC,QAAQ,CAAC;QAAE,OAAO;IACnE,MAAM,IAAI,KAAK,CACb,gCAAgC,QAAQ,IAAI;QAC5C,6BAA6B,SAAS,IAAI;QAC1C,eAAe,QAAQ,iCAAiC,CACzD,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,iBAAiB,CAAC,OAAe;IAC9C,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,EAAE;YAC7C,MAAM,OAAO,GAAG,kBAAkB,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC;YACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YACxC,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC;gBACzC,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;gBAC9B,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,UAAU,CAAC,GAAG,CAAC,IAAI,0BAA0B,EAAE,CAAC,CAAC;gBACtF,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACnB,CAAC;YAAC,MAAM,CAAC;gBACP,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;gBACnB,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YACvB,CAAC;QACH,CAAC,CAAC,CAAC;QACH,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW,EAAE,GAAG,EAAE;YACjC,MAAM,IAAI,GAAG,MAA
M,CAAC,OAAO,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9D,OAAO,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,KAAiD,EACjD,SAAwB,EACxB,OAAsB;IAEtB,MAAM,EAAE,GAAG,MAAM,cAAc,EAAE,CAAC;IAElC,MAAM,QAAQ,GAAG,OAAO,CAAC,iBAAiB;WACrC,OAAO,CAAC,GAAG,CAAC,mBAAmB;WAC/B,IAAI,CAAC;IAEV,8DAA8D;IAC9D,IAAI,OAAY,CAAC;IACjB,IAAI,QAAQ,EAAE,CAAC;QACb,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAC7B,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IACvD,CAAC;SAAM,CAAC;QACN,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,IAAI,MAAM,GAA+C,IAAI,CAAC;IAC9D,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,GAAG,MAAM,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAC9C,CAAC;IAED,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,UAAU,WAAW;QACxB,OAAO,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,KAAK,CAAC;YACtB,KAAK,IAAI,CAAC,CAAC;YACX,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC;YAC7B,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;YAErC,IAAI,WAAW,GAAG,KAAK,CAAC,GAAG,CAAC;YAC5B,IAAI,KAAK,CAAC,SAAS,IAAI,MAAM,EAAE,CAAC;gBAC9B,MAAM,YAAY,GAAG,KAAK,CAAC,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;gBACzD,WAAW,GAAG,oBAAoB,MAAM,CAAC,IAAI,IAAI,YAAY,EAAE,CAAC;YAClE,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE;oBAC3B,SAAS,EAAE,aAAa;oBACxB,OAAO,EAAE,OAAO,CAAC,SAAS;iBAC3B,CAAC,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;gBAClC,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;YACzC,CAAC;YAAC,MAAM,CAAC;gBACP,iCAAiC;YACnC,CAAC;oBAAS,CAAC;gBACT,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CACxB,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,EACvD,GAAG,EAAE,CAAC,WAAW,EAAE,CACpB,CAAC;IACF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAE3B,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB
,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IAEtB,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,2 @@
1
/**
 * Lookup table from a rule identifier (for example `"spam/near-duplicate"`)
 * to the URL of the documentation page cited for that rule.
 */
export declare const RULE_REFERENCES: Record<string, string>;
2
+ //# sourceMappingURL=rule-references.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rule-references.d.ts","sourceRoot":"","sources":["../src/rule-references.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAiClD,CAAC"}
@@ -0,0 +1,35 @@
1
+ export const RULE_REFERENCES = {
2
+ "spam/near-duplicate": "https://developers.google.com/search/docs/essentials/spam-policies#scraped-content",
3
+ "spam/entity-swap": "https://developers.google.com/search/docs/essentials/spam-policies#doorway-pages",
4
+ "spam/thin-content": "https://developers.google.com/search/docs/essentials/spam-policies#thin-content",
5
+ "spam/boilerplate-ratio": "https://developers.google.com/search/docs/essentials/spam-policies#thin-content",
6
+ "spam/template-diversity": "https://developers.google.com/search/docs/essentials/spam-policies#doorway-pages",
7
+ "spam/publication-velocity": "https://developers.google.com/search/docs/essentials/spam-policies#spammy-auto-generated-content",
8
+ "spam/doorway-pattern": "https://developers.google.com/search/docs/essentials/spam-policies#doorway-pages",
9
+ "spam/template-coverage": "https://developers.google.com/search/docs/essentials/spam-policies#doorway-pages",
10
+ "content/unique-value": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content",
11
+ "content/meta-uniqueness": "https://developers.google.com/search/docs/appearance/snippet#meta-descriptions",
12
+ "content/heading-uniqueness": "https://developers.google.com/search/docs/appearance/snippet#headings",
13
+ "content/missing-author": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content#eeat",
14
+ "content/eeat-signals": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content#eeat",
15
+ "links/orphan-pages": "https://developers.google.com/search/docs/crawling-indexing/links-crawlable",
16
+ "links/dead-ends": "https://developers.google.com/search/docs/crawling-indexing/links-crawlable",
17
+ "links/cluster-connectivity": "https://developers.google.com/search/docs/crawling-indexing/links-crawlable",
18
+ "links/hub-pages": "https://developers.google.com/search/docs/crawling-indexing/links-crawlable",
19
+ "links/link-depth": "https://developers.google.com/search/docs/crawling-indexing/links-crawlable",
20
+ "tech/canonical-consistency": "https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls",
21
+ "tech/canonical-noindex-conflict": "https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls",
22
+ "tech/robots-noindex-conflict": "https://developers.google.com/search/docs/crawling-indexing/block-indexing",
23
+ "tech/sitemap-completeness": "https://developers.google.com/search/docs/crawling-indexing/sitemaps/overview",
24
+ "tech/redirect-chain": "https://developers.google.com/search/docs/crawling-indexing/301-redirects",
25
+ "tech/soft-404": "https://developers.google.com/search/docs/crawling-indexing/soft-404-errors",
26
+ "tech/og-completeness": "https://developers.google.com/search/docs/appearance/snippet",
27
+ "tech/hreflang-consistency": "https://developers.google.com/search/docs/specialty/international/managing-multi-regional-sites",
28
+ "schema/json-ld-valid": "https://developers.google.com/search/docs/appearance/structured-data/intro-structured-data",
29
+ "schema/required-fields": "https://developers.google.com/search/docs/appearance/structured-data/intro-structured-data",
30
+ "schema/consistency": "https://developers.google.com/search/docs/appearance/structured-data/intro-structured-data",
31
+ "cannibal/title-overlap": "https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls",
32
+ "cannibal/keyword-collision": "https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls",
33
+ "cannibal/url-pattern": "https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls",
34
+ };
35
+ //# sourceMappingURL=rule-references.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rule-references.js","sourceRoot":"","sources":["../src/rule-references.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,eAAe,GAA2B;IACrD,qBAAqB,EAAE,oFAAoF;IAC3G,kBAAkB,EAAE,kFAAkF;IACtG,mBAAmB,EAAE,iFAAiF;IACtG,wBAAwB,EAAE,iFAAiF;IAC3G,yBAAyB,EAAE,kFAAkF;IAC7G,2BAA2B,EAAE,kGAAkG;IAC/H,sBAAsB,EAAE,kFAAkF;IAC1G,wBAAwB,EAAE,kFAAkF;IAC5G,sBAAsB,EAAE,iFAAiF;IACzG,yBAAyB,EAAE,gFAAgF;IAC3G,4BAA4B,EAAE,uEAAuE;IACrG,wBAAwB,EAAE,sFAAsF;IAChH,sBAAsB,EAAE,sFAAsF;IAC9G,oBAAoB,EAAE,6EAA6E;IACnG,iBAAiB,EAAE,6EAA6E;IAChG,4BAA4B,EAAE,6EAA6E;IAC3G,iBAAiB,EAAE,6EAA6E;IAChG,kBAAkB,EAAE,6EAA6E;IACjG,4BAA4B,EAAE,wFAAwF;IACtH,iCAAiC,EAAE,wFAAwF;IAC3H,8BAA8B,EAAE,4EAA4E;IAC5G,2BAA2B,EAAE,+EAA+E;IAC5G,qBAAqB,EAAE,2EAA2E;IAClG,eAAe,EAAE,6EAA6E;IAC9F,sBAAsB,EAAE,8DAA8D;IACtF,2BAA2B,EAAE,iGAAiG;IAC9H,sBAAsB,EAAE,4FAA4F;IACpH,wBAAwB,EAAE,4FAA4F;IACtH,oBAAoB,EAAE,4FAA4F;IAClH,wBAAwB,EAAE,wFAAwF;IAClH,4BAA4B,EAAE,wFAAwF;IACtH,sBAAsB,EAAE,wFAAwF;CACjH,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { ParsedPage, RuleResult } from "../../types.js";
2
/**
 * Rule entry point from `rules/cannibal/keyword-collision`.
 *
 * NOTE(review): presumably reports pages that share at least `minShared`
 * keywords; the implementation and the default for `minShared` are not
 * visible from this declaration, so confirm against the source.
 *
 * @param pages - Parsed pages to evaluate.
 * @param minShared - Optional numeric threshold.
 * @returns The rule results produced for `pages`.
 */
export declare function keywordCollisionRule(
    pages: ParsedPage[],
    minShared?: number,
): RuleResult[];
3
+ //# sourceMappingURL=keyword-collision.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"keyword-collision.d.ts","sourceRoot":"","sources":["../../../src/rules/cannibal/keyword-collision.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,GAAE,MAAU,GACpB,UAAU,EAAE,CAwBd"}