@pseolint/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +53 -0
  3. package/dist/algorithms/entity-mask.d.ts +3 -0
  4. package/dist/algorithms/entity-mask.d.ts.map +1 -0
  5. package/dist/algorithms/entity-mask.js +8 -0
  6. package/dist/algorithms/entity-mask.js.map +1 -0
  7. package/dist/algorithms/entity-mask.test.d.ts +2 -0
  8. package/dist/algorithms/entity-mask.test.d.ts.map +1 -0
  9. package/dist/algorithms/entity-mask.test.js +23 -0
  10. package/dist/algorithms/entity-mask.test.js.map +1 -0
  11. package/dist/algorithms/simhash.d.ts +4 -0
  12. package/dist/algorithms/simhash.d.ts.map +1 -0
  13. package/dist/algorithms/simhash.js +64 -0
  14. package/dist/algorithms/simhash.js.map +1 -0
  15. package/dist/algorithms/simhash.test.d.ts +2 -0
  16. package/dist/algorithms/simhash.test.d.ts.map +1 -0
  17. package/dist/algorithms/simhash.test.js +23 -0
  18. package/dist/algorithms/simhash.test.js.map +1 -0
  19. package/dist/algorithms/tf-idf.d.ts +8 -0
  20. package/dist/algorithms/tf-idf.d.ts.map +1 -0
  21. package/dist/algorithms/tf-idf.js +55 -0
  22. package/dist/algorithms/tf-idf.js.map +1 -0
  23. package/dist/auditor.d.ts +3 -0
  24. package/dist/auditor.d.ts.map +1 -0
  25. package/dist/auditor.js +730 -0
  26. package/dist/auditor.js.map +1 -0
  27. package/dist/auditor.test.d.ts +2 -0
  28. package/dist/auditor.test.d.ts.map +1 -0
  29. package/dist/auditor.test.js +134 -0
  30. package/dist/auditor.test.js.map +1 -0
  31. package/dist/enrich-findings.d.ts +9 -0
  32. package/dist/enrich-findings.d.ts.map +1 -0
  33. package/dist/enrich-findings.js +436 -0
  34. package/dist/enrich-findings.js.map +1 -0
  35. package/dist/formatters/console.d.ts +6 -0
  36. package/dist/formatters/console.d.ts.map +1 -0
  37. package/dist/formatters/console.js +237 -0
  38. package/dist/formatters/console.js.map +1 -0
  39. package/dist/formatters/html.d.ts +3 -0
  40. package/dist/formatters/html.d.ts.map +1 -0
  41. package/dist/formatters/html.js +170 -0
  42. package/dist/formatters/html.js.map +1 -0
  43. package/dist/formatters/index.d.ts +6 -0
  44. package/dist/formatters/index.d.ts.map +1 -0
  45. package/dist/formatters/index.js +5 -0
  46. package/dist/formatters/index.js.map +1 -0
  47. package/dist/formatters/json.d.ts +3 -0
  48. package/dist/formatters/json.d.ts.map +1 -0
  49. package/dist/formatters/json.js +4 -0
  50. package/dist/formatters/json.js.map +1 -0
  51. package/dist/formatters/markdown.d.ts +3 -0
  52. package/dist/formatters/markdown.d.ts.map +1 -0
  53. package/dist/formatters/markdown.js +93 -0
  54. package/dist/formatters/markdown.js.map +1 -0
  55. package/dist/index.d.ts +45 -0
  56. package/dist/index.d.ts.map +1 -0
  57. package/dist/index.js +45 -0
  58. package/dist/index.js.map +1 -0
  59. package/dist/page-classifier.d.ts +4 -0
  60. package/dist/page-classifier.d.ts.map +1 -0
  61. package/dist/page-classifier.js +133 -0
  62. package/dist/page-classifier.js.map +1 -0
  63. package/dist/parser.d.ts +3 -0
  64. package/dist/parser.d.ts.map +1 -0
  65. package/dist/parser.js +131 -0
  66. package/dist/parser.js.map +1 -0
  67. package/dist/parser.test.d.ts +2 -0
  68. package/dist/parser.test.d.ts.map +1 -0
  69. package/dist/parser.test.js +37 -0
  70. package/dist/parser.test.js.map +1 -0
  71. package/dist/renderer.d.ts +15 -0
  72. package/dist/renderer.d.ts.map +1 -0
  73. package/dist/renderer.js +124 -0
  74. package/dist/renderer.js.map +1 -0
  75. package/dist/rule-references.d.ts +2 -0
  76. package/dist/rule-references.d.ts.map +1 -0
  77. package/dist/rule-references.js +35 -0
  78. package/dist/rule-references.js.map +1 -0
  79. package/dist/rules/cannibal/keyword-collision.d.ts +3 -0
  80. package/dist/rules/cannibal/keyword-collision.d.ts.map +1 -0
  81. package/dist/rules/cannibal/keyword-collision.js +25 -0
  82. package/dist/rules/cannibal/keyword-collision.js.map +1 -0
  83. package/dist/rules/cannibal/title-overlap.d.ts +3 -0
  84. package/dist/rules/cannibal/title-overlap.d.ts.map +1 -0
  85. package/dist/rules/cannibal/title-overlap.js +43 -0
  86. package/dist/rules/cannibal/title-overlap.js.map +1 -0
  87. package/dist/rules/cannibal/url-pattern.d.ts +3 -0
  88. package/dist/rules/cannibal/url-pattern.d.ts.map +1 -0
  89. package/dist/rules/cannibal/url-pattern.js +48 -0
  90. package/dist/rules/cannibal/url-pattern.js.map +1 -0
  91. package/dist/rules/content/eeat-signals.d.ts +3 -0
  92. package/dist/rules/content/eeat-signals.d.ts.map +1 -0
  93. package/dist/rules/content/eeat-signals.js +46 -0
  94. package/dist/rules/content/eeat-signals.js.map +1 -0
  95. package/dist/rules/content/heading-uniqueness.d.ts +3 -0
  96. package/dist/rules/content/heading-uniqueness.d.ts.map +1 -0
  97. package/dist/rules/content/heading-uniqueness.js +56 -0
  98. package/dist/rules/content/heading-uniqueness.js.map +1 -0
  99. package/dist/rules/content/meta-uniqueness.d.ts +3 -0
  100. package/dist/rules/content/meta-uniqueness.d.ts.map +1 -0
  101. package/dist/rules/content/meta-uniqueness.js +28 -0
  102. package/dist/rules/content/meta-uniqueness.js.map +1 -0
  103. package/dist/rules/content/missing-author.d.ts +3 -0
  104. package/dist/rules/content/missing-author.d.ts.map +1 -0
  105. package/dist/rules/content/missing-author.js +26 -0
  106. package/dist/rules/content/missing-author.js.map +1 -0
  107. package/dist/rules/content/unique-value.d.ts +3 -0
  108. package/dist/rules/content/unique-value.d.ts.map +1 -0
  109. package/dist/rules/content/unique-value.js +26 -0
  110. package/dist/rules/content/unique-value.js.map +1 -0
  111. package/dist/rules/links/cluster-connectivity.d.ts +7 -0
  112. package/dist/rules/links/cluster-connectivity.d.ts.map +1 -0
  113. package/dist/rules/links/cluster-connectivity.js +73 -0
  114. package/dist/rules/links/cluster-connectivity.js.map +1 -0
  115. package/dist/rules/links/cluster-key.d.ts +3 -0
  116. package/dist/rules/links/cluster-key.d.ts.map +1 -0
  117. package/dist/rules/links/cluster-key.js +22 -0
  118. package/dist/rules/links/cluster-key.js.map +1 -0
  119. package/dist/rules/links/dead-ends.d.ts +3 -0
  120. package/dist/rules/links/dead-ends.d.ts.map +1 -0
  121. package/dist/rules/links/dead-ends.js +13 -0
  122. package/dist/rules/links/dead-ends.js.map +1 -0
  123. package/dist/rules/links/hub-pages.d.ts +7 -0
  124. package/dist/rules/links/hub-pages.d.ts.map +1 -0
  125. package/dist/rules/links/hub-pages.js +73 -0
  126. package/dist/rules/links/hub-pages.js.map +1 -0
  127. package/dist/rules/links/link-depth.d.ts +3 -0
  128. package/dist/rules/links/link-depth.d.ts.map +1 -0
  129. package/dist/rules/links/link-depth.js +46 -0
  130. package/dist/rules/links/link-depth.js.map +1 -0
  131. package/dist/rules/links/orphan-pages.d.ts +3 -0
  132. package/dist/rules/links/orphan-pages.d.ts.map +1 -0
  133. package/dist/rules/links/orphan-pages.js +19 -0
  134. package/dist/rules/links/orphan-pages.js.map +1 -0
  135. package/dist/rules/schema/consistency.d.ts +3 -0
  136. package/dist/rules/schema/consistency.d.ts.map +1 -0
  137. package/dist/rules/schema/consistency.js +44 -0
  138. package/dist/rules/schema/consistency.js.map +1 -0
  139. package/dist/rules/schema/json-ld-valid.d.ts +3 -0
  140. package/dist/rules/schema/json-ld-valid.d.ts.map +1 -0
  141. package/dist/rules/schema/json-ld-valid.js +47 -0
  142. package/dist/rules/schema/json-ld-valid.js.map +1 -0
  143. package/dist/rules/schema/required-fields.d.ts +3 -0
  144. package/dist/rules/schema/required-fields.d.ts.map +1 -0
  145. package/dist/rules/schema/required-fields.js +60 -0
  146. package/dist/rules/schema/required-fields.js.map +1 -0
  147. package/dist/rules/spam/boilerplate-ratio.d.ts +3 -0
  148. package/dist/rules/spam/boilerplate-ratio.d.ts.map +1 -0
  149. package/dist/rules/spam/boilerplate-ratio.js +50 -0
  150. package/dist/rules/spam/boilerplate-ratio.js.map +1 -0
  151. package/dist/rules/spam/doorway-pattern.d.ts +4 -0
  152. package/dist/rules/spam/doorway-pattern.d.ts.map +1 -0
  153. package/dist/rules/spam/doorway-pattern.js +47 -0
  154. package/dist/rules/spam/doorway-pattern.js.map +1 -0
  155. package/dist/rules/spam/entity-swap.d.ts +7 -0
  156. package/dist/rules/spam/entity-swap.d.ts.map +1 -0
  157. package/dist/rules/spam/entity-swap.js +26 -0
  158. package/dist/rules/spam/entity-swap.js.map +1 -0
  159. package/dist/rules/spam/near-duplicate.d.ts +11 -0
  160. package/dist/rules/spam/near-duplicate.d.ts.map +1 -0
  161. package/dist/rules/spam/near-duplicate.js +25 -0
  162. package/dist/rules/spam/near-duplicate.js.map +1 -0
  163. package/dist/rules/spam/publication-velocity.d.ts +3 -0
  164. package/dist/rules/spam/publication-velocity.d.ts.map +1 -0
  165. package/dist/rules/spam/publication-velocity.js +25 -0
  166. package/dist/rules/spam/publication-velocity.js.map +1 -0
  167. package/dist/rules/spam/template-coverage.d.ts +3 -0
  168. package/dist/rules/spam/template-coverage.d.ts.map +1 -0
  169. package/dist/rules/spam/template-coverage.js +87 -0
  170. package/dist/rules/spam/template-coverage.js.map +1 -0
  171. package/dist/rules/spam/template-diversity.d.ts +3 -0
  172. package/dist/rules/spam/template-diversity.d.ts.map +1 -0
  173. package/dist/rules/spam/template-diversity.js +19 -0
  174. package/dist/rules/spam/template-diversity.js.map +1 -0
  175. package/dist/rules/spam/thin-content.d.ts +6 -0
  176. package/dist/rules/spam/thin-content.d.ts.map +1 -0
  177. package/dist/rules/spam/thin-content.js +22 -0
  178. package/dist/rules/spam/thin-content.js.map +1 -0
  179. package/dist/rules/tech/canonical-consistency.d.ts +4 -0
  180. package/dist/rules/tech/canonical-consistency.d.ts.map +1 -0
  181. package/dist/rules/tech/canonical-consistency.js +78 -0
  182. package/dist/rules/tech/canonical-consistency.js.map +1 -0
  183. package/dist/rules/tech/canonical-noindex-conflict.d.ts +3 -0
  184. package/dist/rules/tech/canonical-noindex-conflict.d.ts.map +1 -0
  185. package/dist/rules/tech/canonical-noindex-conflict.js +27 -0
  186. package/dist/rules/tech/canonical-noindex-conflict.js.map +1 -0
  187. package/dist/rules/tech/hreflang-consistency.d.ts +3 -0
  188. package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -0
  189. package/dist/rules/tech/hreflang-consistency.js +99 -0
  190. package/dist/rules/tech/hreflang-consistency.js.map +1 -0
  191. package/dist/rules/tech/og-completeness.d.ts +3 -0
  192. package/dist/rules/tech/og-completeness.d.ts.map +1 -0
  193. package/dist/rules/tech/og-completeness.js +35 -0
  194. package/dist/rules/tech/og-completeness.js.map +1 -0
  195. package/dist/rules/tech/redirect-chain.d.ts +3 -0
  196. package/dist/rules/tech/redirect-chain.d.ts.map +1 -0
  197. package/dist/rules/tech/redirect-chain.js +20 -0
  198. package/dist/rules/tech/redirect-chain.js.map +1 -0
  199. package/dist/rules/tech/robots-noindex-conflict.d.ts +3 -0
  200. package/dist/rules/tech/robots-noindex-conflict.d.ts.map +1 -0
  201. package/dist/rules/tech/robots-noindex-conflict.js +30 -0
  202. package/dist/rules/tech/robots-noindex-conflict.js.map +1 -0
  203. package/dist/rules/tech/robots-sitemap-presence.d.ts +3 -0
  204. package/dist/rules/tech/robots-sitemap-presence.d.ts.map +1 -0
  205. package/dist/rules/tech/robots-sitemap-presence.js +61 -0
  206. package/dist/rules/tech/robots-sitemap-presence.js.map +1 -0
  207. package/dist/rules/tech/sitemap-completeness.d.ts +3 -0
  208. package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -0
  209. package/dist/rules/tech/sitemap-completeness.js +40 -0
  210. package/dist/rules/tech/sitemap-completeness.js.map +1 -0
  211. package/dist/rules/tech/soft-404.d.ts +3 -0
  212. package/dist/rules/tech/soft-404.d.ts.map +1 -0
  213. package/dist/rules/tech/soft-404.js +24 -0
  214. package/dist/rules/tech/soft-404.js.map +1 -0
  215. package/dist/types.d.ts +170 -0
  216. package/dist/types.d.ts.map +1 -0
  217. package/dist/types.js +2 -0
  218. package/dist/types.js.map +1 -0
  219. package/dist/url-normalize.d.ts +10 -0
  220. package/dist/url-normalize.d.ts.map +1 -0
  221. package/dist/url-normalize.js +52 -0
  222. package/dist/url-normalize.js.map +1 -0
  223. package/package.json +46 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 ouranos-labs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,53 @@
1
+ # @pseolint/core
2
+
3
+ > Programmatic SEO audit engine — 34 rules across 6 categories for SpamBrain risk detection.
4
+
5
+ The core engine behind [pseolint](https://www.npmjs.com/package/pseolint). Use this package to integrate pSEO auditing into your own tools.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ npm install @pseolint/core
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```typescript
16
+ import { auditSource } from "@pseolint/core";
17
+
18
+ const summary = await auditSource("./out");
19
+ console.log(`Score: ${summary.score}/100`);
20
+ console.log(`Findings: ${summary.findings.length}`);
21
+ ```
22
+
23
+ ## What It Checks
24
+
25
+ 34 rules across 6 categories:
26
+
27
+ - **SpamBrain Risk** — near-duplicate detection (SimHash), entity-swap doorway pages, thin content, boilerplate ratio, template diversity
28
+ - **Content Quality** — unique value per page, heading/meta uniqueness, author attribution, E-E-A-T signals
29
+ - **Internal Linking** — orphan pages, dead ends, cluster connectivity, hub pages, link depth
30
+ - **Technical SEO** — canonical consistency, sitemap completeness, soft 404s, redirect chains, hreflang, Open Graph
31
+ - **Structured Data** — JSON-LD validation, required fields, schema consistency
32
+ - **Cannibalization** — title overlap, keyword collision, URL pattern conflicts
33
+
34
+ ## API
35
+
36
+ ### `auditSource(source, options?)`
37
+
38
+ Audits a directory path or URL. Returns a promise that resolves to an `AuditSummary` with score, category scores, and enriched findings.
39
+
40
+ ### Formatters
41
+
42
+ ```typescript
43
+ import { formatConsole, formatJson, formatMarkdown, formatHtml } from "@pseolint/core";
44
+
45
+ const output = formatConsole(summary);
46
+ const json = formatJson(summary);
47
+ const md = formatMarkdown(summary);
48
+ const html = formatHtml(summary);
49
+ ```
50
+
51
+ ## License
52
+
53
+ MIT
@@ -0,0 +1,3 @@
1
+ import type { EntityMaskPattern } from "../types.js";
2
+ export declare function maskEntities(input: string, patterns: EntityMaskPattern[]): string;
3
+ //# sourceMappingURL=entity-mask.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"entity-mask.d.ts","sourceRoot":"","sources":["../../src/algorithms/entity-mask.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAErD,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,iBAAiB,EAAE,GAAG,MAAM,CAMjF"}
@@ -0,0 +1,8 @@
1
/**
 * Replaces every match of each entity pattern in `input` with that entry's placeholder.
 *
 * Entries are applied in array order, each one working on the output of the
 * previous one. A pattern that lacks the `g` flag is recompiled with it, so
 * all occurrences are replaced rather than only the first.
 *
 * @param input - Text to mask.
 * @param patterns - Entries exposing a `pattern` RegExp and a `placeholder` string.
 * @returns The text with all matches replaced.
 */
export function maskEntities(input, patterns) {
    let masked = input;
    patterns.forEach((entry) => {
        const original = entry.pattern;
        const globalFlags = original.flags.includes("g") ? original.flags : `${original.flags}g`;
        // A fresh RegExp is built from source + flags; the caller's RegExp object is not used for matching.
        const matcher = new RegExp(original.source, globalFlags);
        masked = masked.replace(matcher, entry.placeholder);
    });
    return masked;
}
8
+ //# sourceMappingURL=entity-mask.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"entity-mask.js","sourceRoot":"","sources":["../../src/algorithms/entity-mask.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,YAAY,CAAC,KAAa,EAAE,QAA6B;IACvE,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QACrC,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,GAAG,CAAC;QAClG,MAAM,WAAW,GAAG,IAAI,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QAC5D,OAAO,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC,WAAW,CAAC,CAAC;IACtD,CAAC,EAAE,KAAK,CAAC,CAAC;AACZ,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=entity-mask.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"entity-mask.test.d.ts","sourceRoot":"","sources":["../../src/algorithms/entity-mask.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,23 @@
1
+ import { describe, expect, test } from "vitest";
2
+ import { maskEntities } from "./entity-mask.js";
3
// Behavioural tests for maskEntities: placeholder substitution and forced global replacement.
describe("entity masking", () => {
    test("replaces matched entities with placeholders", () => {
        const patterns = [
            { placeholder: "[STATE]", pattern: /\bCalifornia\b/gi },
            { placeholder: "[CITY]", pattern: /\bLos Angeles\b/gi },
            { placeholder: "[ZIP]", pattern: /\b\d{5}\b/g }
        ];
        const masked = maskEntities("Form your California LLC in Los Angeles with zip 90210.", patterns);
        // Every placeholder must be present in the masked text...
        for (const placeholder of ["[STATE]", "[CITY]", "[ZIP]"]) {
            expect(masked).toContain(placeholder);
        }
        // ...and the raw entity values must be gone.
        for (const leaked of ["California", "90210"]) {
            expect(masked).not.toContain(leaked);
        }
    });
    test("replaces all occurrences even if regex is not global", () => {
        // The pattern deliberately has no `g` flag.
        const statePattern = { placeholder: "[STATE]", pattern: /\bCalifornia\b/i };
        const masked = maskEntities("California LLC filing in California has California-specific rules.", [statePattern]);
        expect(masked).toBe("[STATE] LLC filing in [STATE] has [STATE]-specific rules.");
    });
});
23
+ //# sourceMappingURL=entity-mask.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"entity-mask.test.js","sourceRoot":"","sources":["../../src/algorithms/entity-mask.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAEhD,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;IAC9B,IAAI,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACvD,MAAM,KAAK,GAAG,yDAAyD,CAAC;QACxE,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,EAAE;YACjC,EAAE,WAAW,EAAE,SAAS,EAAE,OAAO,EAAE,kBAAkB,EAAE;YACvD,EAAE,WAAW,EAAE,QAAQ,EAAE,OAAO,EAAE,mBAAmB,EAAE;YACvD,EAAE,WAAW,EAAE,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE;SAChD,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAChE,MAAM,KAAK,GAAG,oEAAoE,CAAC;QACnF,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,EAAE,CAAC,EAAE,WAAW,EAAE,SAAS,EAAE,OAAO,EAAE,iBAAiB,EAAE,CAAC,CAAC,CAAC;QAE7F,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,2DAA2D,CAAC,CAAC;IACnF,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,4 @@
1
+ export declare function simHashFromText(input: string): bigint;
2
+ export declare function hammingDistance(left: bigint, right: bigint): number;
3
+ export declare function similarityFromDistance(distance: number): number;
4
+ //# sourceMappingURL=simhash.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"simhash.d.ts","sourceRoot":"","sources":["../../src/algorithms/simhash.ts"],"names":[],"mappings":"AAoCA,wBAAgB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAwBrD;AAED,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAUnE;AAED,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAE/D"}
@@ -0,0 +1,64 @@
1
// Number of consecutive tokens joined into one shingle.
const SHINGLE_SIZE = 3;
// Width of a fingerprint, in bits.
const SIMHASH_BITS = 64;
// Mask with the low 64 bits set (2^64 - 1).
const MAX_64 = 0xffffffffffffffffn;
4
/**
 * Splits text into lowercase word tokens.
 *
 * The input is lowercased, every run of characters that is not a Unicode
 * letter, a Unicode number or whitespace is replaced by one space, and the
 * result is split on whitespace. Empty pieces are dropped.
 *
 * @param input - Raw text.
 * @returns The tokens in their original order.
 */
function normalizeText(input) {
    const lowered = input.toLowerCase();
    const wordsOnly = lowered.replace(/[^\p{L}\p{N}\s]+/gu, " ");
    const tokens = [];
    for (const piece of wordsOnly.split(/\s+/)) {
        // Leading/trailing whitespace produces empty pieces; skip them.
        if (piece) {
            tokens.push(piece);
        }
    }
    return tokens;
}
11
/**
 * Builds overlapping word shingles of SHINGLE_SIZE tokens, each joined by a single space.
 *
 * When there are fewer tokens than SHINGLE_SIZE, the result is one shingle
 * containing all tokens.
 *
 * @param tokens - Word tokens in document order.
 * @returns One shingle per sliding-window position.
 */
function buildShingles(tokens) {
    const windowCount = tokens.length - SHINGLE_SIZE + 1;
    if (windowCount <= 0) {
        return [tokens.join(" ")];
    }
    return Array.from({ length: windowCount }, (_, start) => tokens.slice(start, start + SHINGLE_SIZE).join(" "));
}
21
/**
 * Computes a 64-bit FNV-1a style hash of a string.
 *
 * Each step XORs one code point of the string (not one byte) into the running
 * hash, multiplies by the FNV prime and truncates to 64 bits.
 *
 * @param value - String to hash.
 * @returns The hash as a non-negative bigint below 2^64.
 */
function fnv1a64(value) {
    const OFFSET_BASIS = 0xcbf29ce484222325n;
    const FNV_PRIME = 0x100000001b3n;
    let digest = OFFSET_BASIS;
    for (const symbol of value) {
        const codePoint = symbol.codePointAt(0) ?? 0;
        // asUintN(64, x) keeps the low 64 bits, i.e. x mod 2^64.
        digest = BigInt.asUintN(64, (digest ^ BigInt(codePoint)) * FNV_PRIME);
    }
    return digest;
}
30
/**
 * Computes a SimHash fingerprint of SIMHASH_BITS bits for a text.
 *
 * The text is tokenized and shingled, and every shingle is hashed. For each
 * bit position a shingle votes +1 when its hash has that bit set and -1
 * otherwise; the fingerprint has a bit set wherever the vote total is positive.
 *
 * @param input - Raw text.
 * @returns The fingerprint, or `0n` when the text yields no tokens.
 */
export function simHashFromText(input) {
    const tokens = normalizeText(input);
    if (tokens.length === 0) {
        return 0n;
    }
    const votes = new Array(SIMHASH_BITS).fill(0);
    for (const shingle of buildShingles(tokens)) {
        // Consume the hash from the least significant bit upwards.
        let remaining = fnv1a64(shingle);
        for (let position = 0; position < SIMHASH_BITS; position += 1) {
            votes[position] += (remaining & 1n) === 1n ? 1 : -1;
            remaining >>= 1n;
        }
    }
    return votes.reduce((fingerprint, vote, position) => (vote > 0 ? fingerprint | (1n << BigInt(position)) : fingerprint), 0n);
}
52
/**
 * Counts the bit positions in which two fingerprints differ.
 *
 * @param left - First fingerprint.
 * @param right - Second fingerprint.
 * @returns The number of set bits in `left ^ right`. When that XOR is
 *   negative (the operands have different signs) it is reinterpreted as an
 *   unsigned 64-bit value first, so the result is at most 64.
 */
export function hammingDistance(left, right) {
    let diff = left ^ right;
    if (diff < 0n) {
        // A negative bigint never becomes 0n under arithmetic right shift or
        // bit clearing, so the loop below would not terminate. Fold it into
        // the unsigned 64-bit range used by the fingerprints.
        diff = BigInt.asUintN(64, diff);
    }
    let count = 0;
    while (diff !== 0n) {
        // Clear the lowest set bit; one iteration per differing bit.
        diff &= diff - 1n;
        count += 1;
    }
    return count;
}
61
/**
 * Converts a Hamming distance into a similarity ratio.
 *
 * @param distance - Number of differing bits.
 * @returns `(SIMHASH_BITS - distance) / SIMHASH_BITS`, floored at 0.
 */
export function similarityFromDistance(distance) {
    const matchingBits = SIMHASH_BITS - distance;
    const ratio = matchingBits / SIMHASH_BITS;
    return Math.max(0, ratio);
}
64
+ //# sourceMappingURL=simhash.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"simhash.js","sourceRoot":"","sources":["../../src/algorithms/simhash.ts"],"names":[],"mappings":"AAAA,MAAM,YAAY,GAAG,CAAC,CAAC;AACvB,MAAM,YAAY,GAAG,EAAE,CAAC;AACxB,MAAM,MAAM,GAAG,CAAC,EAAE,IAAI,GAAG,CAAC,GAAG,EAAE,CAAC;AAEhC,SAAS,aAAa,CAAC,KAAa;IAClC,OAAO,KAAK;SACT,WAAW,EAAE;SACb,OAAO,CAAC,oBAAoB,EAAE,GAAG,CAAC;SAClC,KAAK,CAAC,KAAK,CAAC;SACZ,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC;AAED,SAAS,aAAa,CAAC,MAAgB;IACrC,IAAI,MAAM,CAAC,MAAM,GAAG,YAAY,EAAE,CAAC;QACjC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5B,CAAC;IAED,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,MAAM,CAAC,MAAM,GAAG,YAAY,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1D,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC7D,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,OAAO,CAAC,KAAa;IAC5B,IAAI,IAAI,GAAG,mBAAmB,CAAC;IAC/B,MAAM,KAAK,GAAG,cAAc,CAAC;IAE7B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QACzC,IAAI,GAAG,CAAC,IAAI,GAAG,KAAK,CAAC,GAAG,MAAM,CAAC;IACjC,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,KAAa;IAC3C,MAAM,MAAM,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IACpC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IACvC,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;IAEjE,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QAC9B,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,YAAY,EAAE,GAAG,IAAI,CAAC,EAAE,CAAC;YAC/C,MAAM,KAAK,GAAG,CAAC,IAAI,GAAG,CAAC,EAAE,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC;YAClD,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;IAED,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,YAAY,EAAE,GAAG,IAAI,CAAC,EAAE,CAAC;QAC/C,IAAI,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,WAAW,IAAI,EAAE,IAAI,MAAM,CAAC,GAA
G,CAAC,CAAC;QACnC,CAAC;IACH,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,KAAa;IACzD,IAAI,IAAI,GAAG,IAAI,GAAG,KAAK,CAAC;IACxB,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,OAAO,IAAI,KAAK,EAAE,EAAE,CAAC;QACnB,KAAK,IAAI,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC,CAAC;QAC3B,IAAI,KAAK,EAAE,CAAC;IACd,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,QAAgB;IACrD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,YAAY,GAAG,QAAQ,CAAC,GAAG,YAAY,CAAC,CAAC;AAC/D,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=simhash.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"simhash.test.d.ts","sourceRoot":"","sources":["../../src/algorithms/simhash.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,23 @@
1
+ import { describe, expect, test } from "vitest";
2
+ import { hammingDistance, simHashFromText, similarityFromDistance } from "./simhash.js";
3
// Behavioural tests for the SimHash helpers: identity, dissimilarity and empty input.
describe("simhash", () => {
    test("returns same fingerprint for identical text", () => {
        const text = "california llc filing requirements and annual fees";
        const first = simHashFromText(text);
        const second = simHashFromText(text);
        expect(first).toBe(second);
        expect(hammingDistance(first, second)).toBe(0);
        expect(similarityFromDistance(0)).toBe(1);
    });
    test("returns lower similarity for different text", () => {
        const legal = simHashFromText("california llc filing requirements and annual fees");
        const baking = simHashFromText("best sourdough starter hydration and oven spring tips");
        const bitsApart = hammingDistance(legal, baking);
        expect(bitsApart).toBeGreaterThan(10);
        expect(similarityFromDistance(bitsApart)).toBeLessThan(0.85);
    });
    test("returns neutral fingerprint for empty input", () => {
        // Whitespace-only text yields no tokens.
        expect(simHashFromText(" ")).toBe(0n);
    });
});
23
+ //# sourceMappingURL=simhash.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"simhash.test.js","sourceRoot":"","sources":["../../src/algorithms/simhash.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AAExF,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE;IACvB,IAAI,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACvD,MAAM,CAAC,GAAG,eAAe,CAAC,oDAAoD,CAAC,CAAC;QAChF,MAAM,CAAC,GAAG,eAAe,CAAC,oDAAoD,CAAC,CAAC;QAEhF,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,MAAM,CAAC,eAAe,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACtC,MAAM,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACvD,MAAM,CAAC,GAAG,eAAe,CAAC,oDAAoD,CAAC,CAAC;QAChF,MAAM,CAAC,GAAG,eAAe,CAAC,uDAAuD,CAAC,CAAC;QAEnF,MAAM,QAAQ,GAAG,eAAe,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,QAAQ,CAAC,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;QACrC,MAAM,CAAC,sBAAsB,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;IAC9D,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACvD,MAAM,WAAW,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC/B,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,8 @@
1
/**
 * Pre-tokenized document collection accepted by `extractKeywords` in place of raw texts.
 */
export interface TfIdfCorpus {
    /** One set of terms per document. */
    docSets: Array<Set<string>>;
    /** Document count used as the numerator of the inverse document frequency. */
    docCount: number;
}
5
+ export declare function buildCorpus(allTexts: string[]): TfIdfCorpus;
6
+ export declare function extractKeywords(text: string, allTexts: string[], topN: number): string[];
7
+ export declare function extractKeywords(text: string, corpus: TfIdfCorpus, topN: number): string[];
8
+ //# sourceMappingURL=tf-idf.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tf-idf.d.ts","sourceRoot":"","sources":["../../src/algorithms/tf-idf.ts"],"names":[],"mappings":"AAqBA,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,CAK3D;AAeD,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;AAC1F,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC"}
@@ -0,0 +1,55 @@
1
/**
 * Breaks text into lowercase word tokens.
 *
 * After lowercasing, each run of characters other than Unicode letters,
 * Unicode numbers and whitespace is replaced by a space; the text is then
 * split on whitespace and empty strings are removed.
 *
 * @param text - Raw text.
 * @returns The tokens in document order.
 */
function tokenize(text) {
    const cleaned = text.toLowerCase().replace(/[^\p{L}\p{N}\s]+/gu, " ");
    const pieces = cleaned.split(/\s+/);
    return pieces.filter((piece) => piece !== "");
}
8
/**
 * Computes the relative frequency of every distinct token.
 *
 * @param tokens - Tokens of one document.
 * @returns A Map from term to (occurrences / total token count), keyed in
 *   order of first appearance.
 */
function termFrequency(tokens) {
    const occurrences = new Map();
    for (const token of tokens) {
        const seen = occurrences.get(token);
        occurrences.set(token, seen === undefined ? 1 : seen + 1);
    }
    const size = tokens.length;
    const frequencies = new Map();
    occurrences.forEach((hits, term) => {
        frequencies.set(term, hits / size);
    });
    return frequencies;
}
20
/**
 * Tokenizes every text once so the corpus can be reused across keyword extractions.
 *
 * @param allTexts - One string per document.
 * @returns An object holding the set of distinct tokens of each document
 *   (`docSets`) and the number of documents (`docCount`).
 */
export function buildCorpus(allTexts) {
    const toTermSet = (documentText) => new Set(tokenize(documentText));
    const docSets = allTexts.map(toTermSet);
    return { docSets, docCount: allTexts.length };
}
26
/**
 * Computes the inverse document frequency of a term within a corpus.
 *
 * @param term - Term to look up.
 * @param corpus - Object with `docSets` (one Set of terms per document) and `docCount`.
 * @returns `ln(docCount / documents containing the term)`, or 0 when no
 *   document contains the term.
 */
function inverseDocumentFrequency(term, corpus) {
    const documents = corpus.docSets;
    let hits = 0;
    for (let index = 0; index < documents.length; index += 1) {
        if (documents[index].has(term)) {
            hits += 1;
        }
    }
    // Guard the division: a term that appears nowhere scores 0.
    return hits === 0 ? 0 : Math.log(corpus.docCount / hits);
}
38
/**
 * Returns the highest-scoring TF-IDF terms of a text.
 *
 * @param text - Document to extract keywords from.
 * @param source - Either an array of document texts (a corpus is built from
 *   it on every call) or a corpus object to reuse.
 * @param topN - Upper bound passed to `slice` when trimming the ranked list.
 * @returns Terms ordered by descending score; an empty array when the text
 *   yields no tokens.
 */
export function extractKeywords(text, source, topN) {
    const corpus = Array.isArray(source) ? buildCorpus(source) : source;
    const tokens = tokenize(text);
    if (tokens.length === 0) {
        return [];
    }
    const ranked = [];
    for (const [term, frequency] of termFrequency(tokens)) {
        ranked.push([term, frequency * inverseDocumentFrequency(term, corpus)]);
    }
    // Highest score first.
    ranked.sort((first, second) => second[1] - first[1]);
    return ranked.slice(0, topN).map((entry) => entry[0]);
}
55
+ //# sourceMappingURL=tf-idf.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tf-idf.js","sourceRoot":"","sources":["../../src/algorithms/tf-idf.ts"],"names":[],"mappings":"AAAA,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI;SACR,WAAW,EAAE;SACb,OAAO,CAAC,oBAAoB,EAAE,GAAG,CAAC;SAClC,KAAK,CAAC,KAAK,CAAC;SACZ,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC;AAED,SAAS,aAAa,CAAC,MAAgB;IACrC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAClD,CAAC;IACD,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;IAC5B,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;IACrC,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;QACnC,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,GAAG,KAAK,CAAC,CAAC;IAC9B,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAOD,MAAM,UAAU,WAAW,CAAC,QAAkB;IAC5C,OAAO;QACL,OAAO,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAClD,QAAQ,EAAE,QAAQ,CAAC,MAAM;KAC1B,CAAC;AACJ,CAAC;AAED,SAAS,wBAAwB,CAAC,IAAY,EAAE,MAAmB;IACjE,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACpC,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACrB,YAAY,IAAI,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IACD,IAAI,YAAY,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,CAAC;IACX,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,GAAG,YAAY,CAAC,CAAC;AAClD,CAAC;AAID,MAAM,UAAU,eAAe,CAAC,IAAY,EAAE,MAA8B,EAAE,IAAY;IACxF,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACpE,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,EAAE,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IACjC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IAEzC,KAAK,MAAM,IAAI,IAAI,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,wBAAwB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QACnD,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;SAChC,IAAI,CAAC,CAAC,CAAC,EAAE,CAA
C,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;SAC3B,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;SACd,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;AAC3B,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { AuditOptions, AuditSummary } from "./types.js";
2
+ export declare function auditSource(source: string, options?: AuditOptions): Promise<AuditSummary>;
3
+ //# sourceMappingURL=auditor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AAwCA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAA4F,MAAM,YAAY,CAAC;AAurBvJ,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CA6I/F"}