slopless 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. package/README.md +64 -0
  2. package/dist/families/metrics/avg-sentence-length.js +22 -0
  3. package/dist/families/metrics/avg-sentence-length.js.map +1 -0
  4. package/dist/families/metrics/coleman-liau.js +27 -0
  5. package/dist/families/metrics/coleman-liau.js.map +1 -0
  6. package/dist/families/metrics/flesch-kincaid.js +27 -0
  7. package/dist/families/metrics/flesch-kincaid.js.map +1 -0
  8. package/dist/families/metrics/gunning-fog.js +27 -0
  9. package/dist/families/metrics/gunning-fog.js.map +1 -0
  10. package/dist/families/metrics/paragraph-length.js +21 -0
  11. package/dist/families/metrics/paragraph-length.js.map +1 -0
  12. package/dist/families/metrics/word-repetition.js +46 -0
  13. package/dist/families/metrics/word-repetition.js.map +1 -0
  14. package/dist/families/orthography/colon-dramatic.js +117 -0
  15. package/dist/families/orthography/colon-dramatic.js.map +1 -0
  16. package/dist/families/orthography/em-dashes.js +41 -0
  17. package/dist/families/orthography/em-dashes.js.map +1 -0
  18. package/dist/families/orthography/exclamation-density.js +51 -0
  19. package/dist/families/orthography/exclamation-density.js.map +1 -0
  20. package/dist/families/orthography/fake-timestamps.js +103 -0
  21. package/dist/families/orthography/fake-timestamps.js.map +1 -0
  22. package/dist/families/orthography/sentence-case.js +66 -0
  23. package/dist/families/orthography/sentence-case.js.map +1 -0
  24. package/dist/families/orthography/smart-quotes.js +53 -0
  25. package/dist/families/orthography/smart-quotes.js.map +1 -0
  26. package/dist/families/phrases/cliches.js +17 -0
  27. package/dist/families/phrases/cliches.js.map +1 -0
  28. package/dist/families/phrases/corporate-speak.js +23 -0
  29. package/dist/families/phrases/corporate-speak.js.map +1 -0
  30. package/dist/families/phrases/data/cliches.json +699 -0
  31. package/dist/families/phrases/data/corporate-speak.json +27 -0
  32. package/dist/families/phrases/data/prohibited-phrases.json +8 -0
  33. package/dist/families/phrases/data/skunked-terms.json +10 -0
  34. package/dist/families/phrases/data/uncomparables.json +57 -0
  35. package/dist/families/phrases/humble-bragger.js +26 -0
  36. package/dist/families/phrases/humble-bragger.js.map +1 -0
  37. package/dist/families/phrases/jargon-faker.js +84 -0
  38. package/dist/families/phrases/jargon-faker.js.map +1 -0
  39. package/dist/families/phrases/llm-disclaimer.js +54 -0
  40. package/dist/families/phrases/llm-disclaimer.js.map +1 -0
  41. package/dist/families/phrases/prohibited-phrases.js +17 -0
  42. package/dist/families/phrases/prohibited-phrases.js.map +1 -0
  43. package/dist/families/phrases/skunked-terms.js +23 -0
  44. package/dist/families/phrases/skunked-terms.js.map +1 -0
  45. package/dist/families/phrases/uncomparables.js +78 -0
  46. package/dist/families/phrases/uncomparables.js.map +1 -0
  47. package/dist/families/semantic-thinness/patterns/abstract-contrast.json +85 -0
  48. package/dist/families/semantic-thinness/patterns/abstract-metaphor-claim.json +112 -0
  49. package/dist/families/semantic-thinness/patterns/body-emotion-shorthand.json +77 -0
  50. package/dist/families/semantic-thinness/patterns/body-knows.json +80 -0
  51. package/dist/families/semantic-thinness/patterns/deictic-summary.json +109 -0
  52. package/dist/families/semantic-thinness/patterns/empty-atmosphere-shift.json +70 -0
  53. package/dist/families/semantic-thinness/patterns/empty-emotional-weather.json +104 -0
  54. package/dist/families/semantic-thinness/patterns/empty-scene-state.json +117 -0
  55. package/dist/families/semantic-thinness/patterns/empty-scene-transition.json +98 -0
  56. package/dist/families/semantic-thinness/patterns/gaze-choreography.json +87 -0
  57. package/dist/families/semantic-thinness/patterns/generic-lesson-extraction.json +82 -0
  58. package/dist/families/semantic-thinness/patterns/generic-pressure-or-stakes.json +107 -0
  59. package/dist/families/semantic-thinness/patterns/generic-realization.json +113 -0
  60. package/dist/families/semantic-thinness/patterns/hollow-significance.json +103 -0
  61. package/dist/families/semantic-thinness/patterns/low-information-physical-blocking.json +113 -0
  62. package/dist/families/semantic-thinness/patterns/point-is-frame.json +81 -0
  63. package/dist/families/semantic-thinness/patterns/real-work-begins.json +66 -0
  64. package/dist/families/semantic-thinness/patterns/silence-as-actor.json +69 -0
  65. package/dist/families/semantic-thinness/patterns/something-shifted.json +73 -0
  66. package/dist/families/semantic-thinness/patterns/truth-answer-moves.json +75 -0
  67. package/dist/families/semantic-thinness/patterns/vague-connective-payoff.json +90 -0
  68. package/dist/families/semantic-thinness/patterns/vague-threshold-change.json +98 -0
  69. package/dist/families/semantic-thinness/private/pattern-data-a.js +15 -0
  70. package/dist/families/semantic-thinness/private/pattern-data-a.js.map +1 -0
  71. package/dist/families/semantic-thinness/private/pattern-data-b.js +15 -0
  72. package/dist/families/semantic-thinness/private/pattern-data-b.js.map +1 -0
  73. package/dist/families/semantic-thinness/private/pattern-data-c.js +9 -0
  74. package/dist/families/semantic-thinness/private/pattern-data-c.js.map +1 -0
  75. package/dist/families/semantic-thinness/private/pattern-data-d.js +17 -0
  76. package/dist/families/semantic-thinness/private/pattern-data-d.js.map +1 -0
  77. package/dist/families/semantic-thinness/private/pattern-data.js +11 -0
  78. package/dist/families/semantic-thinness/private/pattern-data.js.map +1 -0
  79. package/dist/families/semantic-thinness/private/pattern-matcher.js +172 -0
  80. package/dist/families/semantic-thinness/private/pattern-matcher.js.map +1 -0
  81. package/dist/families/semantic-thinness/semantic-thinness.js +25 -0
  82. package/dist/families/semantic-thinness/semantic-thinness.js.map +1 -0
  83. package/dist/families/syntactic-patterns/authority/authority-padding.js +81 -0
  84. package/dist/families/syntactic-patterns/authority/authority-padding.js.map +1 -0
  85. package/dist/families/syntactic-patterns/closers/affirmation-closers.js +48 -0
  86. package/dist/families/syntactic-patterns/closers/affirmation-closers.js.map +1 -0
  87. package/dist/families/syntactic-patterns/closers/boilerplate-conclusion.js +131 -0
  88. package/dist/families/syntactic-patterns/closers/boilerplate-conclusion.js.map +1 -0
  89. package/dist/families/syntactic-patterns/closers/false-question.js +30 -0
  90. package/dist/families/syntactic-patterns/closers/false-question.js.map +1 -0
  91. package/dist/families/syntactic-patterns/closers/summative-closer.js +33 -0
  92. package/dist/families/syntactic-patterns/closers/summative-closer.js.map +1 -0
  93. package/dist/families/syntactic-patterns/contrast/blame-reframe.js +59 -0
  94. package/dist/families/syntactic-patterns/contrast/blame-reframe.js.map +1 -0
  95. package/dist/families/syntactic-patterns/contrast/contrastive-aphorism.js +203 -0
  96. package/dist/families/syntactic-patterns/contrast/contrastive-aphorism.js.map +1 -0
  97. package/dist/families/syntactic-patterns/contrast/negation-reframe.js +20 -0
  98. package/dist/families/syntactic-patterns/contrast/negation-reframe.js.map +1 -0
  99. package/dist/families/syntactic-patterns/contrast/private/negation-reframe-matcher.js +218 -0
  100. package/dist/families/syntactic-patterns/contrast/private/negation-reframe-matcher.js.map +1 -0
  101. package/dist/families/syntactic-patterns/contrast/private/negation-reframe-parts.js +136 -0
  102. package/dist/families/syntactic-patterns/contrast/private/negation-reframe-parts.js.map +1 -0
  103. package/dist/families/syntactic-patterns/generalization/softening-language.js +143 -0
  104. package/dist/families/syntactic-patterns/generalization/softening-language.js.map +1 -0
  105. package/dist/families/syntactic-patterns/generalization/universalizing-claims.js +137 -0
  106. package/dist/families/syntactic-patterns/generalization/universalizing-claims.js.map +1 -0
  107. package/dist/families/syntactic-patterns/lead-ins/boilerplate-framing.js +104 -0
  108. package/dist/families/syntactic-patterns/lead-ins/boilerplate-framing.js.map +1 -0
  109. package/dist/families/syntactic-patterns/lead-ins/generic-signposting.js +230 -0
  110. package/dist/families/syntactic-patterns/lead-ins/generic-signposting.js.map +1 -0
  111. package/dist/families/syntactic-patterns/lead-ins/lesson-framing.js +76 -0
  112. package/dist/families/syntactic-patterns/lead-ins/lesson-framing.js.map +1 -0
  113. package/dist/families/syntactic-patterns/lead-ins/llm-openers.js +24 -0
  114. package/dist/families/syntactic-patterns/lead-ins/llm-openers.js.map +1 -0
  115. package/dist/families/syntactic-patterns/lead-ins/observer-guidance.js +95 -0
  116. package/dist/families/syntactic-patterns/lead-ins/observer-guidance.js.map +1 -0
  117. package/dist/families/syntactic-patterns/llm-artifacts/response-wrapper.js +106 -0
  118. package/dist/families/syntactic-patterns/llm-artifacts/response-wrapper.js.map +1 -0
  119. package/dist/families/syntactic-patterns/repetition/demonstrative-emphasis.js +251 -0
  120. package/dist/families/syntactic-patterns/repetition/demonstrative-emphasis.js.map +1 -0
  121. package/dist/families/syntactic-patterns/repetition/empty-emphasis.js +108 -0
  122. package/dist/families/syntactic-patterns/repetition/empty-emphasis.js.map +1 -0
  123. package/dist/families/syntactic-patterns/repetition/fragment-stacking.js +306 -0
  124. package/dist/families/syntactic-patterns/repetition/fragment-stacking.js.map +1 -0
  125. package/dist/families/syntactic-patterns/repetition/triple-repeat.js +149 -0
  126. package/dist/families/syntactic-patterns/repetition/triple-repeat.js.map +1 -0
  127. package/dist/families/term-policy/recommended-terms.js +87 -0
  128. package/dist/families/term-policy/recommended-terms.js.map +1 -0
  129. package/dist/families/term-policy/required-terms.js +32 -0
  130. package/dist/families/term-policy/required-terms.js.map +1 -0
  131. package/dist/families/words/data/prohibited-words.json +12 -0
  132. package/dist/families/words/data/simplicity-pairs.json +7 -0
  133. package/dist/families/words/hedge-stacking.js +43 -0
  134. package/dist/families/words/hedge-stacking.js.map +1 -0
  135. package/dist/families/words/llm-vocabulary.js +33 -0
  136. package/dist/families/words/llm-vocabulary.js.map +1 -0
  137. package/dist/families/words/prohibited-words.js +17 -0
  138. package/dist/families/words/prohibited-words.js.map +1 -0
  139. package/dist/families/words/simplicity.js +27 -0
  140. package/dist/families/words/simplicity.js.map +1 -0
  141. package/dist/index.js +26 -0
  142. package/dist/index.js.map +1 -0
  143. package/dist/presets/everything.js +52 -0
  144. package/dist/presets/everything.js.map +1 -0
  145. package/dist/registries/metrics.js +15 -0
  146. package/dist/registries/metrics.js.map +1 -0
  147. package/dist/registries/orthography.js +15 -0
  148. package/dist/registries/orthography.js.map +1 -0
  149. package/dist/registries/phrases.js +19 -0
  150. package/dist/registries/phrases.js.map +1 -0
  151. package/dist/registries/semantic-thinness.js +5 -0
  152. package/dist/registries/semantic-thinness.js.map +1 -0
  153. package/dist/registries/syntactic-patterns/authority.js +5 -0
  154. package/dist/registries/syntactic-patterns/authority.js.map +1 -0
  155. package/dist/registries/syntactic-patterns/closers.js +11 -0
  156. package/dist/registries/syntactic-patterns/closers.js.map +1 -0
  157. package/dist/registries/syntactic-patterns/contrast.js +9 -0
  158. package/dist/registries/syntactic-patterns/contrast.js.map +1 -0
  159. package/dist/registries/syntactic-patterns/generalization.js +7 -0
  160. package/dist/registries/syntactic-patterns/generalization.js.map +1 -0
  161. package/dist/registries/syntactic-patterns/lead-ins.js +13 -0
  162. package/dist/registries/syntactic-patterns/lead-ins.js.map +1 -0
  163. package/dist/registries/syntactic-patterns/llm-artifacts.js +5 -0
  164. package/dist/registries/syntactic-patterns/llm-artifacts.js.map +1 -0
  165. package/dist/registries/syntactic-patterns/repetition.js +11 -0
  166. package/dist/registries/syntactic-patterns/repetition.js.map +1 -0
  167. package/dist/registries/syntactic-patterns.js +17 -0
  168. package/dist/registries/syntactic-patterns.js.map +1 -0
  169. package/dist/registries/term-policy.js +7 -0
  170. package/dist/registries/term-policy.js.map +1 -0
  171. package/dist/registries/words.js +11 -0
  172. package/dist/registries/words.js.map +1 -0
  173. package/dist/shared/matchers/phrases.js +71 -0
  174. package/dist/shared/matchers/phrases.js.map +1 -0
  175. package/dist/shared/matchers/prose-patterns.js +104 -0
  176. package/dist/shared/matchers/prose-patterns.js.map +1 -0
  177. package/dist/shared/text/document.js +18 -0
  178. package/dist/shared/text/document.js.map +1 -0
  179. package/dist/shared/text/normalize.js +30 -0
  180. package/dist/shared/text/normalize.js.map +1 -0
  181. package/dist/shared/text/quotes.js +20 -0
  182. package/dist/shared/text/quotes.js.map +1 -0
  183. package/dist/shared/text/sections.js +119 -0
  184. package/dist/shared/text/sections.js.map +1 -0
  185. package/dist/shared/text/sentences.js +79 -0
  186. package/dist/shared/text/sentences.js.map +1 -0
  187. package/dist/shared/text/tokens.js +18 -0
  188. package/dist/shared/text/tokens.js.map +1 -0
  189. package/dist/shared/text/traverse.js +10 -0
  190. package/dist/shared/text/traverse.js.map +1 -0
  191. package/dist/shared/text/whitespace.js +25 -0
  192. package/dist/shared/text/whitespace.js.map +1 -0
  193. package/package.json +115 -0
@@ -0,0 +1,104 @@
1
+ import { normalizeForMatch } from "../text/normalize.js";
2
+ import { stripOuterQuotes } from "../text/quotes.js";
3
+ import { wordTokens } from "../text/tokens.js";
4
/**
 * Prepares a sentence for pattern matching.
 *
 * Runs, in order: `normalizeForMatch`, `stripQuotedSegments`,
 * `stripOuterQuotes`, and finally `stripLeadingPrefixes`.
 *
 * @param {string} sentence - Raw sentence text.
 * @param {string[]} [prefixes=[]] - Candidate leading prefixes; at most one is removed.
 * @returns {string} The cleaned sentence.
 */
export function cleanSentence(sentence, prefixes = []) {
  const matchable = normalizeForMatch(sentence);
  const withoutQuotedSegments = stripQuotedSegments(matchable);
  const unwrapped = stripOuterQuotes(withoutQuotedSegments);
  return stripLeadingPrefixes(unwrapped, prefixes);
}
8
/**
 * Removes the first prefix (in iteration order) that `text` starts with.
 *
 * Only one prefix is ever removed, even if the remainder starts with
 * another candidate.
 *
 * @param {string} text - Text to inspect.
 * @param {Iterable<string>} prefixes - Candidate prefixes, tried in order.
 * @returns {string} `text` without the matched prefix, or `text` unchanged.
 */
export function stripLeadingPrefixes(text, prefixes) {
  for (const candidate of prefixes) {
    if (text.startsWith(candidate)) {
      return text.slice(candidate.length);
    }
  }
  return text;
}
18
/**
 * Returns `text` with every double-quoted segment (quote marks included)
 * removed.
 *
 * A segment opens on a character accepted by `isQuoteStart` and closes on
 * the character `isMatchingQuote` pairs with it. If a quote is never
 * closed, everything after the opening quote is dropped.
 *
 * @param {string} text - Text to filter.
 * @returns {string} The text outside quoted segments.
 */
export function stripQuotedSegments(text) {
  const kept = [];
  let openQuote;
  for (const character of text) {
    if (openQuote === undefined) {
      if (isQuoteStart(character)) {
        openQuote = character;
      } else {
        kept.push(character);
      }
    } else if (isMatchingQuote(openQuote, character)) {
      // Closing quote: consume it and resume copying afterwards.
      openQuote = undefined;
    }
  }
  return kept.join("");
}
36
/**
 * Tells whether `character` opens a quoted segment: a straight double
 * quote or a left curly double quote (U+201C).
 *
 * @param {string} character - Character to test.
 * @returns {boolean} True for an opening double quote.
 */
function isQuoteStart(character) {
  return ['"', "\u201C"].includes(character);
}
39
/**
 * Tells whether `close` terminates a segment opened by `open`.
 *
 * A left curly double quote (U+201C) is closed by a right curly double
 * quote (U+201D); any other opener is closed by the same character.
 *
 * @param {string} open - The opening quote character.
 * @param {string} close - The candidate closing character.
 * @returns {boolean} True when `close` pairs with `open`.
 */
function isMatchingQuote(open, close) {
  const expectedClose = open === "\u201C" ? "\u201D" : open;
  return close === expectedClose;
}
45
/**
 * Returns the `normalized` property of every token that `wordTokens`
 * produces for `text`, in order.
 *
 * @param {string} text - Text to tokenize.
 * @returns {Array} The `normalized` values of the tokens.
 */
export function tokens(text) {
  const words = wordTokens(text);
  return words.map(({ normalized }) => normalized);
}
48
/**
 * Finds the first pattern that occurs anywhere in `text`.
 *
 * Despite the name, the result is the matching pattern itself, not a
 * boolean.
 *
 * @param {string} text - Text to search.
 * @param {string[]} patterns - Substrings to look for, tried in order.
 * @returns {string|undefined} The first contained pattern, or `undefined`.
 */
export function containsAny(text, patterns) {
  for (const candidate of patterns) {
    if (text.includes(candidate)) {
      return candidate;
    }
  }
  return undefined;
}
51
/**
 * Finds the first pattern that `text` starts with.
 *
 * The result is the matching pattern itself, not a boolean.
 *
 * @param {string} text - Text to inspect.
 * @param {string[]} patterns - Prefixes to test, tried in order.
 * @returns {string|undefined} The first matching prefix, or `undefined`.
 */
export function startsWithAnyText(text, patterns) {
  for (const candidate of patterns) {
    if (text.startsWith(candidate)) {
      return candidate;
    }
  }
  return undefined;
}
54
/**
 * Tells whether `text` begins with `pattern` and the pattern ends on a
 * boundary.
 *
 * The character right after the pattern must be absent (end of text) or
 * one of: space, comma, period, colon, semicolon.
 *
 * @param {string} text - Text to inspect.
 * @param {string} pattern - Required leading text.
 * @returns {boolean} True when the pattern matches up to a boundary.
 */
export function textStartsWithPattern(text, pattern) {
  if (!text.startsWith(pattern)) {
    return false;
  }
  const following = text.at(pattern.length);
  if (following === undefined) {
    return true;
  }
  return [" ", ",", ".", ":", ";"].includes(following);
}
66
/**
 * Tells whether `sourceTokens` contains one member of each group, in
 * group order.
 *
 * Each group must be satisfied by a token strictly after the token that
 * satisfied the previous group. Other tokens may sit in between.
 *
 * @param {string[]} sourceTokens - Tokens to scan.
 * @param {Iterable<string[]>} groups - Alternatives for each required step.
 * @returns {boolean} True when every group matches in sequence.
 */
export function tokensContainInOrder(sourceTokens, groups) {
  let cursor = 0;
  for (const group of groups) {
    // Advance to the next token accepted by this group.
    while (
      cursor < sourceTokens.length &&
      !group.includes(sourceTokens[cursor])
    ) {
      cursor += 1;
    }
    if (cursor >= sourceTokens.length) {
      return false;
    }
    // The following group must match after this token.
    cursor += 1;
  }
  return true;
}
79
/**
 * Removes every trailing `.`, `!`, `?` and `:` from `text`.
 *
 * @param {string} text - Text to trim.
 * @returns {string} The text without its run of terminal punctuation.
 */
export function trimTerminalPunctuation(text) {
  const terminal = [".", "!", "?", ":"];
  let cut = text.length;
  while (cut > 0 && terminal.includes(text[cut - 1])) {
    cut -= 1;
  }
  return text.slice(0, cut);
}
93
/**
 * Tells whether `sourceTokens` begins with exactly the tokens in
 * `expected`, compared position by position with `===`.
 *
 * @param {string[]} sourceTokens - Tokens to inspect.
 * @param {string[]} expected - Required leading tokens.
 * @returns {boolean} True when `expected` is a prefix of `sourceTokens`.
 */
export function startsWithWords(sourceTokens, expected) {
  const longEnough = sourceTokens.length >= expected.length;
  return (
    longEnough &&
    expected.every((word, position) => sourceTokens[position] === word)
  );
}
104
+ //# sourceMappingURL=prose-patterns.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prose-patterns.js","sourceRoot":"","sources":["../../../src/shared/matchers/prose-patterns.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAO/C,MAAM,UAAU,aAAa,CAC3B,QAAgB,EAChB,WAA8B,EAAE;IAEhC,MAAM,UAAU,GAAG,mBAAmB,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC;IACpE,OAAO,oBAAoB,CAAC,gBAAgB,CAAC,UAAU,CAAC,EAAE,QAAQ,CAAC,CAAC;AACtE,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,IAAY,EACZ,QAA2B;IAE3B,IAAI,QAAQ,GAAG,IAAI,CAAC;IAEpB,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC9B,IAAI,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YAChC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACzC,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,IAAI,QAAQ,GAAG,EAAE,CAAC;IAClB,IAAI,KAAyB,CAAC;IAE9B,KAAK,MAAM,SAAS,IAAI,IAAI,EAAE,CAAC;QAC7B,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACxB,IAAI,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,EAAE,CAAC;gBACtC,KAAK,GAAG,SAAS,CAAC;YACpB,CAAC;YACD,SAAS;QACX,CAAC;QAED,IAAI,YAAY,CAAC,SAAS,CAAC,EAAE,CAAC;YAC5B,KAAK,GAAG,SAAS,CAAC;YAClB,SAAS;QACX,CAAC;QAED,QAAQ,IAAI,SAAS,CAAC;IACxB,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,YAAY,CAAC,SAAiB;IACrC,OAAO,SAAS,KAAK,GAAG,IAAI,SAAS,KAAK,QAAQ,CAAC;AACrD,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,KAAa;IAClD,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtB,OAAO,KAAK,KAAK,QAAQ,CAAC;IAC5B,CAAC;IAED,OAAO,KAAK,KAAK,IAAI,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,MAAM,CAAC,IAAY;IACjC,OAAO,UAAU,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;AAC3D,CAAC;AAED,MAAM,UAAU,WAAW,CACzB,IAAY,EACZ,QAA2B;IAE3B,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;AAC5D,CAAC;AAED,MAAM,UAAU,iBAAiB,CAC/B,IAAY,EACZ,QAA2B;IAE3B,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;AAC9D,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,IAAY,EAAE,OAAe;IACjE,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,IA
AI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACrC,OAAO,CACL,IAAI,KAAK,SAAS;QAClB,IAAI,KAAK,GAAG;QACZ,IAAI,KAAK,GAAG;QACZ,IAAI,KAAK,GAAG;QACZ,IAAI,KAAK,GAAG;QACZ,IAAI,KAAK,GAAG,CACb,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,YAA+B,EAC/B,MAAsC;IAEtC,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,QAAQ,GAAG,YAAY;aAC1B,KAAK,CAAC,UAAU,CAAC;aACjB,SAAS,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;QAE/C,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;YACjB,OAAO,KAAK,CAAC;QACf,CAAC;QAED,UAAU,IAAI,QAAQ,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,IAAY;IAClD,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;IAEtB,OAAO,GAAG,GAAG,CAAC,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,IAAI,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC;QACnC,IACE,SAAS,KAAK,GAAG;YACjB,SAAS,KAAK,GAAG;YACjB,SAAS,KAAK,GAAG;YACjB,SAAS,KAAK,GAAG,EACjB,CAAC;YACD,MAAM;QACR,CAAC;QACD,GAAG,IAAI,CAAC,CAAC;IACX,CAAC;IAED,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AAC5B,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,YAA+B,EAC/B,QAA2B;IAE3B,IAAI,YAAY,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;QAC1C,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,QAAQ,CAAC,MAAM,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QACxD,IAAI,YAAY,CAAC,KAAK,CAAC,KAAK,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YAC5C,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC"}
@@ -0,0 +1,18 @@
1
+ import { allParagraphs } from "./sections.js";
2
+ import { splitSentences } from "./sentences.js";
3
+ import { wordTokens } from "./tokens.js";
4
/**
 * Builds the document's plain text: the trimmed text of every paragraph
 * returned by `allParagraphs`, skipping empty ones, joined by a blank
 * line.
 *
 * @param {object} document - Parsed document passed through to `allParagraphs`.
 * @returns {string} Paragraph texts separated by "\n\n".
 */
export function documentText(document) {
  const blocks = [];
  for (const paragraph of allParagraphs(document)) {
    const trimmed = paragraph.text.trim();
    if (trimmed.length > 0) {
      blocks.push(trimmed);
    }
  }
  return blocks.join("\n\n");
}
10
/**
 * Computes basic size metrics over the document's plain text.
 *
 * @param {object} document - Parsed document passed through to `documentText`.
 * @returns {{sentenceCount: number, text: string, wordCount: number}}
 *   The plain text plus how many items `splitSentences` and `wordTokens`
 *   return for it.
 */
export function documentMetrics(document) {
  const text = documentText(document);
  const sentenceCount = splitSentences(text).length;
  const wordCount = wordTokens(text).length;
  return { sentenceCount, text, wordCount };
}
18
+ //# sourceMappingURL=document.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document.js","sourceRoot":"","sources":["../../../src/shared/text/document.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAQzC,MAAM,UAAU,YAAY,CAAC,QAAyB;IACpD,OAAO,aAAa,CAAC,QAAQ,CAAC;SAC3B,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SACzC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;SACjC,IAAI,CAAC,MAAM,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,QAAyB;IACvD,MAAM,IAAI,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEpC,OAAO;QACL,aAAa,EAAE,cAAc,CAAC,IAAI,CAAC,CAAC,MAAM;QAC1C,IAAI;QACJ,SAAS,EAAE,UAAU,CAAC,IAAI,CAAC,CAAC,MAAM;KACnC,CAAC;AACJ,CAAC"}
@@ -0,0 +1,30 @@
1
// Curly single and double quotes mapped to their straight ASCII forms.
const SMART_TO_STRAIGHT_QUOTES = new Map([
  ["\u2018", "'"],
  ["\u2019", "'"],
  ["\u201C", '"'],
  ["\u201D", '"'],
]);

/**
 * Normalizes text for matching: curly quotes become straight quotes,
 * whitespace is normalized by `normalizeWhitespace`, and the result is
 * lower-cased with the "en" locale.
 *
 * @param {string} text - Raw text.
 * @returns {string} The normalized, lower-cased text.
 */
export function normalizeForMatch(text) {
  const straightened = Array.from(
    text,
    (char) => SMART_TO_STRAIGHT_QUOTES.get(char) ?? char,
  ).join("");
  return normalizeWhitespace(straightened).toLocaleLowerCase("en");
}
14
/**
 * Collapses every run of whitespace into a single space and removes
 * leading and trailing whitespace.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text with single-space separation and no outer padding.
 */
export function normalizeWhitespace(text) {
  // `\s` covers the same characters that `String.prototype.trim` strips.
  return text.replace(/\s+/g, " ").trim();
}
30
+ //# sourceMappingURL=normalize.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"normalize.js","sourceRoot":"","sources":["../../../src/shared/text/normalize.ts"],"names":[],"mappings":"AAAA,MAAM,wBAAwB,GAAG,IAAI,GAAG,CAAiB;IACvD,CAAC,QAAQ,EAAE,GAAG,CAAC;IACf,CAAC,QAAQ,EAAE,GAAG,CAAC;IACf,CAAC,QAAQ,EAAE,GAAG,CAAC;IACf,CAAC,QAAQ,EAAE,GAAG,CAAC;CAChB,CAAC,CAAC;AAEH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,IAAI,UAAU,GAAG,EAAE,CAAC;IAEpB,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,UAAU,IAAI,wBAAwB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC;IAC3D,CAAC;IAED,OAAO,mBAAmB,CAAC,UAAU,CAAC,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,IAAI,UAAU,GAAG,EAAE,CAAC;IACpB,IAAI,qBAAqB,GAAG,KAAK,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;YACvB,IAAI,CAAC,qBAAqB,EAAE,CAAC;gBAC3B,UAAU,IAAI,GAAG,CAAC;YACpB,CAAC;YACD,qBAAqB,GAAG,IAAI,CAAC;YAC7B,SAAS;QACX,CAAC;QAED,UAAU,IAAI,IAAI,CAAC;QACnB,qBAAqB,GAAG,KAAK,CAAC;IAChC,CAAC;IAED,OAAO,UAAU,CAAC,IAAI,EAAE,CAAC;AAC3B,CAAC"}
@@ -0,0 +1,20 @@
1
// Straight and curly single/double quote characters.
const QUOTE_CHARS = new Set(['"', "'", "\u2018", "\u2019", "\u201C", "\u201D"]);

/**
 * Tells whether `text`, ignoring surrounding whitespace, both starts and
 * ends with a quote character.
 *
 * Only the first and last characters are inspected; the two quote
 * characters do not have to be a matching pair.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} True when the trimmed text is at least two characters
 *   long and is delimited by quote characters.
 */
export function isMostlyQuoted(text) {
  const trimmed = text.trim();
  if (trimmed.length < 2) {
    return false;
  }
  const first = trimmed[0];
  const last = trimmed.at(-1);
  return (
    first !== undefined &&
    last !== undefined &&
    QUOTE_CHARS.has(first) &&
    QUOTE_CHARS.has(last)
  );
}

/**
 * Removes one pair of surrounding quote characters from `text`.
 *
 * @param {string} text - Text to unwrap.
 * @returns {string} The trimmed inner text when `isMostlyQuoted(text)` is
 *   true; otherwise `text` unchanged.
 */
export function stripOuterQuotes(text) {
  if (!isMostlyQuoted(text)) {
    return text;
  }
  // The quote check ignores surrounding whitespace, so the slice must too.
  // Slicing the untrimmed text would remove the padding and keep the quotes.
  return text.trim().slice(1, -1).trim();
}
20
+ //# sourceMappingURL=quotes.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"quotes.js","sourceRoot":"","sources":["../../../src/shared/text/quotes.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEhF,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IACzB,MAAM,IAAI,GAAG,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAE5B,OAAO,CACL,KAAK,KAAK,SAAS;QACnB,IAAI,KAAK,SAAS;QAClB,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC;QACtB,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CACtB,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;AAClC,CAAC"}
@@ -0,0 +1,119 @@
1
+ import { splitSentences } from "./sentences.js";
2
+ import { sourceText } from "./traverse.js";
3
/**
 * Tells whether `node` is a paragraph, i.e. its `type` is "Paragraph".
 *
 * @param {{type: string}} node - AST node.
 * @returns {boolean} True for paragraph nodes.
 */
function isParagraphNode(node) {
  const { type } = node;
  return type === "Paragraph";
}
6
/**
 * Tells whether `node` has a `children` property (own or inherited).
 * The property's value is not inspected.
 *
 * @param {object} node - AST node.
 * @returns {boolean} True when a `children` property exists.
 */
function isParentNode(node) {
  // Reflect.has is the function form of the `in` operator.
  return Reflect.has(node, "children");
}
9
/**
 * Tells whether `node` has a `value` property holding a string.
 *
 * @param {object} node - AST node.
 * @returns {boolean} True when `node.value` exists and is a string.
 */
function hasStringValue(node) {
  if (!("value" in node)) {
    return false;
  }
  return typeof node.value === "string";
}
12
/**
 * Flattens a node into plain text.
 *
 * Checked in order: a node with a string `value` yields that value; a
 * "Break" node yields a single space; a node without `children` yields
 * an empty string; any other node yields the concatenated plain text of
 * its children.
 *
 * @param {object} node - AST node.
 * @returns {string} The node's plain text.
 */
function plainText(node) {
  if (hasStringValue(node)) {
    return node.value;
  }
  if (node.type === "Break") {
    return " ";
  }
  if (!isParentNode(node)) {
    return "";
  }
  return node.children.reduce(
    (combined, child) => combined + plainText(child),
    "",
  );
}
24
/**
 * Appends the paragraphs found at or below `node` to `paragraphs`.
 *
 * A paragraph node is appended as-is. A "BlockQuote" node with children
 * is searched recursively. Every other node type is ignored.
 *
 * @param {object} node - AST node to inspect.
 * @param {object[]} paragraphs - Output array, mutated in place.
 * @returns {void}
 */
function collectParagraphs(node, paragraphs) {
  if (isParagraphNode(node)) {
    paragraphs.push(node);
    return;
  }
  const isSearchableQuote = node.type === "BlockQuote" && isParentNode(node);
  if (isSearchableQuote) {
    for (const child of node.children) {
      collectParagraphs(child, paragraphs);
    }
  }
}
36
/**
 * Gathers, in document order, the paragraphs that `collectParagraphs`
 * finds for each node of a section.
 *
 * @param {Iterable<object>} section - The section's nodes.
 * @returns {object[]} The section's paragraph nodes.
 */
function sectionParagraphs(section) {
  const collected = [];
  for (const member of section) {
    collectParagraphs(member, collected);
  }
  return collected;
}
43
/**
 * Splits the document's top-level children into sections at "Header"
 * nodes.
 *
 * Header nodes themselves are not included, and a header with no nodes
 * before the next header (or the end) produces no section.
 *
 * @param {{children: Iterable<{type: string}>}} document - Parsed document.
 * @returns {object[][]} Non-empty groups of consecutive non-header nodes.
 */
function documentSections(document) {
  const sections = [];
  let pending = [];
  // Commits the nodes gathered so far, if any, and starts a new group.
  const flush = () => {
    if (pending.length > 0) {
      sections.push(pending);
    }
    pending = [];
  };
  for (const child of document.children) {
    if (child.type === "Header") {
      flush();
    } else {
      pending.push(child);
    }
  }
  flush();
  return sections;
}
61
/**
 * Splits a paragraph's source text into sentences and tags each one with
 * the paragraph and source it came from.
 *
 * @param {object} paragraph - Paragraph node.
 * @returns {Array<{paragraph: object, sentence: object, source: object}>}
 *   One entry per item returned by `splitSentences`.
 */
function paragraphSentences(paragraph) {
  const source = sourceText(paragraph);
  const pieces = splitSentences(source.text);
  return pieces.map((sentence) => {
    return { paragraph, sentence, source };
  });
}
69
/**
 * Lists every sentence of every paragraph in the document, in document
 * order.
 *
 * @param {object} document - Parsed document.
 * @returns {object[]} The `paragraphSentences` entries of all paragraphs.
 */
export function allParagraphSentences(document) {
  return documentSections(document).flatMap((section) =>
    sectionParagraphs(section).flatMap((paragraph) =>
      paragraphSentences(paragraph),
    ),
  );
}
78
/**
 * Lists every paragraph in the document together with its source text
 * and its flattened plain text.
 *
 * @param {object} document - Parsed document.
 * @returns {Array<{paragraph: object, source: object, text: string}>}
 *   One entry per paragraph, in document order.
 */
export function allParagraphs(document) {
  return documentSections(document).flatMap((section) =>
    sectionParagraphs(section).map((paragraph) => ({
      paragraph,
      source: sourceText(paragraph),
      text: plainText(paragraph),
    })),
  );
}
91
/**
 * Collects the first sentence of the first paragraph of each section.
 *
 * Sections without paragraphs, or whose first paragraph yields no
 * sentences, contribute nothing.
 *
 * @param {object} document - Parsed document.
 * @returns {object[]} At most one `paragraphSentences` entry per section.
 */
export function sectionFirstSentences(document) {
  return documentSections(document).flatMap((section) => {
    const [firstParagraph] = sectionParagraphs(section);
    if (firstParagraph === undefined) {
      return [];
    }
    const [firstSentence] = paragraphSentences(firstParagraph);
    return firstSentence === undefined ? [] : [firstSentence];
  });
}
105
/**
 * Collects the last sentence of the last paragraph of each section.
 *
 * Sections without paragraphs, or whose last paragraph yields no
 * sentences, contribute nothing.
 *
 * @param {object} document - Parsed document.
 * @returns {object[]} At most one `paragraphSentences` entry per section.
 */
export function sectionLastSentences(document) {
  return documentSections(document).flatMap((section) => {
    const finalParagraph = sectionParagraphs(section).at(-1);
    if (finalParagraph === undefined) {
      return [];
    }
    const finalSentence = paragraphSentences(finalParagraph).at(-1);
    return finalSentence === undefined ? [] : [finalSentence];
  });
}
119
+ //# sourceMappingURL=sections.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sections.js","sourceRoot":"","sources":["../../../src/shared/text/sections.ts"],"names":[],"mappings":"AAMA,OAAO,EAAsB,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACpE,OAAO,EAAmB,UAAU,EAAE,MAAM,eAAe,CAAC;AAgB5D,SAAS,eAAe,CAAC,IAAgB;IACvC,OAAO,IAAI,CAAC,IAAI,KAAK,WAAW,CAAC;AACnC,CAAC;AAED,SAAS,YAAY,CAAC,IAAgB;IACpC,OAAO,UAAU,IAAI,IAAI,CAAC;AAC5B,CAAC;AAMD,SAAS,cAAc,CAAC,IAAgB;IACtC,OAAO,OAAO,IAAI,IAAI,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,CAAC;AAC3D,CAAC;AAED,SAAS,SAAS,CAAC,IAAgB;IACjC,IAAI,cAAc,CAAC,IAAI,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED,IAAI,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;QAC1B,OAAO,GAAG,CAAC;IACb,CAAC;IAED,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACjE,CAAC;AAED,SAAS,iBAAiB,CACxB,IAAgB,EAChB,UAA8B;IAE9B,IAAI,eAAe,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1B,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtB,OAAO;IACT,CAAC;IAED,IAAI,IAAI,CAAC,IAAI,KAAK,YAAY,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,EAAE,CAAC;QACtD,OAAO;IACT,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QAClC,iBAAiB,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;IACvC,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,OAAgB;IACzC,MAAM,UAAU,GAAuB,EAAE,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;QAC3B,iBAAiB,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;IACtC,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,SAAS,gBAAgB,CAAC,QAAyB;IACjD,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,IAAI,OAAO,GAAiB,EAAE,CAAC;IAE/B,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;QACtC,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACzB,CAAC;YACD,OAAO,GAAG,EAAE,CAAC;YACb,SAAS;QACX,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACzB,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,kBAAkB,CAAC,SAA2B;IACrD,MAAM,MAAM,GAAG,UAAU,CAAC,SAAS,CAAC,CAAC;IAErC,OAAO,cAAc,CAAC,
MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACpD,SAAS;QACT,QAAQ;QACR,MAAM;KACP,CAAC,CAAC,CAAC;AACN,CAAC;AAED,MAAM,UAAU,qBAAqB,CACnC,QAAyB;IAEzB,MAAM,SAAS,GAAsB,EAAE,CAAC;IAExC,KAAK,MAAM,OAAO,IAAI,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC;QACjD,KAAK,MAAM,SAAS,IAAI,iBAAiB,CAAC,OAAO,CAAC,EAAE,CAAC;YACnD,SAAS,CAAC,IAAI,CAAC,GAAG,kBAAkB,CAAC,SAAS,CAAC,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,QAAyB;IACrD,MAAM,UAAU,GAAuB,EAAE,CAAC;IAE1C,KAAK,MAAM,OAAO,IAAI,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC;QACjD,KAAK,MAAM,SAAS,IAAI,iBAAiB,CAAC,OAAO,CAAC,EAAE,CAAC;YACnD,UAAU,CAAC,IAAI,CAAC;gBACd,SAAS;gBACT,MAAM,EAAE,UAAU,CAAC,SAAS,CAAC;gBAC7B,IAAI,EAAE,SAAS,CAAC,SAAS,CAAC;aAC3B,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,MAAM,UAAU,qBAAqB,CACnC,QAAyB;IAEzB,MAAM,SAAS,GAAsB,EAAE,CAAC;IAExC,KAAK,MAAM,OAAO,IAAI,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC;QACjD,MAAM,cAAc,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACxD,IAAI,cAAc,KAAK,SAAS,EAAE,CAAC;YACjC,SAAS;QACX,CAAC;QAED,MAAM,aAAa,GAAG,kBAAkB,CAAC,cAAc,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/D,IAAI,aAAa,KAAK,SAAS,EAAE,CAAC;YAChC,SAAS,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,QAAyB;IAEzB,MAAM,SAAS,GAAsB,EAAE,CAAC;IAExC,KAAK,MAAM,OAAO,IAAI,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC;QACjD,MAAM,aAAa,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxD,IAAI,aAAa,KAAK,SAAS,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QAED,MAAM,YAAY,GAAG,kBAAkB,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9D,IAAI,YAAY,KAAK,SAAS,EAAE,CAAC;YAC/B,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
@@ -0,0 +1,79 @@
1
// Sentence-granularity segmenter for the "en" locale. It is constructed once
// at module load and reused by splitSentences on every call.
const SENTENCE_SEGMENTER = new Intl.Segmenter("en", {
  granularity: "sentence"
});
4
/**
 * Tells whether `character` is an upper-case letter under English ("en")
 * case mapping.
 *
 * A value qualifies when its lower- and upper-case forms differ and it
 * already equals its upper-case form. Values whose two forms coincide
 * (digits, punctuation, whitespace, the empty string) are rejected.
 *
 * @param {string | undefined} character - Value to classify.
 * @returns {boolean} `true` only for a cased value equal to its upper-case form.
 */
function isUppercaseLetter(character) {
  if (character === undefined) {
    return false;
  }

  const upperForm = character.toLocaleUpperCase("en");
  if (character !== upperForm) {
    return false;
  }

  // Uncased input maps to itself in both directions, so the forms must differ.
  return character.toLocaleLowerCase("en") !== upperForm;
}
12
/**
 * Finds the offset of the first character of `text` that is not whitespace.
 *
 * @param {string} text - String to scan.
 * @returns {number | undefined} Zero-based offset (in UTF-16 code units) of
 *   the first non-whitespace character, or `undefined` when `text` is empty
 *   or consists only of whitespace.
 */
function firstNonWhitespaceIndex(text) {
  // trimStart() removes exactly the leading characters for which a
  // single-character trim() would yield "", so the amount removed is the offset.
  const leadingWhitespace = text.length - text.trimStart().length;

  return leadingWhitespace === text.length ? undefined : leadingWhitespace;
}
20
/**
 * Computes the exclusive end offset of `text` once trailing whitespace is
 * ignored.
 *
 * @param {string} text - String to scan.
 * @returns {number} Offset (in UTF-16 code units) just past the last
 *   non-whitespace character; `0` when `text` is empty or whitespace only.
 */
function trimEndIndex(text) {
  // The length of the right-trimmed string is the index one past its last
  // non-whitespace character.
  return text.trimEnd().length;
}
28
/**
 * Appends the whitespace-trimmed slice `text[start, end)` to `sentences`.
 *
 * The recorded `start` and `end` are offsets into `text` that bracket the
 * trimmed content. Nothing is appended when the slice is empty or contains
 * only whitespace.
 *
 * @param {Array<{end: number, start: number, text: string}>} sentences -
 *   Accumulator, mutated in place.
 * @param {string} text - String the offsets refer to.
 * @param {number} start - Inclusive start offset of the slice.
 * @param {number} end - Exclusive end offset of the slice.
 * @returns {void}
 */
function pushSegment(sentences, text, start, end) {
  const slice = text.slice(start, end);
  const leading = firstNonWhitespaceIndex(slice);

  if (leading !== undefined) {
    const trailing = trimEndIndex(slice);

    sentences.push({
      end: start + trailing,
      start: start + leading,
      text: slice.slice(leading, trailing)
    });
  }
}
41
/**
 * Splits one sentence further wherever a terminator (".", "?" or "!") is
 * followed immediately, with no space, by an upper-case letter.
 *
 * @param {{end: number, start: number, text: string}} segment - Sentence
 *   whose `start` is the offset that the returned offsets are shifted by.
 * @returns {Array<{end: number, start: number, text: string}>} The trimmed
 *   pieces in order, with `start`/`end` shifted by `segment.start`.
 */
function splitMissingBreakSpacing(segment) {
  const { start: base, text } = segment;
  const pieces = [];
  let pieceStart = 0;

  // `boundary` is the offset of the character after a candidate terminator;
  // the final character has no successor, so it is never a split point.
  for (let boundary = 1; boundary < text.length; boundary += 1) {
    const endsSentence = [".", "?", "!"].includes(text[boundary - 1]);

    if (endsSentence && isUppercaseLetter(text[boundary])) {
      pushSegment(pieces, text, pieceStart, boundary);
      pieceStart = boundary;
    }
  }

  // Whatever follows the last split point (or the whole text) is the tail.
  pushSegment(pieces, text, pieceStart, text.length);

  return pieces.map(({ end, start, text: pieceText }) => ({
    end: base + end,
    start: base + start,
    text: pieceText
  }));
}
60
/**
 * Breaks `text` into sentences with their offsets.
 *
 * Each segment produced by SENTENCE_SEGMENTER is trimmed of surrounding
 * whitespace (whitespace-only segments are dropped) and then passed through
 * splitMissingBreakSpacing, whose pieces are concatenated in order.
 *
 * @param {string} text - Text to split.
 * @returns {Array<{end: number, start: number, text: string}>} Sentences
 *   with `start`/`end` offsets into `text`.
 */
export function splitSentences(text) {
  return Array.from(SENTENCE_SEGMENTER.segment(text)).flatMap(
    ({ index, segment }) => {
      const leading = firstNonWhitespaceIndex(segment);
      if (leading === undefined) {
        return [];
      }

      const trailing = trimEndIndex(segment);

      return splitMissingBreakSpacing({
        end: index + trailing,
        start: index + leading,
        text: segment.slice(leading, trailing)
      });
    }
  );
}
79
+ //# sourceMappingURL=sentences.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sentences.js","sourceRoot":"","sources":["../../../src/shared/text/sentences.ts"],"names":[],"mappings":"AAMA,MAAM,kBAAkB,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE;IAClD,WAAW,EAAE,UAAU;CACxB,CAAC,CAAC;AAEH,SAAS,iBAAiB,CAAC,SAA6B;IACtD,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;QAC5B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,KAAK,GAAG,SAAS,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,SAAS,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;IAEhD,OAAO,KAAK,KAAK,KAAK,IAAI,SAAS,KAAK,KAAK,CAAC;AAChD,CAAC;AAED,SAAS,uBAAuB,CAAC,IAAY;IAC3C,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QACpD,IAAI,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;YAC/B,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,YAAY,CAAC,IAAY;IAChC,KAAK,IAAI,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QACpD,IAAI,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;YACnC,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,WAAW,CAClB,SAA0B,EAC1B,IAAY,EACZ,KAAa,EACb,GAAW;IAEX,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACnC,MAAM,SAAS,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;IAC/C,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;QAC5B,OAAO;IACT,CAAC;IAED,MAAM,OAAO,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IAElC,SAAS,CAAC,IAAI,CAAC;QACb,GAAG,EAAE,KAAK,GAAG,OAAO;QACpB,KAAK,EAAE,KAAK,GAAG,SAAS;QACxB,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC;KACpC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,wBAAwB,CAAC,OAAsB;IACtD,MAAM,SAAS,GAAoB,EAAE,CAAC;IACtC,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QAChE,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACtC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QACrC,IACE,CAAC,SAAS,KAAK,GAAG,IAAI,SAAS,KAAK,GAAG,IAAI,SAAS,KAAK,GAAG,CAAC;YAC7D,iBAAiB,CAAC,IAAI,CAAC,EACvB,CAAC;YACD,WAAW,CAAC,SAAS,EAAE,OAAO,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;YACvD,KAAK,GAAG,KAAK,GAAG,C
AAC,CAAC;QACpB,CAAC;IACH,CAAC;IAED,WAAW,CAAC,SAAS,EAAE,OAAO,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEjE,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAClC,GAAG,EAAE,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,GAAG;QACjC,KAAK,EAAE,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,KAAK;QACrC,IAAI,EAAE,QAAQ,CAAC,IAAI;KACpB,CAAC,CAAC,CAAC;AACN,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,SAAS,GAAoB,EAAE,CAAC;IAEtC,KAAK,MAAM,OAAO,IAAI,kBAAkB,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;QACvD,MAAM,SAAS,GAAG,uBAAuB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC3D,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;YAC5B,SAAS;QACX,CAAC;QAED,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC;QACxC,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC;QAEpC,MAAM,QAAQ,GAAG;YACf,GAAG;YACH,KAAK;YACL,IAAI,EAAE,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC;SAChD,CAAC;QAEF,SAAS,CAAC,IAAI,CAAC,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC,CAAC;IACxD,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
@@ -0,0 +1,18 @@
1
+ import { normalizeForMatch } from "./normalize.js";
2
// Word-granularity segmenter for the "en" locale, shared by every call.
const WORD_SEGMENTER = new Intl.Segmenter("en", { granularity: "word" });

/**
 * Extracts the word-like tokens of `text`.
 *
 * Segments whose `isWordLike` flag is not `true` are skipped.
 *
 * @param {string} text - Text to tokenize.
 * @returns {Array<{end: number, normalized: string, start: number, text: string}>}
 *   One entry per word-like segment: `start`/`end` are offsets into `text`,
 *   `text` is the segment as written and `normalized` is the result of
 *   normalizeForMatch for that segment.
 */
export function wordTokens(text) {
  return Array.from(WORD_SEGMENTER.segment(text))
    .filter(({ isWordLike }) => isWordLike === true)
    .map(({ index, segment }) => ({
      end: index + segment.length,
      normalized: normalizeForMatch(segment),
      start: index,
      text: segment
    }));
}
18
+ //# sourceMappingURL=tokens.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokens.js","sourceRoot":"","sources":["../../../src/shared/text/tokens.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AAEnD,MAAM,cAAc,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC,CAAC;AASzE,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,MAAM,MAAM,GAAY,EAAE,CAAC;IAE3B,KAAK,MAAM,OAAO,IAAI,cAAc,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;QACnD,IAAI,OAAO,CAAC,UAAU,KAAK,IAAI,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QAED,MAAM,CAAC,IAAI,CAAC;YACV,GAAG,EAAE,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM;YAC3C,UAAU,EAAE,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC;YAC9C,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,IAAI,EAAE,OAAO,CAAC,OAAO;SACtB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,10 @@
1
+ import { StringSource } from "textlint-util-to-string";
2
/**
 * Wraps `node` in a StringSource and exposes its text together with helpers
 * that translate offsets in that text through the source's index lookup.
 *
 * @param {object} node - Node handed to the StringSource constructor.
 * @returns {{
 *   originalEndFor: (end: number) => number,
 *   originalStartFor: (start: number) => number,
 *   text: string
 * }} `text` is `source.toString()`; each mapper returns the looked-up index,
 *   or its own argument when the lookup yields `null`/`undefined`.
 */
export function sourceText(node) {
  const source = new StringSource(node);

  const originalStartFor = (start) =>
    source.originalIndexFromIndex(start) ?? start;

  // NOTE(review): the second argument presumably marks the index as an end
  // position; confirm against the textlint-util-to-string documentation.
  const originalEndFor = (end) =>
    source.originalIndexFromIndex(end, true) ?? end;

  return {
    originalEndFor,
    originalStartFor,
    text: source.toString()
  };
}
10
+ //# sourceMappingURL=traverse.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"traverse.js","sourceRoot":"","sources":["../../../src/shared/text/traverse.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAQvD,MAAM,UAAU,UAAU,CAAC,IAAmB;IAC5C,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC;IAEtC,OAAO;QACL,cAAc,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,CAAC,sBAAsB,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,GAAG;QACxE,gBAAgB,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,sBAAsB,CAAC,KAAK,CAAC,IAAI,KAAK;QAC1E,IAAI,EAAE,MAAM,CAAC,QAAQ,EAAE;KACxB,CAAC;AACJ,CAAC"}
@@ -0,0 +1,25 @@
1
/**
 * Reports whether `value` is absent or holds nothing but whitespace.
 *
 * @param {string | undefined} value - Value to test.
 * @returns {boolean} `true` for `undefined`, the empty string, or a string
 *   that trims to the empty string; `false` otherwise.
 */
export function isWhitespace(value) {
  if (value === undefined) {
    return true;
  }

  return value.trim().length === 0;
}
4
/**
 * Splits `text` into its maximal runs of non-whitespace characters.
 *
 * A character counts as whitespace when trimming it leaves the empty string.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} The runs in order of appearance; empty when `text` is
 *   empty or whitespace only.
 */
export function splitWhitespace(text) {
  const words = [];
  let wordStart = -1; // -1 while the scan is between words
  let offset = 0;

  for (const character of text) {
    const isGap = character.trim() === "";

    if (isGap && wordStart !== -1) {
      words.push(text.slice(wordStart, offset));
      wordStart = -1;
    } else if (!isGap && wordStart === -1) {
      wordStart = offset;
    }

    // Iteration is by code point, so advance by the character's own length.
    offset += character.length;
  }

  if (wordStart !== -1) {
    words.push(text.slice(wordStart));
  }

  return words;
}
22
/**
 * Counts the whitespace-separated words in `text`.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Number of entries splitWhitespace returns for `text`.
 */
export function countWhitespaceSeparatedWords(text) {
  const { length } = splitWhitespace(text);

  return length;
}
25
+ //# sourceMappingURL=whitespace.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"whitespace.js","sourceRoot":"","sources":["../../../src/shared/text/whitespace.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,YAAY,CAAC,KAAyB;IACpD,OAAO,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC;AACpD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,OAAO,GAAG,EAAE,CAAC;IAEjB,KAAK,MAAM,SAAS,IAAI,IAAI,EAAE,CAAC;QAC7B,IAAI,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;YAC5B,IAAI,OAAO,KAAK,EAAE,EAAE,CAAC;gBACnB,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACpB,OAAO,GAAG,EAAE,CAAC;YACf,CAAC;YACD,SAAS;QACX,CAAC;QAED,OAAO,IAAI,SAAS,CAAC;IACvB,CAAC;IAED,IAAI,OAAO,KAAK,EAAE,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,6BAA6B,CAAC,IAAY;IACxD,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;AACtC,CAAC"}
package/package.json ADDED
@@ -0,0 +1,115 @@
1
+ {
2
+ "name": "slopless",
3
+ "version": "0.1.0",
4
+ "description": "Deterministic textlint rules for detecting slop in prose.",
5
+ "license": "MIT",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "git+https://github.com/agent-quality-controls/slopless.git",
9
+ "directory": "packages/textlint-rules"
10
+ },
11
+ "homepage": "https://github.com/agent-quality-controls/slopless#readme",
12
+ "bugs": {
13
+ "url": "https://github.com/agent-quality-controls/slopless/issues"
14
+ },
15
+ "type": "module",
16
+ "engines": {
17
+ "node": ">=20.0.0"
18
+ },
19
+ "scripts": {
20
+ "build": "rm -rf dist && tsc -p tsconfig.json",
21
+ "format": "prettier --write .",
22
+ "format:check": "prettier --check .",
23
+ "lint": "eslint --max-warnings 0 .",
24
+ "lint:css": "stylelint --max-warnings 0 \"styles/**/*.css\"",
25
+ "spellcheck": "cspell .",
26
+ "typecov": "type-coverage --at-least 100",
27
+ "validate": "npm run build && npm run lint && npm run lint:css && npm run format:check && npm run spellcheck && npm run typecov && g3ts validate --path . --rules-only"
28
+ },
29
+ "files": [
30
+ "dist",
31
+ "README.md"
32
+ ],
33
+ "exports": {
34
+ ".": "./dist/index.js",
35
+ "./families/metrics/avg-sentence-length": "./dist/families/metrics/avg-sentence-length.js",
36
+ "./families/metrics/coleman-liau": "./dist/families/metrics/coleman-liau.js",
37
+ "./families/metrics/flesch-kincaid": "./dist/families/metrics/flesch-kincaid.js",
38
+ "./families/metrics/gunning-fog": "./dist/families/metrics/gunning-fog.js",
39
+ "./families/metrics/paragraph-length": "./dist/families/metrics/paragraph-length.js",
40
+ "./families/metrics/word-repetition": "./dist/families/metrics/word-repetition.js",
41
+ "./families/orthography/colon-dramatic": "./dist/families/orthography/colon-dramatic.js",
42
+ "./families/orthography/em-dashes": "./dist/families/orthography/em-dashes.js",
43
+ "./families/orthography/exclamation-density": "./dist/families/orthography/exclamation-density.js",
44
+ "./families/orthography/fake-timestamps": "./dist/families/orthography/fake-timestamps.js",
45
+ "./families/orthography/sentence-case": "./dist/families/orthography/sentence-case.js",
46
+ "./families/orthography/smart-quotes": "./dist/families/orthography/smart-quotes.js",
47
+ "./families/phrases/cliches": "./dist/families/phrases/cliches.js",
48
+ "./families/phrases/corporate-speak": "./dist/families/phrases/corporate-speak.js",
49
+ "./families/phrases/humble-bragger": "./dist/families/phrases/humble-bragger.js",
50
+ "./families/phrases/jargon-faker": "./dist/families/phrases/jargon-faker.js",
51
+ "./families/phrases/llm-disclaimer": "./dist/families/phrases/llm-disclaimer.js",
52
+ "./families/phrases/prohibited-phrases": "./dist/families/phrases/prohibited-phrases.js",
53
+ "./families/phrases/skunked-terms": "./dist/families/phrases/skunked-terms.js",
54
+ "./families/phrases/uncomparables": "./dist/families/phrases/uncomparables.js",
55
+ "./families/semantic-thinness/semantic-thinness": "./dist/families/semantic-thinness/semantic-thinness.js",
56
+ "./families/term-policy/recommended-terms": "./dist/families/term-policy/recommended-terms.js",
57
+ "./families/term-policy/required-terms": "./dist/families/term-policy/required-terms.js",
58
+ "./families/syntactic-patterns/authority/authority-padding": "./dist/families/syntactic-patterns/authority/authority-padding.js",
59
+ "./families/syntactic-patterns/closers/affirmation-closers": "./dist/families/syntactic-patterns/closers/affirmation-closers.js",
60
+ "./families/syntactic-patterns/closers/boilerplate-conclusion": "./dist/families/syntactic-patterns/closers/boilerplate-conclusion.js",
61
+ "./families/syntactic-patterns/closers/false-question": "./dist/families/syntactic-patterns/closers/false-question.js",
62
+ "./families/syntactic-patterns/closers/summative-closer": "./dist/families/syntactic-patterns/closers/summative-closer.js",
63
+ "./families/syntactic-patterns/contrast/blame-reframe": "./dist/families/syntactic-patterns/contrast/blame-reframe.js",
64
+ "./families/syntactic-patterns/contrast/contrastive-aphorism": "./dist/families/syntactic-patterns/contrast/contrastive-aphorism.js",
65
+ "./families/syntactic-patterns/contrast/negation-reframe": "./dist/families/syntactic-patterns/contrast/negation-reframe.js",
66
+ "./families/syntactic-patterns/generalization/softening-language": "./dist/families/syntactic-patterns/generalization/softening-language.js",
67
+ "./families/syntactic-patterns/generalization/universalizing-claims": "./dist/families/syntactic-patterns/generalization/universalizing-claims.js",
68
+ "./families/syntactic-patterns/llm-artifacts/response-wrapper": "./dist/families/syntactic-patterns/llm-artifacts/response-wrapper.js",
69
+ "./families/syntactic-patterns/lead-ins/boilerplate-framing": "./dist/families/syntactic-patterns/lead-ins/boilerplate-framing.js",
70
+ "./families/syntactic-patterns/lead-ins/generic-signposting": "./dist/families/syntactic-patterns/lead-ins/generic-signposting.js",
71
+ "./families/syntactic-patterns/lead-ins/lesson-framing": "./dist/families/syntactic-patterns/lead-ins/lesson-framing.js",
72
+ "./families/syntactic-patterns/lead-ins/llm-openers": "./dist/families/syntactic-patterns/lead-ins/llm-openers.js",
73
+ "./families/syntactic-patterns/lead-ins/observer-guidance": "./dist/families/syntactic-patterns/lead-ins/observer-guidance.js",
74
+ "./families/syntactic-patterns/repetition/demonstrative-emphasis": "./dist/families/syntactic-patterns/repetition/demonstrative-emphasis.js",
75
+ "./families/syntactic-patterns/repetition/empty-emphasis": "./dist/families/syntactic-patterns/repetition/empty-emphasis.js",
76
+ "./families/syntactic-patterns/repetition/fragment-stacking": "./dist/families/syntactic-patterns/repetition/fragment-stacking.js",
77
+ "./families/syntactic-patterns/repetition/triple-repeat": "./dist/families/syntactic-patterns/repetition/triple-repeat.js",
78
+ "./families/words/hedge-stacking": "./dist/families/words/hedge-stacking.js",
79
+ "./families/words/llm-vocabulary": "./dist/families/words/llm-vocabulary.js",
80
+ "./families/words/simplicity": "./dist/families/words/simplicity.js",
81
+ "./families/words/prohibited-words": "./dist/families/words/prohibited-words.js"
82
+ },
83
+ "dependencies": {
84
+ "@lunarisapp/readability": "^1.1.0",
85
+ "sentence-splitter": "5.0.1",
86
+ "textlint-rule-helper": "2.5.0",
87
+ "textlint-util-to-string": "3.3.4"
88
+ },
89
+ "peerDependencies": {
90
+ "textlint": "^15.6.1"
91
+ },
92
+ "devDependencies": {
93
+ "@double-great/stylelint-a11y": "3.4.12",
94
+ "@eslint-community/eslint-plugin-eslint-comments": "4.7.1",
95
+ "@textlint/ast-node-types": "15.6.1",
96
+ "@textlint/types": "15.6.1",
97
+ "@typescript-eslint/eslint-plugin": "8.59.3",
98
+ "@typescript-eslint/parser": "8.59.3",
99
+ "cspell": "10.0.0",
100
+ "eslint": "9.39.1",
101
+ "eslint-plugin-import-x": "4.16.2",
102
+ "eslint-plugin-regexp": "3.1.0",
103
+ "eslint-plugin-sonarjs": "4.0.3",
104
+ "eslint-plugin-unicorn": "64.0.0",
105
+ "g3ts-eslint-plugin-style-policy": "0.1.3",
106
+ "jscpd": "4.1.1",
107
+ "prettier": "3.8.3",
108
+ "stylelint": "17.11.0",
109
+ "stylelint-config-standard": "40.0.0",
110
+ "stylelint-config-tailwindcss": "1.0.1",
111
+ "textlint": "15.6.1",
112
+ "type-coverage": "2.29.7",
113
+ "typescript": "5.9.3"
114
+ }
115
+ }