echokyt 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +52 -6
  2. package/dist/analyze/index.d.ts +2 -0
  3. package/dist/analyze/index.js +2 -0
  4. package/dist/analyze/readingEase.d.ts +4 -0
  5. package/dist/analyze/readingEase.js +31 -0
  6. package/dist/cjs/analyze/index.cjs +18 -0
  7. package/dist/cjs/analyze/readingEase.cjs +35 -0
  8. package/dist/cjs/analyze/readingTime.cjs +14 -0
  9. package/dist/cjs/echokyt.cjs +66 -0
  10. package/dist/cjs/format/index.cjs +18 -0
  11. package/dist/cjs/format/slugify.cjs +16 -0
  12. package/dist/cjs/format/truncate.cjs +15 -0
  13. package/dist/cjs/index.cjs +21 -0
  14. package/dist/cjs/parse/hashtags.cjs +22 -0
  15. package/dist/cjs/parse/index.cjs +18 -0
  16. package/dist/cjs/parse/keywordDensity.cjs +35 -0
  17. package/dist/cjs/parse/mentions.cjs +22 -0
  18. package/dist/cjs/parse/stopWords.cjs +129 -0
  19. package/dist/cjs/sanitize/cleanWhitespace.cjs +12 -0
  20. package/dist/cjs/sanitize/index.cjs +16 -0
  21. package/dist/cjs/sanitize/normalizeQuotes.cjs +12 -0
  22. package/dist/cjs/sanitize/removeScriptsOrStyles.cjs +14 -0
  23. package/dist/cjs/sanitize/stripHtml.cjs +22 -0
  24. package/dist/cjs/sanitize/stripInvisibleChars.cjs +13 -0
  25. package/dist/cjs/sanitize/stripPromptPatterns.cjs +21 -0
  26. package/dist/echokyt.d.ts +22 -0
  27. package/dist/echokyt.js +62 -0
  28. package/dist/format/index.d.ts +2 -0
  29. package/dist/format/index.js +2 -0
  30. package/dist/index.d.ts +5 -3
  31. package/dist/index.js +5 -3
  32. package/dist/parse/hashtags.d.ts +9 -0
  33. package/dist/parse/hashtags.js +18 -0
  34. package/dist/parse/index.d.ts +14 -0
  35. package/dist/parse/index.js +10 -0
  36. package/dist/parse/keywordDensity.d.ts +9 -0
  37. package/dist/parse/keywordDensity.js +31 -0
  38. package/dist/parse/mentions.d.ts +9 -0
  39. package/dist/parse/mentions.js +18 -0
  40. package/dist/parse/stopWords.d.ts +1 -0
  41. package/dist/parse/stopWords.js +126 -0
  42. package/dist/sanitize/cleanWhitespace.d.ts +4 -0
  43. package/dist/sanitize/cleanWhitespace.js +8 -0
  44. package/dist/sanitize/index.d.ts +6 -0
  45. package/dist/sanitize/index.js +6 -0
  46. package/dist/sanitize/normalizeQuotes.d.ts +4 -0
  47. package/dist/sanitize/normalizeQuotes.js +8 -0
  48. package/dist/sanitize/removeScriptsOrStyles.d.ts +4 -0
  49. package/dist/sanitize/removeScriptsOrStyles.js +10 -0
  50. package/dist/sanitize/stripHtml.d.ts +4 -0
  51. package/dist/sanitize/stripHtml.js +18 -0
  52. package/dist/sanitize/stripInvisibleChars.d.ts +4 -0
  53. package/dist/sanitize/stripInvisibleChars.js +9 -0
  54. package/dist/sanitize/stripPromptPatterns.d.ts +5 -0
  55. package/dist/sanitize/stripPromptPatterns.js +17 -0
  56. package/package.json +94 -7
package/README.md CHANGED
@@ -1,19 +1,65 @@
1
- # 📦 EchoKit
1
+ # 📦 EchoKyt
2
2
 
3
- A lightweight, zero-dependency utility library for smart text manipulation in TypeScript.
3
+ A lightweight, zero-runtime-dependency utility library for smart text manipulation in TypeScript.
4
4
 
5
- [![Tests](https://github.com/nyigoro/EchoKit/actions/workflows/test.yml/badge.svg)](https://github.com/nyigoro/EchoKit/actions)
5
+ [![Tests](https://github.com/nyigoro/EchoKit/actions/workflows/ci.yml/badge.svg)](https://github.com/nyigoro/EchoKit/actions)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
7
 
8
8
  ## ✨ Features
9
9
 
10
+ - **Fluent API**: Chainable, readable text pipelines.
11
+ - **Parser Utilities**: Mentions, hashtags, keyword density.
12
+ - **Sanitization & Safety**: Strip HTML, scripts/styles, normalize quotes.
13
+ - **Prompt Hygiene**: Clean whitespace, invisible chars, and common markers.
10
14
  - **Smart Truncate**: Cuts text at word boundaries, not mid-word.
11
- - **Reading Time**: Quick estimation for blog posts.
12
- - **Zero Dependencies**: Keep your bundle size tiny.
15
+ - **Reading Metrics**: Reading time + Flesch Reading Ease score.
13
16
  - **TypeScript Ready**: Full type definitions included.
14
17
 
15
18
  ## 🚀 Installation
16
19
 
17
20
  ```bash
18
- npm install echo-kit
21
+ npm install echokyt
19
22
  ```
23
+
24
+ ## ✅ Quick Example (Fluent)
25
+
26
+ ```ts
27
+ import { EchoKyt } from 'echokyt';
28
+
29
+ const score = EchoKyt.from('<p>“Hello” <strong>world</strong><script>alert(1)</script></p>')
30
+ .removeScriptsOrStyles()
31
+ .stripHtml(['strong', 'em'])
32
+ .normalizeQuotes()
33
+ .getReadingEase();
34
+
35
+ // Flesch Reading Ease score (approx.)
36
+ ```
37
+
38
+ ## ✅ Quick Example (Functional)
39
+
40
+ ```ts
41
+ import {
42
+ extractMentions,
43
+ extractHashtags,
44
+ getKeywordDensity,
45
+ stripHtml,
46
+ normalizeQuotes,
47
+ cleanWhitespace,
48
+ } from 'echokyt';
49
+
50
+ const text = 'Hello @alice, welcome to #EchoKyt!';
51
+
52
+ extractMentions(text); // [{ value: 'alice', index: 6, raw: '@alice' }]
53
+ extractHashtags(text); // [{ value: 'EchoKyt', index: 25, raw: '#EchoKyt' }]
54
+ getKeywordDensity(text); // { hello: 0.25, alice: 0.25, welcome: 0.25, echokyt: 0.25 }
55
+ stripHtml('<p>Hello <strong>World</strong></p>'); // "Hello World"
56
+ normalizeQuotes('“Hello”'); // "\"Hello\""
57
+ cleanWhitespace(' hi there '); // "hi there"
58
+ ```
59
+
60
+ ## 📊 Reading Ease Guide (Flesch)
61
+
62
+ - 90–100: Very Easy
63
+ - 60–70: Standard / Plain English
64
+ - 30–50: Academic
65
+ - 0–30: Very Difficult
@@ -0,0 +1,2 @@
1
+ export * from './readingTime.js';
2
+ export * from './readingEase.js';
@@ -0,0 +1,2 @@
1
+ export * from './readingTime.js';
2
+ export * from './readingEase.js';
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Computes the Flesch Reading Ease score (approximate, English heuristic).
3
+ */
4
+ export declare const getReadingEase: (text: string) => number;
@@ -0,0 +1,31 @@
1
// ASCII words, optionally containing inner apostrophes ("don't").
const WORD_REGEX = /[A-Za-z]+(?:'[A-Za-z]+)*/g;
// A run of terminal punctuation only ends a sentence when it is followed
// (after optional closing quotes/brackets) by whitespace or end of text, so
// decimals ("3.14") and dotted tokens ("example.com") are not split.
const SENTENCE_REGEX = /[.!?]+(?=["')\]\u201D\u2019]*(?:\s|$))/;
const HAS_LETTER_REGEX = /[A-Za-z]/;

/**
 * Heuristic English syllable counter.
 *
 * Counts runs of vowels (a, e, i, o, u, y), discounts a trailing silent "e"
 * (but not "-le" / "-ye" endings) and never returns less than 1 for a
 * non-empty word. Words of three letters or fewer count as one syllable.
 *
 * @param {string} raw - A single word; non-letters are ignored.
 * @returns {number} Estimated syllable count (0 only when no letters remain).
 */
const countSyllables = (raw) => {
    const word = raw.toLowerCase().replace(/[^a-z]/g, '');
    if (!word) {
        return 0;
    }
    if (word.length <= 3) {
        return 1;
    }
    const groups = word.match(/[aeiouy]+/g);
    let syllables = groups ? groups.length : 0;
    if (word.endsWith('e') && !word.endsWith('le') && !word.endsWith('ye')) {
        syllables -= 1;
    }
    return syllables > 0 ? syllables : 1;
};

/**
 * Counts sentences as the text segments between sentence breaks that contain
 * at least one letter. This also counts a final sentence that lacks terminal
 * punctuation and ignores standalone punctuation such as a bare "...".
 *
 * @param {string} text - Text with markup already removed.
 * @returns {number} Sentence count, never less than 1.
 */
const countSentences = (text) => {
    const segments = text.split(SENTENCE_REGEX);
    const count = segments.filter((segment) => HAS_LETTER_REGEX.test(segment)).length;
    return count > 0 ? count : 1;
};

/**
 * Computes the Flesch Reading Ease score (approximate, English heuristic).
 *
 * Anything that looks like an HTML tag is replaced by a space before counting.
 *
 * @param {string} text - Text to score.
 * @returns {number} 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words),
 *   or 0 when the text is empty or contains no ASCII words.
 */
export const getReadingEase = (text) => {
    if (!text || !text.trim()) {
        return 0;
    }
    const sanitized = text.replace(/<[^>]*>/g, ' ');
    const words = sanitized.match(WORD_REGEX) ?? [];
    if (words.length === 0) {
        return 0;
    }
    const sentences = countSentences(sanitized);
    const syllables = words.reduce((sum, word) => sum + countSyllables(word), 0);
    const wordsPerSentence = words.length / sentences;
    const syllablesPerWord = syllables / words.length;
    return 206.835 - 1.015 * wordsPerSentence - 84.6 * syllablesPerWord;
};
@@ -0,0 +1,18 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ __exportStar(require("./readingTime.js"), exports);
18
+ __exportStar(require("./readingEase.js"), exports);
@@ -0,0 +1,35 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getReadingEase = void 0;
4
// ASCII words, optionally containing inner apostrophes ("don't").
const WORD_REGEX = /[A-Za-z]+(?:'[A-Za-z]+)*/g;
// A run of terminal punctuation only ends a sentence when it is followed
// (after optional closing quotes/brackets) by whitespace or end of text, so
// decimals ("3.14") and dotted tokens ("example.com") are not split.
const SENTENCE_REGEX = /[.!?]+(?=["')\]\u201D\u2019]*(?:\s|$))/;
const HAS_LETTER_REGEX = /[A-Za-z]/;

/**
 * Heuristic English syllable counter.
 *
 * Counts runs of vowels (a, e, i, o, u, y), discounts a trailing silent "e"
 * (but not "-le" / "-ye" endings) and never returns less than 1 for a
 * non-empty word. Words of three letters or fewer count as one syllable.
 *
 * @param {string} raw - A single word; non-letters are ignored.
 * @returns {number} Estimated syllable count (0 only when no letters remain).
 */
const countSyllables = (raw) => {
    const word = raw.toLowerCase().replace(/[^a-z]/g, '');
    if (!word) {
        return 0;
    }
    if (word.length <= 3) {
        return 1;
    }
    const groups = word.match(/[aeiouy]+/g);
    let syllables = groups ? groups.length : 0;
    if (word.endsWith('e') && !word.endsWith('le') && !word.endsWith('ye')) {
        syllables -= 1;
    }
    return syllables > 0 ? syllables : 1;
};

/**
 * Counts sentences as the text segments between sentence breaks that contain
 * at least one letter. This also counts a final sentence that lacks terminal
 * punctuation and ignores standalone punctuation such as a bare "...".
 *
 * @param {string} text - Text with markup already removed.
 * @returns {number} Sentence count, never less than 1.
 */
const countSentences = (text) => {
    const segments = text.split(SENTENCE_REGEX);
    const count = segments.filter((segment) => HAS_LETTER_REGEX.test(segment)).length;
    return count > 0 ? count : 1;
};

/**
 * Computes the Flesch Reading Ease score (approximate, English heuristic).
 *
 * Anything that looks like an HTML tag is replaced by a space before counting.
 *
 * @param {string} text - Text to score.
 * @returns {number} 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words),
 *   or 0 when the text is empty or contains no ASCII words.
 */
const getReadingEase = (text) => {
    if (!text || !text.trim()) {
        return 0;
    }
    const sanitized = text.replace(/<[^>]*>/g, ' ');
    const words = sanitized.match(WORD_REGEX) ?? [];
    if (words.length === 0) {
        return 0;
    }
    const sentences = countSentences(sanitized);
    const syllables = words.reduce((sum, word) => sum + countSyllables(word), 0);
    const wordsPerSentence = words.length / sentences;
    const syllablesPerWord = syllables / words.length;
    return 206.835 - 1.015 * wordsPerSentence - 84.6 * syllablesPerWord;
};
35
+ exports.getReadingEase = getReadingEase;
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.readingTime = void 0;
4
+ /**
5
+ * Estimates reading time in minutes.
6
+ * Assumes an average reading speed of 200 WPM.
7
+ */
8
+ const readingTime = (text, wordsPerMinute = 200) => {
9
+ if (!text.trim())
10
+ return 0;
11
+ const words = text.trim().split(/\s+/).length;
12
+ return Math.ceil(words / wordsPerMinute);
13
+ };
14
+ exports.readingTime = readingTime;
@@ -0,0 +1,66 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.EchoKyt = void 0;
4
+ const readingTime_js_1 = require("./analyze/readingTime.js");
5
+ const readingEase_js_1 = require("./analyze/readingEase.js");
6
+ const slugify_js_1 = require("./format/slugify.js");
7
+ const truncate_js_1 = require("./format/truncate.js");
8
+ const index_js_1 = require("./parse/index.js");
9
+ const index_js_2 = require("./sanitize/index.js");
10
/**
 * Fluent, immutable wrapper around a piece of text.
 *
 * Transforming methods return a new EchoKyt holding the transformed text;
 * analysis methods return the helper's result directly. Each method delegates
 * to the helper of the same purpose imported at the top of this module.
 */
class EchoKyt {
    /**
     * @param {string} text - Text to wrap. null/undefined are stored as ''
     *   so that instances built with `new` behave like those from `from()`.
     */
    constructor(text) {
        this.text = text ?? '';
    }
    /**
     * Creates a wrapper; null/undefined become the empty string.
     * @param {string} text
     * @returns {EchoKyt}
     */
    static from(text) {
        return new EchoKyt(text ?? '');
    }
    /** @returns {string} The wrapped text. */
    value() {
        return this.text;
    }
    /** @returns {string} The wrapped text (enables string coercion). */
    toString() {
        return this.text;
    }
    /** Applies the `cleanWhitespace` sanitizer. @returns {EchoKyt} */
    cleanWhitespace() {
        return new EchoKyt((0, index_js_2.cleanWhitespace)(this.text));
    }
    /** Applies the `stripInvisibleChars` sanitizer. @returns {EchoKyt} */
    stripInvisible() {
        return new EchoKyt((0, index_js_2.stripInvisibleChars)(this.text));
    }
    /** Applies the `removeScriptsOrStyles` sanitizer. @returns {EchoKyt} */
    removeScriptsOrStyles() {
        return new EchoKyt((0, index_js_2.removeScriptsOrStyles)(this.text));
    }
    /**
     * Applies the `stripHtml` sanitizer.
     * @param {string[]} [allowedTags] - Forwarded unchanged to `stripHtml`.
     * @returns {EchoKyt}
     */
    stripHtml(allowedTags) {
        return new EchoKyt((0, index_js_2.stripHtml)(this.text, allowedTags));
    }
    /** Applies the `normalizeQuotes` sanitizer. @returns {EchoKyt} */
    normalizeQuotes() {
        return new EchoKyt((0, index_js_2.normalizeQuotes)(this.text));
    }
    /**
     * Applies the `stripPromptPatterns` sanitizer.
     * @param {*} [patterns] - Forwarded unchanged to `stripPromptPatterns`.
     * @returns {EchoKyt}
     */
    stripPromptPatterns(patterns) {
        return new EchoKyt((0, index_js_2.stripPromptPatterns)(this.text, patterns));
    }
    /**
     * Applies `smartTruncate`.
     * @param {number} limit - Forwarded unchanged to `smartTruncate`.
     * @returns {EchoKyt}
     */
    truncate(limit) {
        return new EchoKyt((0, truncate_js_1.smartTruncate)(this.text, limit));
    }
    /** Applies `slugify`. @returns {EchoKyt} */
    slug() {
        return new EchoKyt((0, slugify_js_1.slugify)(this.text));
    }
    /** @returns The result of `extractMentions` for the wrapped text. */
    mentions() {
        return (0, index_js_1.extractMentions)(this.text);
    }
    /** @returns The result of `extractHashtags` for the wrapped text. */
    hashtags() {
        return (0, index_js_1.extractHashtags)(this.text);
    }
    /** @returns The result of `parseMentionsAndHashtags` for the wrapped text. */
    parse() {
        return (0, index_js_1.parseMentionsAndHashtags)(this.text);
    }
    /**
     * @param {object} [options] - Forwarded unchanged to `getKeywordDensity`.
     * @returns The result of `getKeywordDensity` for the wrapped text.
     */
    keywordDensity(options) {
        return (0, index_js_1.getKeywordDensity)(this.text, options);
    }
    /**
     * @param {number} [wordsPerMinute] - Forwarded unchanged to `readingTime`.
     * @returns The result of `readingTime` for the wrapped text.
     */
    readingTime(wordsPerMinute) {
        return (0, readingTime_js_1.readingTime)(this.text, wordsPerMinute);
    }
    /** @returns The result of `getReadingEase` for the wrapped text. */
    getReadingEase() {
        return (0, readingEase_js_1.getReadingEase)(this.text);
    }
}
66
+ exports.EchoKyt = EchoKyt;
@@ -0,0 +1,18 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ __exportStar(require("./slugify.js"), exports);
18
+ __exportStar(require("./truncate.js"), exports);
@@ -0,0 +1,16 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.slugify = void 0;
4
+ /**
5
+ * Converts a string into a URL-friendly slug.
6
+ * Example: "Hello World!" -> "hello-world"
7
+ */
8
+ const slugify = (text) => {
9
+ return text
10
+ .toLowerCase()
11
+ .trim()
12
+ .replace(/[^\w\s-]/g, '') // Remove special characters
13
+ .replace(/[\s_-]+/g, '-') // Replace spaces/underscores with a single hyphen
14
+ .replace(/^-+|-+$/g, ''); // Remove leading/trailing hyphens
15
+ };
16
+ exports.slugify = slugify;
@@ -0,0 +1,15 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.smartTruncate = void 0;
4
/**
 * Truncates text at the nearest whole word and appends "...".
 *
 * Text that already fits within `limit` is returned unchanged. The ellipsis is
 * added on top of the kept text, so the result may be up to three characters
 * longer than `limit`.
 *
 * @param {string} text - Text to shorten; null/undefined yields ''.
 * @param {number} limit - Maximum number of UTF-16 code units to keep.
 * @returns {string} The original text, or the shortened text followed by "...".
 */
const smartTruncate = (text, limit) => {
    if (text == null) {
        return '';
    }
    if (text.length <= limit) {
        return text;
    }
    const head = text.slice(0, limit);
    let cut;
    if (/\s/.test(text.charAt(limit))) {
        // The limit falls exactly on a word end: every word in `head` is
        // complete, so nothing needs to be dropped.
        cut = head;
    }
    else {
        const lastSpace = head.lastIndexOf(' ');
        cut = lastSpace > 0 ? head.slice(0, lastSpace) : head;
    }
    // Never leave half of a surrogate pair (e.g. a split emoji) at the end.
    const lastCode = cut.charCodeAt(cut.length - 1);
    if (lastCode >= 0xd800 && lastCode <= 0xdbff) {
        cut = cut.slice(0, -1);
    }
    return `${cut.trimEnd()}...`;
};
15
+ exports.smartTruncate = smartTruncate;
@@ -0,0 +1,21 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ __exportStar(require("./format/index.js"), exports);
18
+ __exportStar(require("./analyze/index.js"), exports);
19
+ __exportStar(require("./parse/index.js"), exports);
20
+ __exportStar(require("./sanitize/index.js"), exports);
21
+ __exportStar(require("./echokyt.js"), exports);
@@ -0,0 +1,22 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.extractHashtags = void 0;
4
// "#" must be at the start of the text or preceded by a non-word character;
// the tag itself is 1-64 letters, combining marks, digits or underscores.
const HASHTAG_REGEX = /(^|[^\p{L}\p{M}0-9_])#([\p{L}\p{M}0-9_]{1,64})/gu;
/**
 * Extracts #hashtags from text.
 *
 * @param {string} text - Text to scan; empty/null input yields [].
 * @returns {{ value: string, index: number, raw: string }[]} One entry per
 *   hashtag in order of appearance: `value` is the tag without "#", `index`
 *   is the UTF-16 offset of the "#", and `raw` is "#" + value.
 */
const extractHashtags = (text) => {
    if (!text) {
        return [];
    }
    // matchAll iterates over a private copy of the regex, so the shared
    // module-level pattern never carries lastIndex state between calls.
    return Array.from(String(text).matchAll(HASHTAG_REGEX), (match) => {
        const [, prefix, value] = match;
        return { value, index: match.index + prefix.length, raw: `#${value}` };
    });
};
22
+ exports.extractHashtags = extractHashtags;
@@ -0,0 +1,18 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.parseMentionsAndHashtags = exports.DEFAULT_STOP_WORDS = exports.getKeywordDensity = exports.extractHashtags = exports.extractMentions = void 0;
4
+ const mentions_js_1 = require("./mentions.js");
5
+ const hashtags_js_1 = require("./hashtags.js");
6
+ var mentions_js_2 = require("./mentions.js");
7
+ Object.defineProperty(exports, "extractMentions", { enumerable: true, get: function () { return mentions_js_2.extractMentions; } });
8
+ var hashtags_js_2 = require("./hashtags.js");
9
+ Object.defineProperty(exports, "extractHashtags", { enumerable: true, get: function () { return hashtags_js_2.extractHashtags; } });
10
+ var keywordDensity_js_1 = require("./keywordDensity.js");
11
+ Object.defineProperty(exports, "getKeywordDensity", { enumerable: true, get: function () { return keywordDensity_js_1.getKeywordDensity; } });
12
+ var stopWords_js_1 = require("./stopWords.js");
13
+ Object.defineProperty(exports, "DEFAULT_STOP_WORDS", { enumerable: true, get: function () { return stopWords_js_1.DEFAULT_STOP_WORDS; } });
14
/**
 * Runs both extractors over the same text.
 *
 * @param {string} text - Text to scan.
 * @returns {{ mentions: Array, hashtags: Array }} The results of
 *   `extractMentions` and `extractHashtags`, in that evaluation order.
 */
const parseMentionsAndHashtags = (text) => {
    const mentions = (0, mentions_js_1.extractMentions)(text);
    const hashtags = (0, hashtags_js_1.extractHashtags)(text);
    return { mentions, hashtags };
};
18
+ exports.parseMentionsAndHashtags = parseMentionsAndHashtags;
@@ -0,0 +1,35 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getKeywordDensity = void 0;
4
+ const stopWords_js_1 = require("./stopWords.js");
5
// Tokens are runs of letters, combining marks, digits, underscores and apostrophes.
const WORD_REGEX = /[\p{L}\p{M}0-9_']+/gu;
/**
 * Returns keyword frequency (0-1) for filtered tokens in the text.
 *
 * @param {string} text - Text to analyze; empty/whitespace-only yields {}.
 * @param {object} [options]
 * @param {number} [options.minLength=3] - Tokens shorter than this are skipped.
 * @param {boolean} [options.normalize=true] - Lowercase tokens and stop words
 *   before comparing and counting.
 * @param {string[]} [options.stopWords] - Tokens to ignore; defaults to
 *   DEFAULT_STOP_WORDS.
 * @returns {Record<string, number>} Each kept token mapped to its share of
 *   all kept tokens.
 */
const getKeywordDensity = (text, options = {}) => {
    if (!text || !text.trim()) {
        return {};
    }
    const settings = options ?? {};
    const minLength = settings.minLength ?? 3;
    const normalize = settings.normalize ?? true;
    const stopWords = settings.stopWords ?? [...stopWords_js_1.DEFAULT_STOP_WORDS];
    const stopSet = new Set(normalize ? stopWords.map((word) => word.toLowerCase()) : stopWords);
    const tokens = text.match(WORD_REGEX) ?? [];
    // Count in a Map: with a plain object, tokens such as "constructor" or
    // "__proto__" would read inherited Object.prototype members instead of
    // starting from zero, corrupting or dropping their counts.
    const counts = new Map();
    let total = 0;
    for (const raw of tokens) {
        const token = normalize ? raw.toLowerCase() : raw;
        if (token.length < minLength || stopSet.has(token)) {
            continue;
        }
        counts.set(token, (counts.get(token) ?? 0) + 1);
        total += 1;
    }
    if (total === 0) {
        return {};
    }
    // Object.fromEntries defines own data properties, so even "__proto__"
    // becomes an ordinary key of the result.
    return Object.fromEntries(Array.from(counts, ([token, count]) => [token, count / total]));
};
35
+ exports.getKeywordDensity = getKeywordDensity;
@@ -0,0 +1,22 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.extractMentions = void 0;
4
// "@" must be at the start of the text or preceded by a non-word character;
// the handle itself is 1-32 letters, combining marks, digits or underscores.
const MENTION_REGEX = /(^|[^\p{L}\p{M}0-9_])@([\p{L}\p{M}0-9_]{1,32})/gu;
/**
 * Extracts @mentions from text.
 *
 * @param {string} text - Text to scan; empty/null input yields [].
 * @returns {{ value: string, index: number, raw: string }[]} One entry per
 *   mention in order of appearance: `value` is the handle without "@",
 *   `index` is the UTF-16 offset of the "@", and `raw` is "@" + value.
 */
const extractMentions = (text) => {
    if (!text) {
        return [];
    }
    // matchAll iterates over a private copy of the regex, so the shared
    // module-level pattern never carries lastIndex state between calls.
    return Array.from(String(text).matchAll(MENTION_REGEX), (match) => {
        const [, prefix, value] = match;
        return { value, index: match.index + prefix.length, raw: `@${value}` };
    });
};
22
+ exports.extractMentions = extractMentions;
@@ -0,0 +1,129 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DEFAULT_STOP_WORDS = void 0;
4
+ exports.DEFAULT_STOP_WORDS = [
5
+ 'a',
6
+ 'about',
7
+ 'above',
8
+ 'after',
9
+ 'again',
10
+ 'against',
11
+ 'all',
12
+ 'am',
13
+ 'an',
14
+ 'and',
15
+ 'any',
16
+ 'are',
17
+ 'as',
18
+ 'at',
19
+ 'be',
20
+ 'because',
21
+ 'been',
22
+ 'before',
23
+ 'being',
24
+ 'below',
25
+ 'between',
26
+ 'both',
27
+ 'but',
28
+ 'by',
29
+ 'can',
30
+ 'did',
31
+ 'do',
32
+ 'does',
33
+ 'doing',
34
+ 'down',
35
+ 'during',
36
+ 'each',
37
+ 'few',
38
+ 'for',
39
+ 'from',
40
+ 'further',
41
+ 'had',
42
+ 'has',
43
+ 'have',
44
+ 'having',
45
+ 'he',
46
+ 'her',
47
+ 'here',
48
+ 'hers',
49
+ 'herself',
50
+ 'him',
51
+ 'himself',
52
+ 'his',
53
+ 'how',
54
+ 'i',
55
+ 'if',
56
+ 'in',
57
+ 'into',
58
+ 'is',
59
+ 'it',
60
+ 'its',
61
+ 'itself',
62
+ 'just',
63
+ 'me',
64
+ 'more',
65
+ 'most',
66
+ 'my',
67
+ 'myself',
68
+ 'no',
69
+ 'nor',
70
+ 'not',
71
+ 'now',
72
+ 'of',
73
+ 'off',
74
+ 'on',
75
+ 'once',
76
+ 'only',
77
+ 'or',
78
+ 'other',
79
+ 'our',
80
+ 'ours',
81
+ 'ourselves',
82
+ 'out',
83
+ 'over',
84
+ 'own',
85
+ 'same',
86
+ 'she',
87
+ 'should',
88
+ 'so',
89
+ 'some',
90
+ 'such',
91
+ 'than',
92
+ 'that',
93
+ 'the',
94
+ 'their',
95
+ 'theirs',
96
+ 'them',
97
+ 'themselves',
98
+ 'then',
99
+ 'there',
100
+ 'these',
101
+ 'they',
102
+ 'this',
103
+ 'those',
104
+ 'through',
105
+ 'to',
106
+ 'too',
107
+ 'under',
108
+ 'until',
109
+ 'up',
110
+ 'very',
111
+ 'was',
112
+ 'we',
113
+ 'were',
114
+ 'what',
115
+ 'when',
116
+ 'where',
117
+ 'which',
118
+ 'while',
119
+ 'who',
120
+ 'whom',
121
+ 'why',
122
+ 'with',
123
+ 'would',
124
+ 'you',
125
+ 'your',
126
+ 'yours',
127
+ 'yourself',
128
+ 'yourselves',
129
+ ];
@@ -0,0 +1,12 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.cleanWhitespace = void 0;
4
+ /**
5
+ * Collapses whitespace and trims the ends.
6
+ */
7
+ const cleanWhitespace = (text) => {
8
+ if (!text)
9
+ return '';
10
+ return text.replace(/\s+/g, ' ').trim();
11
+ };
12
+ exports.cleanWhitespace = cleanWhitespace;
@@ -0,0 +1,16 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.normalizeQuotes = exports.removeScriptsOrStyles = exports.stripHtml = exports.DEFAULT_PROMPT_PATTERNS = exports.stripPromptPatterns = exports.stripInvisibleChars = exports.cleanWhitespace = void 0;
4
+ var cleanWhitespace_js_1 = require("./cleanWhitespace.js");
5
+ Object.defineProperty(exports, "cleanWhitespace", { enumerable: true, get: function () { return cleanWhitespace_js_1.cleanWhitespace; } });
6
+ var stripInvisibleChars_js_1 = require("./stripInvisibleChars.js");
7
+ Object.defineProperty(exports, "stripInvisibleChars", { enumerable: true, get: function () { return stripInvisibleChars_js_1.stripInvisibleChars; } });
8
+ var stripPromptPatterns_js_1 = require("./stripPromptPatterns.js");
9
+ Object.defineProperty(exports, "stripPromptPatterns", { enumerable: true, get: function () { return stripPromptPatterns_js_1.stripPromptPatterns; } });
10
+ Object.defineProperty(exports, "DEFAULT_PROMPT_PATTERNS", { enumerable: true, get: function () { return stripPromptPatterns_js_1.DEFAULT_PROMPT_PATTERNS; } });
11
+ var stripHtml_js_1 = require("./stripHtml.js");
12
+ Object.defineProperty(exports, "stripHtml", { enumerable: true, get: function () { return stripHtml_js_1.stripHtml; } });
13
+ var removeScriptsOrStyles_js_1 = require("./removeScriptsOrStyles.js");
14
+ Object.defineProperty(exports, "removeScriptsOrStyles", { enumerable: true, get: function () { return removeScriptsOrStyles_js_1.removeScriptsOrStyles; } });
15
+ var normalizeQuotes_js_1 = require("./normalizeQuotes.js");
16
+ Object.defineProperty(exports, "normalizeQuotes", { enumerable: true, get: function () { return normalizeQuotes_js_1.normalizeQuotes; } });
@@ -0,0 +1,12 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.normalizeQuotes = void 0;
4
+ /**
5
+ * Normalizes smart quotes to straight quotes.
6
+ */
7
+ const normalizeQuotes = (text) => {
8
+ if (!text)
9
+ return '';
10
+ return text.replace(/[β€œβ€]/g, '"').replace(/[β€˜β€™]/g, "'");
11
+ };
12
+ exports.normalizeQuotes = normalizeQuotes;
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.removeScriptsOrStyles = void 0;
4
// Closing tags may carry trailing whitespace or junk ("</script >"), which
// browsers still treat as the end of the element.
const SCRIPT_REGEX = /<script\b[^>]*>[\s\S]*?<\/script\b[^>]*>/gi;
const STYLE_REGEX = /<style\b[^>]*>[\s\S]*?<\/style\b[^>]*>/gi;
/**
 * Removes <script> and <style> blocks entirely. Preserves <noscript>.
 *
 * Only blocks with both an opening and a closing tag are removed.
 *
 * @param {string} text - Markup to clean; falsy input yields ''.
 * @returns {string} The markup without script/style blocks.
 */
const removeScriptsOrStyles = (text) => {
    if (!text) {
        return '';
    }
    let current = String(text);
    let previous;
    // Repeat until stable: removing one block can splice the surrounding
    // fragments into a new one ("<scr<script></script>ipt>...").
    do {
        previous = current;
        current = current.replace(SCRIPT_REGEX, '').replace(STYLE_REGEX, '');
    } while (current !== previous);
    return current;
};
14
+ exports.removeScriptsOrStyles = removeScriptsOrStyles;
@@ -0,0 +1,22 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.stripHtml = void 0;
4
// Opening, closing or self-closing tag; group 1 is the tag name.
const TAG_REGEX = /<\/?([A-Za-z][A-Za-z0-9-]*)\b[^>]*>/g;
/**
 * Removes HTML tags, optionally preserving a whitelist of tag names (attributes are always stripped).
 *
 * Allowed tags are rewritten to a bare lowercase `<name>` / `</name>`; every
 * other tag is deleted while its text content is kept.
 *
 * @param {string} text - Markup to strip; falsy input yields ''.
 * @param {string[]} [allowedTags=[]] - Tag names to keep (case-insensitive).
 * @returns {string} The text with disallowed tags removed.
 */
const stripHtml = (text, allowedTags = []) => {
    if (!text) {
        return '';
    }
    const allowed = new Set((allowedTags ?? []).map((tag) => tag.toLowerCase()));
    const rewriteTag = (match, tag) => {
        const name = String(tag).toLowerCase();
        if (!allowed.has(name)) {
            return '';
        }
        return match.startsWith('</') ? `</${name}>` : `<${name}>`;
    };
    let current = String(text);
    let previous;
    // Repeat until stable: deleting a tag can splice the surrounding
    // fragments into a new tag ("<<p>script>"), at any nesting depth.
    do {
        previous = current;
        current = current.replace(TAG_REGEX, rewriteTag);
    } while (current !== previous);
    return current;
};
22
+ exports.stripHtml = stripHtml;
@@ -0,0 +1,13 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.stripInvisibleChars = void 0;
4
// U+200B-U+200D, U+2060, U+FEFF and U+180E.
const INVISIBLE_REGEX = /[\u200B-\u200D\u2060\uFEFF\u180E]/g;
/**
 * Deletes every character matched by INVISIBLE_REGEX from the text.
 *
 * @param {string} text - Text to clean; falsy input yields ''.
 * @returns {string} The text without the listed invisible characters.
 */
const stripInvisibleChars = (text) => {
    const hasContent = Boolean(text);
    return hasContent ? text.replace(INVISIBLE_REGEX, '') : '';
};
13
+ exports.stripInvisibleChars = stripInvisibleChars;