echokyt 0.0.3 β 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -5
- package/dist/analyze/index.d.ts +1 -0
- package/dist/analyze/index.js +1 -0
- package/dist/analyze/readingEase.d.ts +4 -0
- package/dist/analyze/readingEase.js +31 -0
- package/dist/cjs/analyze/index.cjs +1 -0
- package/dist/cjs/analyze/readingEase.cjs +35 -0
- package/dist/cjs/echokyt.cjs +66 -0
- package/dist/cjs/index.cjs +2 -0
- package/dist/cjs/parse/hashtags.cjs +1 -1
- package/dist/cjs/parse/index.cjs +5 -1
- package/dist/cjs/parse/keywordDensity.cjs +35 -0
- package/dist/cjs/parse/mentions.cjs +1 -1
- package/dist/cjs/parse/stopWords.cjs +129 -0
- package/dist/cjs/sanitize/cleanWhitespace.cjs +12 -0
- package/dist/cjs/sanitize/index.cjs +16 -0
- package/dist/cjs/sanitize/normalizeQuotes.cjs +12 -0
- package/dist/cjs/sanitize/removeScriptsOrStyles.cjs +14 -0
- package/dist/cjs/sanitize/stripHtml.cjs +22 -0
- package/dist/cjs/sanitize/stripInvisibleChars.cjs +13 -0
- package/dist/cjs/sanitize/stripPromptPatterns.cjs +21 -0
- package/dist/echokyt.d.ts +22 -0
- package/dist/echokyt.js +62 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +2 -0
- package/dist/parse/hashtags.js +1 -1
- package/dist/parse/index.d.ts +9 -3
- package/dist/parse/index.js +2 -0
- package/dist/parse/keywordDensity.d.ts +9 -0
- package/dist/parse/keywordDensity.js +31 -0
- package/dist/parse/mentions.js +1 -1
- package/dist/parse/stopWords.d.ts +1 -0
- package/dist/parse/stopWords.js +126 -0
- package/dist/sanitize/cleanWhitespace.d.ts +4 -0
- package/dist/sanitize/cleanWhitespace.js +8 -0
- package/dist/sanitize/index.d.ts +6 -0
- package/dist/sanitize/index.js +6 -0
- package/dist/sanitize/normalizeQuotes.d.ts +4 -0
- package/dist/sanitize/normalizeQuotes.js +8 -0
- package/dist/sanitize/removeScriptsOrStyles.d.ts +4 -0
- package/dist/sanitize/removeScriptsOrStyles.js +10 -0
- package/dist/sanitize/stripHtml.d.ts +4 -0
- package/dist/sanitize/stripHtml.js +18 -0
- package/dist/sanitize/stripInvisibleChars.d.ts +4 -0
- package/dist/sanitize/stripInvisibleChars.js +9 -0
- package/dist/sanitize/stripPromptPatterns.d.ts +5 -0
- package/dist/sanitize/stripPromptPatterns.js +17 -0
- package/package.json +38 -4
package/README.md
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
|
-
# π¦
|
|
1
|
+
# π¦ EchoKyt
|
|
2
2
|
|
|
3
|
-
A lightweight, zero-dependency utility library for smart text manipulation in TypeScript.
|
|
3
|
+
A lightweight, zero-runtime-dependency utility library for smart text manipulation in TypeScript.
|
|
4
4
|
|
|
5
|
-
[](https://github.com/nyigoro/EchoKit/actions)
|
|
6
6
|
[](https://opensource.org/licenses/MIT)
|
|
7
7
|
|
|
8
8
|
## β¨ Features
|
|
9
9
|
|
|
10
|
+
- **Fluent API**: Chainable, readable text pipelines.
|
|
11
|
+
- **Parser Utilities**: Mentions, hashtags, keyword density.
|
|
12
|
+
- **Sanitization & Safety**: Strip HTML, scripts/styles, normalize quotes.
|
|
13
|
+
- **Prompt Hygiene**: Clean whitespace, invisible chars, and common markers.
|
|
10
14
|
- **Smart Truncate**: Cuts text at word boundaries, not mid-word.
|
|
11
|
-
- **Reading
|
|
12
|
-
- **Zero Dependencies**: Keep your bundle size tiny.
|
|
15
|
+
- **Reading Metrics**: Reading time + Flesch Reading Ease score.
|
|
13
16
|
- **TypeScript Ready**: Full type definitions included.
|
|
14
17
|
|
|
15
18
|
## π Installation
|
|
@@ -17,3 +20,46 @@ A lightweight, zero-dependency utility library for smart text manipulation in Ty
|
|
|
17
20
|
```bash
|
|
18
21
|
npm install echokyt
|
|
19
22
|
```
|
|
23
|
+
|
|
24
|
+
## β
Quick Example (Fluent)
|
|
25
|
+
|
|
26
|
+
```ts
|
|
27
|
+
import { EchoKyt } from 'echokyt';
|
|
28
|
+
|
|
29
|
+
const score = EchoKyt.from('<p>βHelloβ <strong>world</strong><script>alert(1)</script></p>')
|
|
30
|
+
.removeScriptsOrStyles()
|
|
31
|
+
.stripHtml(['strong', 'em'])
|
|
32
|
+
.normalizeQuotes()
|
|
33
|
+
.getReadingEase();
|
|
34
|
+
|
|
35
|
+
// Flesch Reading Ease score (approx.)
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## β
Quick Example (Functional)
|
|
39
|
+
|
|
40
|
+
```ts
|
|
41
|
+
import {
|
|
42
|
+
extractMentions,
|
|
43
|
+
extractHashtags,
|
|
44
|
+
getKeywordDensity,
|
|
45
|
+
stripHtml,
|
|
46
|
+
normalizeQuotes,
|
|
47
|
+
cleanWhitespace,
|
|
48
|
+
} from 'echokyt';
|
|
49
|
+
|
|
50
|
+
const text = 'Hello @alice, welcome to #EchoKyt!';
|
|
51
|
+
|
|
52
|
+
extractMentions(text); // [{ value: 'alice', index: 6, raw: '@alice' }]
|
|
53
|
+
extractHashtags(text); // [{ value: 'EchoKyt', index: 29, raw: '#EchoKyt' }]
|
|
54
|
+
getKeywordDensity(text); // { hello: 0.25, alice: 0.25, welcome: 0.25, echokyt: 0.25 }
|
|
55
|
+
stripHtml('<p>Hello <strong>World</strong></p>'); // "Hello World"
|
|
56
|
+
normalizeQuotes('βHelloβ'); // "\"Hello\""
|
|
57
|
+
cleanWhitespace(' hi there '); // "hi there"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## π Reading Ease Guide (Flesch)
|
|
61
|
+
|
|
62
|
+
- 90β100: Very Easy
|
|
63
|
+
- 60β70: Standard / Plain English
|
|
64
|
+
- 30β50: Academic
|
|
65
|
+
- 0β30: Very Difficult
|
package/dist/analyze/index.d.ts
CHANGED
package/dist/analyze/index.js
CHANGED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
const WORD_REGEX = /[A-Za-z]+(?:'[A-Za-z]+)*/g;
|
|
2
|
+
const SENTENCE_REGEX = /[.!?]+/g;
|
|
3
|
+
const countSyllables = (raw) => {
|
|
4
|
+
const word = raw.toLowerCase().replace(/[^a-z]/g, '');
|
|
5
|
+
if (!word)
|
|
6
|
+
return 0;
|
|
7
|
+
if (word.length <= 3)
|
|
8
|
+
return 1;
|
|
9
|
+
const groups = word.match(/[aeiouy]+/g);
|
|
10
|
+
let syllables = groups ? groups.length : 0;
|
|
11
|
+
if (word.endsWith('e') && !word.endsWith('le') && !word.endsWith('ye')) {
|
|
12
|
+
syllables -= 1;
|
|
13
|
+
}
|
|
14
|
+
return syllables > 0 ? syllables : 1;
|
|
15
|
+
};
|
|
16
|
+
/**
|
|
17
|
+
* Computes the Flesch Reading Ease score (approximate, English heuristic).
|
|
18
|
+
*/
|
|
19
|
+
export const getReadingEase = (text) => {
|
|
20
|
+
if (!text || !text.trim())
|
|
21
|
+
return 0;
|
|
22
|
+
const sanitized = text.replace(/<[^>]*>/g, ' ');
|
|
23
|
+
const words = sanitized.match(WORD_REGEX) ?? [];
|
|
24
|
+
if (words.length === 0)
|
|
25
|
+
return 0;
|
|
26
|
+
const sentences = (sanitized.match(SENTENCE_REGEX) ?? []).length || 1;
|
|
27
|
+
const syllables = words.reduce((sum, word) => sum + countSyllables(word), 0);
|
|
28
|
+
const wordsPerSentence = words.length / sentences;
|
|
29
|
+
const syllablesPerWord = syllables / words.length;
|
|
30
|
+
return 206.835 - 1.015 * wordsPerSentence - 84.6 * syllablesPerWord;
|
|
31
|
+
};
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.getReadingEase = void 0;
|
|
4
|
+
const WORD_REGEX = /[A-Za-z]+(?:'[A-Za-z]+)*/g;
|
|
5
|
+
const SENTENCE_REGEX = /[.!?]+/g;
|
|
6
|
+
const countSyllables = (raw) => {
|
|
7
|
+
const word = raw.toLowerCase().replace(/[^a-z]/g, '');
|
|
8
|
+
if (!word)
|
|
9
|
+
return 0;
|
|
10
|
+
if (word.length <= 3)
|
|
11
|
+
return 1;
|
|
12
|
+
const groups = word.match(/[aeiouy]+/g);
|
|
13
|
+
let syllables = groups ? groups.length : 0;
|
|
14
|
+
if (word.endsWith('e') && !word.endsWith('le') && !word.endsWith('ye')) {
|
|
15
|
+
syllables -= 1;
|
|
16
|
+
}
|
|
17
|
+
return syllables > 0 ? syllables : 1;
|
|
18
|
+
};
|
|
19
|
+
/**
|
|
20
|
+
* Computes the Flesch Reading Ease score (approximate, English heuristic).
|
|
21
|
+
*/
|
|
22
|
+
const getReadingEase = (text) => {
|
|
23
|
+
if (!text || !text.trim())
|
|
24
|
+
return 0;
|
|
25
|
+
const sanitized = text.replace(/<[^>]*>/g, ' ');
|
|
26
|
+
const words = sanitized.match(WORD_REGEX) ?? [];
|
|
27
|
+
if (words.length === 0)
|
|
28
|
+
return 0;
|
|
29
|
+
const sentences = (sanitized.match(SENTENCE_REGEX) ?? []).length || 1;
|
|
30
|
+
const syllables = words.reduce((sum, word) => sum + countSyllables(word), 0);
|
|
31
|
+
const wordsPerSentence = words.length / sentences;
|
|
32
|
+
const syllablesPerWord = syllables / words.length;
|
|
33
|
+
return 206.835 - 1.015 * wordsPerSentence - 84.6 * syllablesPerWord;
|
|
34
|
+
};
|
|
35
|
+
exports.getReadingEase = getReadingEase;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.EchoKyt = void 0;
|
|
4
|
+
const readingTime_js_1 = require("./analyze/readingTime.js");
|
|
5
|
+
const readingEase_js_1 = require("./analyze/readingEase.js");
|
|
6
|
+
const slugify_js_1 = require("./format/slugify.js");
|
|
7
|
+
const truncate_js_1 = require("./format/truncate.js");
|
|
8
|
+
const index_js_1 = require("./parse/index.js");
|
|
9
|
+
const index_js_2 = require("./sanitize/index.js");
|
|
10
|
+
class EchoKyt {
|
|
11
|
+
constructor(text) {
|
|
12
|
+
this.text = text;
|
|
13
|
+
}
|
|
14
|
+
static from(text) {
|
|
15
|
+
return new EchoKyt(text ?? '');
|
|
16
|
+
}
|
|
17
|
+
value() {
|
|
18
|
+
return this.text;
|
|
19
|
+
}
|
|
20
|
+
toString() {
|
|
21
|
+
return this.text;
|
|
22
|
+
}
|
|
23
|
+
cleanWhitespace() {
|
|
24
|
+
return new EchoKyt((0, index_js_2.cleanWhitespace)(this.text));
|
|
25
|
+
}
|
|
26
|
+
stripInvisible() {
|
|
27
|
+
return new EchoKyt((0, index_js_2.stripInvisibleChars)(this.text));
|
|
28
|
+
}
|
|
29
|
+
removeScriptsOrStyles() {
|
|
30
|
+
return new EchoKyt((0, index_js_2.removeScriptsOrStyles)(this.text));
|
|
31
|
+
}
|
|
32
|
+
stripHtml(allowedTags) {
|
|
33
|
+
return new EchoKyt((0, index_js_2.stripHtml)(this.text, allowedTags));
|
|
34
|
+
}
|
|
35
|
+
normalizeQuotes() {
|
|
36
|
+
return new EchoKyt((0, index_js_2.normalizeQuotes)(this.text));
|
|
37
|
+
}
|
|
38
|
+
stripPromptPatterns(patterns) {
|
|
39
|
+
return new EchoKyt((0, index_js_2.stripPromptPatterns)(this.text, patterns));
|
|
40
|
+
}
|
|
41
|
+
truncate(limit) {
|
|
42
|
+
return new EchoKyt((0, truncate_js_1.smartTruncate)(this.text, limit));
|
|
43
|
+
}
|
|
44
|
+
slug() {
|
|
45
|
+
return new EchoKyt((0, slugify_js_1.slugify)(this.text));
|
|
46
|
+
}
|
|
47
|
+
mentions() {
|
|
48
|
+
return (0, index_js_1.extractMentions)(this.text);
|
|
49
|
+
}
|
|
50
|
+
hashtags() {
|
|
51
|
+
return (0, index_js_1.extractHashtags)(this.text);
|
|
52
|
+
}
|
|
53
|
+
parse() {
|
|
54
|
+
return (0, index_js_1.parseMentionsAndHashtags)(this.text);
|
|
55
|
+
}
|
|
56
|
+
keywordDensity(options) {
|
|
57
|
+
return (0, index_js_1.getKeywordDensity)(this.text, options);
|
|
58
|
+
}
|
|
59
|
+
readingTime(wordsPerMinute) {
|
|
60
|
+
return (0, readingTime_js_1.readingTime)(this.text, wordsPerMinute);
|
|
61
|
+
}
|
|
62
|
+
getReadingEase() {
|
|
63
|
+
return (0, readingEase_js_1.getReadingEase)(this.text);
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
exports.EchoKyt = EchoKyt;
|
package/dist/cjs/index.cjs
CHANGED
|
@@ -17,3 +17,5 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
17
17
|
__exportStar(require("./format/index.js"), exports);
|
|
18
18
|
__exportStar(require("./analyze/index.js"), exports);
|
|
19
19
|
__exportStar(require("./parse/index.js"), exports);
|
|
20
|
+
__exportStar(require("./sanitize/index.js"), exports);
|
|
21
|
+
__exportStar(require("./echokyt.js"), exports);
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.extractHashtags = void 0;
|
|
4
|
-
const HASHTAG_REGEX = /(^|[^\
|
|
4
|
+
const HASHTAG_REGEX = /(^|[^\p{L}\p{M}0-9_])#([\p{L}\p{M}0-9_]{1,64})/gu;
|
|
5
5
|
/**
|
|
6
6
|
* Extracts #hashtags from text.
|
|
7
7
|
*/
|
package/dist/cjs/parse/index.cjs
CHANGED
|
@@ -1,12 +1,16 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.parseMentionsAndHashtags = exports.extractHashtags = exports.extractMentions = void 0;
|
|
3
|
+
exports.parseMentionsAndHashtags = exports.DEFAULT_STOP_WORDS = exports.getKeywordDensity = exports.extractHashtags = exports.extractMentions = void 0;
|
|
4
4
|
const mentions_js_1 = require("./mentions.js");
|
|
5
5
|
const hashtags_js_1 = require("./hashtags.js");
|
|
6
6
|
var mentions_js_2 = require("./mentions.js");
|
|
7
7
|
Object.defineProperty(exports, "extractMentions", { enumerable: true, get: function () { return mentions_js_2.extractMentions; } });
|
|
8
8
|
var hashtags_js_2 = require("./hashtags.js");
|
|
9
9
|
Object.defineProperty(exports, "extractHashtags", { enumerable: true, get: function () { return hashtags_js_2.extractHashtags; } });
|
|
10
|
+
var keywordDensity_js_1 = require("./keywordDensity.js");
|
|
11
|
+
Object.defineProperty(exports, "getKeywordDensity", { enumerable: true, get: function () { return keywordDensity_js_1.getKeywordDensity; } });
|
|
12
|
+
var stopWords_js_1 = require("./stopWords.js");
|
|
13
|
+
Object.defineProperty(exports, "DEFAULT_STOP_WORDS", { enumerable: true, get: function () { return stopWords_js_1.DEFAULT_STOP_WORDS; } });
|
|
10
14
|
const parseMentionsAndHashtags = (text) => ({
|
|
11
15
|
mentions: (0, mentions_js_1.extractMentions)(text),
|
|
12
16
|
hashtags: (0, hashtags_js_1.extractHashtags)(text),
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.getKeywordDensity = void 0;
|
|
4
|
+
const stopWords_js_1 = require("./stopWords.js");
|
|
5
|
+
const WORD_REGEX = /[\p{L}\p{M}0-9_']+/gu;
|
|
6
|
+
/**
|
|
7
|
+
* Returns keyword frequency (0-1) for filtered tokens in the text.
|
|
8
|
+
*/
|
|
9
|
+
const getKeywordDensity = (text, options = {}) => {
|
|
10
|
+
if (!text || !text.trim())
|
|
11
|
+
return {};
|
|
12
|
+
const minLength = options.minLength ?? 3;
|
|
13
|
+
const normalize = options.normalize ?? true;
|
|
14
|
+
const stopWords = options.stopWords ?? [...stopWords_js_1.DEFAULT_STOP_WORDS];
|
|
15
|
+
const stopSet = new Set(normalize ? stopWords.map((word) => word.toLowerCase()) : stopWords);
|
|
16
|
+
const tokens = text.match(WORD_REGEX) ?? [];
|
|
17
|
+
const counts = {};
|
|
18
|
+
let total = 0;
|
|
19
|
+
for (const raw of tokens) {
|
|
20
|
+
const token = normalize ? raw.toLowerCase() : raw;
|
|
21
|
+
if (token.length < minLength)
|
|
22
|
+
continue;
|
|
23
|
+
if (stopSet.has(token))
|
|
24
|
+
continue;
|
|
25
|
+
counts[token] = (counts[token] ?? 0) + 1;
|
|
26
|
+
total += 1;
|
|
27
|
+
}
|
|
28
|
+
if (total === 0)
|
|
29
|
+
return {};
|
|
30
|
+
for (const key of Object.keys(counts)) {
|
|
31
|
+
counts[key] = counts[key] / total;
|
|
32
|
+
}
|
|
33
|
+
return counts;
|
|
34
|
+
};
|
|
35
|
+
exports.getKeywordDensity = getKeywordDensity;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.extractMentions = void 0;
|
|
4
|
-
const MENTION_REGEX = /(^|[^\
|
|
4
|
+
const MENTION_REGEX = /(^|[^\p{L}\p{M}0-9_])@([\p{L}\p{M}0-9_]{1,32})/gu;
|
|
5
5
|
/**
|
|
6
6
|
* Extracts @mentions from text.
|
|
7
7
|
*/
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DEFAULT_STOP_WORDS = void 0;
|
|
4
|
+
exports.DEFAULT_STOP_WORDS = [
|
|
5
|
+
'a',
|
|
6
|
+
'about',
|
|
7
|
+
'above',
|
|
8
|
+
'after',
|
|
9
|
+
'again',
|
|
10
|
+
'against',
|
|
11
|
+
'all',
|
|
12
|
+
'am',
|
|
13
|
+
'an',
|
|
14
|
+
'and',
|
|
15
|
+
'any',
|
|
16
|
+
'are',
|
|
17
|
+
'as',
|
|
18
|
+
'at',
|
|
19
|
+
'be',
|
|
20
|
+
'because',
|
|
21
|
+
'been',
|
|
22
|
+
'before',
|
|
23
|
+
'being',
|
|
24
|
+
'below',
|
|
25
|
+
'between',
|
|
26
|
+
'both',
|
|
27
|
+
'but',
|
|
28
|
+
'by',
|
|
29
|
+
'can',
|
|
30
|
+
'did',
|
|
31
|
+
'do',
|
|
32
|
+
'does',
|
|
33
|
+
'doing',
|
|
34
|
+
'down',
|
|
35
|
+
'during',
|
|
36
|
+
'each',
|
|
37
|
+
'few',
|
|
38
|
+
'for',
|
|
39
|
+
'from',
|
|
40
|
+
'further',
|
|
41
|
+
'had',
|
|
42
|
+
'has',
|
|
43
|
+
'have',
|
|
44
|
+
'having',
|
|
45
|
+
'he',
|
|
46
|
+
'her',
|
|
47
|
+
'here',
|
|
48
|
+
'hers',
|
|
49
|
+
'herself',
|
|
50
|
+
'him',
|
|
51
|
+
'himself',
|
|
52
|
+
'his',
|
|
53
|
+
'how',
|
|
54
|
+
'i',
|
|
55
|
+
'if',
|
|
56
|
+
'in',
|
|
57
|
+
'into',
|
|
58
|
+
'is',
|
|
59
|
+
'it',
|
|
60
|
+
'its',
|
|
61
|
+
'itself',
|
|
62
|
+
'just',
|
|
63
|
+
'me',
|
|
64
|
+
'more',
|
|
65
|
+
'most',
|
|
66
|
+
'my',
|
|
67
|
+
'myself',
|
|
68
|
+
'no',
|
|
69
|
+
'nor',
|
|
70
|
+
'not',
|
|
71
|
+
'now',
|
|
72
|
+
'of',
|
|
73
|
+
'off',
|
|
74
|
+
'on',
|
|
75
|
+
'once',
|
|
76
|
+
'only',
|
|
77
|
+
'or',
|
|
78
|
+
'other',
|
|
79
|
+
'our',
|
|
80
|
+
'ours',
|
|
81
|
+
'ourselves',
|
|
82
|
+
'out',
|
|
83
|
+
'over',
|
|
84
|
+
'own',
|
|
85
|
+
'same',
|
|
86
|
+
'she',
|
|
87
|
+
'should',
|
|
88
|
+
'so',
|
|
89
|
+
'some',
|
|
90
|
+
'such',
|
|
91
|
+
'than',
|
|
92
|
+
'that',
|
|
93
|
+
'the',
|
|
94
|
+
'their',
|
|
95
|
+
'theirs',
|
|
96
|
+
'them',
|
|
97
|
+
'themselves',
|
|
98
|
+
'then',
|
|
99
|
+
'there',
|
|
100
|
+
'these',
|
|
101
|
+
'they',
|
|
102
|
+
'this',
|
|
103
|
+
'those',
|
|
104
|
+
'through',
|
|
105
|
+
'to',
|
|
106
|
+
'too',
|
|
107
|
+
'under',
|
|
108
|
+
'until',
|
|
109
|
+
'up',
|
|
110
|
+
'very',
|
|
111
|
+
'was',
|
|
112
|
+
'we',
|
|
113
|
+
'were',
|
|
114
|
+
'what',
|
|
115
|
+
'when',
|
|
116
|
+
'where',
|
|
117
|
+
'which',
|
|
118
|
+
'while',
|
|
119
|
+
'who',
|
|
120
|
+
'whom',
|
|
121
|
+
'why',
|
|
122
|
+
'with',
|
|
123
|
+
'would',
|
|
124
|
+
'you',
|
|
125
|
+
'your',
|
|
126
|
+
'yours',
|
|
127
|
+
'yourself',
|
|
128
|
+
'yourselves',
|
|
129
|
+
];
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.cleanWhitespace = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* Collapses whitespace and trims the ends.
|
|
6
|
+
*/
|
|
7
|
+
const cleanWhitespace = (text) => {
|
|
8
|
+
if (!text)
|
|
9
|
+
return '';
|
|
10
|
+
return text.replace(/\s+/g, ' ').trim();
|
|
11
|
+
};
|
|
12
|
+
exports.cleanWhitespace = cleanWhitespace;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.normalizeQuotes = exports.removeScriptsOrStyles = exports.stripHtml = exports.DEFAULT_PROMPT_PATTERNS = exports.stripPromptPatterns = exports.stripInvisibleChars = exports.cleanWhitespace = void 0;
|
|
4
|
+
var cleanWhitespace_js_1 = require("./cleanWhitespace.js");
|
|
5
|
+
Object.defineProperty(exports, "cleanWhitespace", { enumerable: true, get: function () { return cleanWhitespace_js_1.cleanWhitespace; } });
|
|
6
|
+
var stripInvisibleChars_js_1 = require("./stripInvisibleChars.js");
|
|
7
|
+
Object.defineProperty(exports, "stripInvisibleChars", { enumerable: true, get: function () { return stripInvisibleChars_js_1.stripInvisibleChars; } });
|
|
8
|
+
var stripPromptPatterns_js_1 = require("./stripPromptPatterns.js");
|
|
9
|
+
Object.defineProperty(exports, "stripPromptPatterns", { enumerable: true, get: function () { return stripPromptPatterns_js_1.stripPromptPatterns; } });
|
|
10
|
+
Object.defineProperty(exports, "DEFAULT_PROMPT_PATTERNS", { enumerable: true, get: function () { return stripPromptPatterns_js_1.DEFAULT_PROMPT_PATTERNS; } });
|
|
11
|
+
var stripHtml_js_1 = require("./stripHtml.js");
|
|
12
|
+
Object.defineProperty(exports, "stripHtml", { enumerable: true, get: function () { return stripHtml_js_1.stripHtml; } });
|
|
13
|
+
var removeScriptsOrStyles_js_1 = require("./removeScriptsOrStyles.js");
|
|
14
|
+
Object.defineProperty(exports, "removeScriptsOrStyles", { enumerable: true, get: function () { return removeScriptsOrStyles_js_1.removeScriptsOrStyles; } });
|
|
15
|
+
var normalizeQuotes_js_1 = require("./normalizeQuotes.js");
|
|
16
|
+
Object.defineProperty(exports, "normalizeQuotes", { enumerable: true, get: function () { return normalizeQuotes_js_1.normalizeQuotes; } });
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.normalizeQuotes = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* Normalizes smart quotes to straight quotes.
|
|
6
|
+
*/
|
|
7
|
+
const normalizeQuotes = (text) => {
|
|
8
|
+
if (!text)
|
|
9
|
+
return '';
|
|
10
|
+
return text.replace(/[ββ]/g, '"').replace(/[ββ]/g, "'");
|
|
11
|
+
};
|
|
12
|
+
exports.normalizeQuotes = normalizeQuotes;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.removeScriptsOrStyles = void 0;
|
|
4
|
+
const SCRIPT_REGEX = /<script\b[^>]*>[\s\S]*?<\/script>/gi;
|
|
5
|
+
const STYLE_REGEX = /<style\b[^>]*>[\s\S]*?<\/style>/gi;
|
|
6
|
+
/**
|
|
7
|
+
* Removes <script> and <style> blocks entirely. Preserves <noscript>.
|
|
8
|
+
*/
|
|
9
|
+
const removeScriptsOrStyles = (text) => {
|
|
10
|
+
if (!text)
|
|
11
|
+
return '';
|
|
12
|
+
return text.replace(SCRIPT_REGEX, '').replace(STYLE_REGEX, '');
|
|
13
|
+
};
|
|
14
|
+
exports.removeScriptsOrStyles = removeScriptsOrStyles;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.stripHtml = void 0;
|
|
4
|
+
const TAG_REGEX = /<\/?([A-Za-z][A-Za-z0-9-]*)\b[^>]*>/g;
|
|
5
|
+
/**
|
|
6
|
+
* Removes HTML tags, optionally preserving a whitelist of tag names (attributes are always stripped).
|
|
7
|
+
*/
|
|
8
|
+
const stripHtml = (text, allowedTags = []) => {
|
|
9
|
+
if (!text)
|
|
10
|
+
return '';
|
|
11
|
+
const allowed = new Set(allowedTags.map((tag) => tag.toLowerCase()));
|
|
12
|
+
const withoutDisallowed = text.replace(TAG_REGEX, (match, tag) => {
|
|
13
|
+
return allowed.has(String(tag).toLowerCase()) ? match : '';
|
|
14
|
+
});
|
|
15
|
+
return withoutDisallowed.replace(TAG_REGEX, (match, tag) => {
|
|
16
|
+
const name = String(tag).toLowerCase();
|
|
17
|
+
if (!allowed.has(name))
|
|
18
|
+
return '';
|
|
19
|
+
return match.startsWith('</') ? `</${name}>` : `<${name}>`;
|
|
20
|
+
});
|
|
21
|
+
};
|
|
22
|
+
exports.stripHtml = stripHtml;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.stripInvisibleChars = void 0;
|
|
4
|
+
const INVISIBLE_REGEX = /[\u200B-\u200D\u2060\uFEFF\u180E]/g;
|
|
5
|
+
/**
|
|
6
|
+
* Removes zero-width and invisible characters.
|
|
7
|
+
*/
|
|
8
|
+
const stripInvisibleChars = (text) => {
|
|
9
|
+
if (!text)
|
|
10
|
+
return '';
|
|
11
|
+
return text.replace(INVISIBLE_REGEX, '');
|
|
12
|
+
};
|
|
13
|
+
exports.stripInvisibleChars = stripInvisibleChars;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.stripPromptPatterns = exports.DEFAULT_PROMPT_PATTERNS = void 0;
|
|
4
|
+
exports.DEFAULT_PROMPT_PATTERNS = [
|
|
5
|
+
/<<\s*SYS\s*>>/gi,
|
|
6
|
+
/<<\s*\/\s*SYS\s*>>/gi,
|
|
7
|
+
/<<\s*USER\s*>>/gi,
|
|
8
|
+
/<<\s*\/\s*USER\s*>>/gi,
|
|
9
|
+
/<<\s*ASSISTANT\s*>>/gi,
|
|
10
|
+
/<<\s*\/\s*ASSISTANT\s*>>/gi,
|
|
11
|
+
/#{3,}/g,
|
|
12
|
+
];
|
|
13
|
+
/**
|
|
14
|
+
* Removes common prompt marker patterns.
|
|
15
|
+
*/
|
|
16
|
+
const stripPromptPatterns = (text, patterns = exports.DEFAULT_PROMPT_PATTERNS) => {
|
|
17
|
+
if (!text)
|
|
18
|
+
return '';
|
|
19
|
+
return patterns.reduce((acc, pattern) => acc.replace(pattern, ''), text);
|
|
20
|
+
};
|
|
21
|
+
exports.stripPromptPatterns = stripPromptPatterns;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { KeywordDensityOptions, ParseResult } from './parse/index.js';
|
|
2
|
+
export declare class EchoKyt {
|
|
3
|
+
private readonly text;
|
|
4
|
+
private constructor();
|
|
5
|
+
static from(text: string): EchoKyt;
|
|
6
|
+
value(): string;
|
|
7
|
+
toString(): string;
|
|
8
|
+
cleanWhitespace(): EchoKyt;
|
|
9
|
+
stripInvisible(): EchoKyt;
|
|
10
|
+
removeScriptsOrStyles(): EchoKyt;
|
|
11
|
+
stripHtml(allowedTags?: string[]): EchoKyt;
|
|
12
|
+
normalizeQuotes(): EchoKyt;
|
|
13
|
+
stripPromptPatterns(patterns?: RegExp[]): EchoKyt;
|
|
14
|
+
truncate(limit: number): EchoKyt;
|
|
15
|
+
slug(): EchoKyt;
|
|
16
|
+
mentions(): import("./parse/mentions.js").MentionToken[];
|
|
17
|
+
hashtags(): import("./parse/hashtags.js").HashtagToken[];
|
|
18
|
+
parse(): ParseResult;
|
|
19
|
+
keywordDensity(options?: KeywordDensityOptions): Record<string, number>;
|
|
20
|
+
readingTime(wordsPerMinute?: number): number;
|
|
21
|
+
getReadingEase(): number;
|
|
22
|
+
}
|
package/dist/echokyt.js
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { readingTime } from './analyze/readingTime.js';
|
|
2
|
+
import { getReadingEase } from './analyze/readingEase.js';
|
|
3
|
+
import { slugify } from './format/slugify.js';
|
|
4
|
+
import { smartTruncate } from './format/truncate.js';
|
|
5
|
+
import { extractHashtags, extractMentions, getKeywordDensity, parseMentionsAndHashtags, } from './parse/index.js';
|
|
6
|
+
import { cleanWhitespace, normalizeQuotes, removeScriptsOrStyles, stripInvisibleChars, stripHtml, stripPromptPatterns, } from './sanitize/index.js';
|
|
7
|
+
export class EchoKyt {
|
|
8
|
+
constructor(text) {
|
|
9
|
+
this.text = text;
|
|
10
|
+
}
|
|
11
|
+
static from(text) {
|
|
12
|
+
return new EchoKyt(text ?? '');
|
|
13
|
+
}
|
|
14
|
+
value() {
|
|
15
|
+
return this.text;
|
|
16
|
+
}
|
|
17
|
+
toString() {
|
|
18
|
+
return this.text;
|
|
19
|
+
}
|
|
20
|
+
cleanWhitespace() {
|
|
21
|
+
return new EchoKyt(cleanWhitespace(this.text));
|
|
22
|
+
}
|
|
23
|
+
stripInvisible() {
|
|
24
|
+
return new EchoKyt(stripInvisibleChars(this.text));
|
|
25
|
+
}
|
|
26
|
+
removeScriptsOrStyles() {
|
|
27
|
+
return new EchoKyt(removeScriptsOrStyles(this.text));
|
|
28
|
+
}
|
|
29
|
+
stripHtml(allowedTags) {
|
|
30
|
+
return new EchoKyt(stripHtml(this.text, allowedTags));
|
|
31
|
+
}
|
|
32
|
+
normalizeQuotes() {
|
|
33
|
+
return new EchoKyt(normalizeQuotes(this.text));
|
|
34
|
+
}
|
|
35
|
+
stripPromptPatterns(patterns) {
|
|
36
|
+
return new EchoKyt(stripPromptPatterns(this.text, patterns));
|
|
37
|
+
}
|
|
38
|
+
truncate(limit) {
|
|
39
|
+
return new EchoKyt(smartTruncate(this.text, limit));
|
|
40
|
+
}
|
|
41
|
+
slug() {
|
|
42
|
+
return new EchoKyt(slugify(this.text));
|
|
43
|
+
}
|
|
44
|
+
mentions() {
|
|
45
|
+
return extractMentions(this.text);
|
|
46
|
+
}
|
|
47
|
+
hashtags() {
|
|
48
|
+
return extractHashtags(this.text);
|
|
49
|
+
}
|
|
50
|
+
parse() {
|
|
51
|
+
return parseMentionsAndHashtags(this.text);
|
|
52
|
+
}
|
|
53
|
+
keywordDensity(options) {
|
|
54
|
+
return getKeywordDensity(this.text, options);
|
|
55
|
+
}
|
|
56
|
+
readingTime(wordsPerMinute) {
|
|
57
|
+
return readingTime(this.text, wordsPerMinute);
|
|
58
|
+
}
|
|
59
|
+
getReadingEase() {
|
|
60
|
+
return getReadingEase(this.text);
|
|
61
|
+
}
|
|
62
|
+
}
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
package/dist/parse/hashtags.js
CHANGED
package/dist/parse/index.d.ts
CHANGED
|
@@ -1,8 +1,14 @@
|
|
|
1
|
+
import { extractMentions } from './mentions.js';
|
|
2
|
+
import { extractHashtags } from './hashtags.js';
|
|
1
3
|
export { extractMentions } from './mentions.js';
|
|
2
4
|
export type { MentionToken } from './mentions.js';
|
|
3
5
|
export { extractHashtags } from './hashtags.js';
|
|
4
6
|
export type { HashtagToken } from './hashtags.js';
|
|
5
|
-
export
|
|
6
|
-
|
|
7
|
-
|
|
7
|
+
export { getKeywordDensity } from './keywordDensity.js';
|
|
8
|
+
export type { KeywordDensityOptions } from './keywordDensity.js';
|
|
9
|
+
export { DEFAULT_STOP_WORDS } from './stopWords.js';
|
|
10
|
+
export type ParseResult = {
|
|
11
|
+
mentions: ReturnType<typeof extractMentions>;
|
|
12
|
+
hashtags: ReturnType<typeof extractHashtags>;
|
|
8
13
|
};
|
|
14
|
+
export declare const parseMentionsAndHashtags: (text: string) => ParseResult;
|
package/dist/parse/index.js
CHANGED
|
@@ -2,6 +2,8 @@ import { extractMentions } from './mentions.js';
|
|
|
2
2
|
import { extractHashtags } from './hashtags.js';
|
|
3
3
|
export { extractMentions } from './mentions.js';
|
|
4
4
|
export { extractHashtags } from './hashtags.js';
|
|
5
|
+
export { getKeywordDensity } from './keywordDensity.js';
|
|
6
|
+
export { DEFAULT_STOP_WORDS } from './stopWords.js';
|
|
5
7
|
export const parseMentionsAndHashtags = (text) => ({
|
|
6
8
|
mentions: extractMentions(text),
|
|
7
9
|
hashtags: extractHashtags(text),
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export type KeywordDensityOptions = {
|
|
2
|
+
minLength?: number;
|
|
3
|
+
stopWords?: string[];
|
|
4
|
+
normalize?: boolean;
|
|
5
|
+
};
|
|
6
|
+
/**
|
|
7
|
+
* Returns keyword frequency (0-1) for filtered tokens in the text.
|
|
8
|
+
*/
|
|
9
|
+
export declare const getKeywordDensity: (text: string, options?: KeywordDensityOptions) => Record<string, number>;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { DEFAULT_STOP_WORDS } from './stopWords.js';
|
|
2
|
+
const WORD_REGEX = /[\p{L}\p{M}0-9_']+/gu;
|
|
3
|
+
/**
|
|
4
|
+
* Returns keyword frequency (0-1) for filtered tokens in the text.
|
|
5
|
+
*/
|
|
6
|
+
export const getKeywordDensity = (text, options = {}) => {
|
|
7
|
+
if (!text || !text.trim())
|
|
8
|
+
return {};
|
|
9
|
+
const minLength = options.minLength ?? 3;
|
|
10
|
+
const normalize = options.normalize ?? true;
|
|
11
|
+
const stopWords = options.stopWords ?? [...DEFAULT_STOP_WORDS];
|
|
12
|
+
const stopSet = new Set(normalize ? stopWords.map((word) => word.toLowerCase()) : stopWords);
|
|
13
|
+
const tokens = text.match(WORD_REGEX) ?? [];
|
|
14
|
+
const counts = {};
|
|
15
|
+
let total = 0;
|
|
16
|
+
for (const raw of tokens) {
|
|
17
|
+
const token = normalize ? raw.toLowerCase() : raw;
|
|
18
|
+
if (token.length < minLength)
|
|
19
|
+
continue;
|
|
20
|
+
if (stopSet.has(token))
|
|
21
|
+
continue;
|
|
22
|
+
counts[token] = (counts[token] ?? 0) + 1;
|
|
23
|
+
total += 1;
|
|
24
|
+
}
|
|
25
|
+
if (total === 0)
|
|
26
|
+
return {};
|
|
27
|
+
for (const key of Object.keys(counts)) {
|
|
28
|
+
counts[key] = counts[key] / total;
|
|
29
|
+
}
|
|
30
|
+
return counts;
|
|
31
|
+
};
|
package/dist/parse/mentions.js
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const DEFAULT_STOP_WORDS: readonly ["a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "can", "did", "do", "does", "doing", "down", "during", "each", "few", "for", "from", "further", "had", "has", "have", "having", "he", "her", "here", "hers", "herself", "him", "himself", "his", "how", "i", "if", "in", "into", "is", "it", "its", "itself", "just", "me", "more", "most", "my", "myself", "no", "nor", "not", "now", "of", "off", "on", "once", "only", "or", "other", "our", "ours", "ourselves", "out", "over", "own", "same", "she", "should", "so", "some", "such", "than", "that", "the", "their", "theirs", "them", "themselves", "then", "there", "these", "they", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom", "why", "with", "would", "you", "your", "yours", "yourself", "yourselves"];
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
export const DEFAULT_STOP_WORDS = [
|
|
2
|
+
'a',
|
|
3
|
+
'about',
|
|
4
|
+
'above',
|
|
5
|
+
'after',
|
|
6
|
+
'again',
|
|
7
|
+
'against',
|
|
8
|
+
'all',
|
|
9
|
+
'am',
|
|
10
|
+
'an',
|
|
11
|
+
'and',
|
|
12
|
+
'any',
|
|
13
|
+
'are',
|
|
14
|
+
'as',
|
|
15
|
+
'at',
|
|
16
|
+
'be',
|
|
17
|
+
'because',
|
|
18
|
+
'been',
|
|
19
|
+
'before',
|
|
20
|
+
'being',
|
|
21
|
+
'below',
|
|
22
|
+
'between',
|
|
23
|
+
'both',
|
|
24
|
+
'but',
|
|
25
|
+
'by',
|
|
26
|
+
'can',
|
|
27
|
+
'did',
|
|
28
|
+
'do',
|
|
29
|
+
'does',
|
|
30
|
+
'doing',
|
|
31
|
+
'down',
|
|
32
|
+
'during',
|
|
33
|
+
'each',
|
|
34
|
+
'few',
|
|
35
|
+
'for',
|
|
36
|
+
'from',
|
|
37
|
+
'further',
|
|
38
|
+
'had',
|
|
39
|
+
'has',
|
|
40
|
+
'have',
|
|
41
|
+
'having',
|
|
42
|
+
'he',
|
|
43
|
+
'her',
|
|
44
|
+
'here',
|
|
45
|
+
'hers',
|
|
46
|
+
'herself',
|
|
47
|
+
'him',
|
|
48
|
+
'himself',
|
|
49
|
+
'his',
|
|
50
|
+
'how',
|
|
51
|
+
'i',
|
|
52
|
+
'if',
|
|
53
|
+
'in',
|
|
54
|
+
'into',
|
|
55
|
+
'is',
|
|
56
|
+
'it',
|
|
57
|
+
'its',
|
|
58
|
+
'itself',
|
|
59
|
+
'just',
|
|
60
|
+
'me',
|
|
61
|
+
'more',
|
|
62
|
+
'most',
|
|
63
|
+
'my',
|
|
64
|
+
'myself',
|
|
65
|
+
'no',
|
|
66
|
+
'nor',
|
|
67
|
+
'not',
|
|
68
|
+
'now',
|
|
69
|
+
'of',
|
|
70
|
+
'off',
|
|
71
|
+
'on',
|
|
72
|
+
'once',
|
|
73
|
+
'only',
|
|
74
|
+
'or',
|
|
75
|
+
'other',
|
|
76
|
+
'our',
|
|
77
|
+
'ours',
|
|
78
|
+
'ourselves',
|
|
79
|
+
'out',
|
|
80
|
+
'over',
|
|
81
|
+
'own',
|
|
82
|
+
'same',
|
|
83
|
+
'she',
|
|
84
|
+
'should',
|
|
85
|
+
'so',
|
|
86
|
+
'some',
|
|
87
|
+
'such',
|
|
88
|
+
'than',
|
|
89
|
+
'that',
|
|
90
|
+
'the',
|
|
91
|
+
'their',
|
|
92
|
+
'theirs',
|
|
93
|
+
'them',
|
|
94
|
+
'themselves',
|
|
95
|
+
'then',
|
|
96
|
+
'there',
|
|
97
|
+
'these',
|
|
98
|
+
'they',
|
|
99
|
+
'this',
|
|
100
|
+
'those',
|
|
101
|
+
'through',
|
|
102
|
+
'to',
|
|
103
|
+
'too',
|
|
104
|
+
'under',
|
|
105
|
+
'until',
|
|
106
|
+
'up',
|
|
107
|
+
'very',
|
|
108
|
+
'was',
|
|
109
|
+
'we',
|
|
110
|
+
'were',
|
|
111
|
+
'what',
|
|
112
|
+
'when',
|
|
113
|
+
'where',
|
|
114
|
+
'which',
|
|
115
|
+
'while',
|
|
116
|
+
'who',
|
|
117
|
+
'whom',
|
|
118
|
+
'why',
|
|
119
|
+
'with',
|
|
120
|
+
'would',
|
|
121
|
+
'you',
|
|
122
|
+
'your',
|
|
123
|
+
'yours',
|
|
124
|
+
'yourself',
|
|
125
|
+
'yourselves',
|
|
126
|
+
];
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export { cleanWhitespace } from './cleanWhitespace.js';
|
|
2
|
+
export { stripInvisibleChars } from './stripInvisibleChars.js';
|
|
3
|
+
export { stripPromptPatterns, DEFAULT_PROMPT_PATTERNS } from './stripPromptPatterns.js';
|
|
4
|
+
export { stripHtml } from './stripHtml.js';
|
|
5
|
+
export { removeScriptsOrStyles } from './removeScriptsOrStyles.js';
|
|
6
|
+
export { normalizeQuotes } from './normalizeQuotes.js';
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export { cleanWhitespace } from './cleanWhitespace.js';
|
|
2
|
+
export { stripInvisibleChars } from './stripInvisibleChars.js';
|
|
3
|
+
export { stripPromptPatterns, DEFAULT_PROMPT_PATTERNS } from './stripPromptPatterns.js';
|
|
4
|
+
export { stripHtml } from './stripHtml.js';
|
|
5
|
+
export { removeScriptsOrStyles } from './removeScriptsOrStyles.js';
|
|
6
|
+
export { normalizeQuotes } from './normalizeQuotes.js';
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
const SCRIPT_REGEX = /<script\b[^>]*>[\s\S]*?<\/script>/gi;
|
|
2
|
+
const STYLE_REGEX = /<style\b[^>]*>[\s\S]*?<\/style>/gi;
|
|
3
|
+
/**
|
|
4
|
+
* Removes <script> and <style> blocks entirely. Preserves <noscript>.
|
|
5
|
+
*/
|
|
6
|
+
export const removeScriptsOrStyles = (text) => {
|
|
7
|
+
if (!text)
|
|
8
|
+
return '';
|
|
9
|
+
return text.replace(SCRIPT_REGEX, '').replace(STYLE_REGEX, '');
|
|
10
|
+
};
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
const TAG_REGEX = /<\/?([A-Za-z][A-Za-z0-9-]*)\b[^>]*>/g;
|
|
2
|
+
/**
|
|
3
|
+
* Removes HTML tags, optionally preserving a whitelist of tag names (attributes are always stripped).
|
|
4
|
+
*/
|
|
5
|
+
export const stripHtml = (text, allowedTags = []) => {
|
|
6
|
+
if (!text)
|
|
7
|
+
return '';
|
|
8
|
+
const allowed = new Set(allowedTags.map((tag) => tag.toLowerCase()));
|
|
9
|
+
const withoutDisallowed = text.replace(TAG_REGEX, (match, tag) => {
|
|
10
|
+
return allowed.has(String(tag).toLowerCase()) ? match : '';
|
|
11
|
+
});
|
|
12
|
+
return withoutDisallowed.replace(TAG_REGEX, (match, tag) => {
|
|
13
|
+
const name = String(tag).toLowerCase();
|
|
14
|
+
if (!allowed.has(name))
|
|
15
|
+
return '';
|
|
16
|
+
return match.startsWith('</') ? `</${name}>` : `<${name}>`;
|
|
17
|
+
});
|
|
18
|
+
};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
export const DEFAULT_PROMPT_PATTERNS = [
|
|
2
|
+
/<<\s*SYS\s*>>/gi,
|
|
3
|
+
/<<\s*\/\s*SYS\s*>>/gi,
|
|
4
|
+
/<<\s*USER\s*>>/gi,
|
|
5
|
+
/<<\s*\/\s*USER\s*>>/gi,
|
|
6
|
+
/<<\s*ASSISTANT\s*>>/gi,
|
|
7
|
+
/<<\s*\/\s*ASSISTANT\s*>>/gi,
|
|
8
|
+
/#{3,}/g,
|
|
9
|
+
];
|
|
10
|
+
/**
|
|
11
|
+
* Removes common prompt marker patterns.
|
|
12
|
+
*/
|
|
13
|
+
export const stripPromptPatterns = (text, patterns = DEFAULT_PROMPT_PATTERNS) => {
|
|
14
|
+
if (!text)
|
|
15
|
+
return '';
|
|
16
|
+
return patterns.reduce((acc, pattern) => acc.replace(pattern, ''), text);
|
|
17
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "echokyt",
|
|
3
|
-
"version": "0.0
|
|
3
|
+
"version": "0.1.0",
|
|
4
4
|
"description": "A lightweight text utility library",
|
|
5
5
|
"main": "./dist/cjs/index.cjs",
|
|
6
6
|
"module": "./dist/index.js",
|
|
@@ -11,6 +11,11 @@
|
|
|
11
11
|
"import": "./dist/index.js",
|
|
12
12
|
"require": "./dist/cjs/index.cjs"
|
|
13
13
|
},
|
|
14
|
+
"./echokyt": {
|
|
15
|
+
"types": "./dist/echokyt.d.ts",
|
|
16
|
+
"import": "./dist/echokyt.js",
|
|
17
|
+
"require": "./dist/cjs/echokyt.cjs"
|
|
18
|
+
},
|
|
14
19
|
"./format": {
|
|
15
20
|
"types": "./dist/format/index.d.ts",
|
|
16
21
|
"import": "./dist/format/index.js",
|
|
@@ -41,10 +46,23 @@
|
|
|
41
46
|
"import": "./dist/parse/*.js",
|
|
42
47
|
"require": "./dist/cjs/parse/*.cjs"
|
|
43
48
|
},
|
|
49
|
+
"./sanitize": {
|
|
50
|
+
"types": "./dist/sanitize/index.d.ts",
|
|
51
|
+
"import": "./dist/sanitize/index.js",
|
|
52
|
+
"require": "./dist/cjs/sanitize/index.cjs"
|
|
53
|
+
},
|
|
54
|
+
"./sanitize/*": {
|
|
55
|
+
"types": "./dist/sanitize/*.d.ts",
|
|
56
|
+
"import": "./dist/sanitize/*.js",
|
|
57
|
+
"require": "./dist/cjs/sanitize/*.cjs"
|
|
58
|
+
},
|
|
44
59
|
"./package.json": "./package.json"
|
|
45
60
|
},
|
|
46
61
|
"typesVersions": {
|
|
47
62
|
"*": {
|
|
63
|
+
"echokyt": [
|
|
64
|
+
"dist/echokyt.d.ts"
|
|
65
|
+
],
|
|
48
66
|
"format": [
|
|
49
67
|
"dist/format/index.d.ts"
|
|
50
68
|
],
|
|
@@ -63,6 +81,12 @@
|
|
|
63
81
|
"parse/*": [
|
|
64
82
|
"dist/parse/*.d.ts"
|
|
65
83
|
],
|
|
84
|
+
"sanitize": [
|
|
85
|
+
"dist/sanitize/index.d.ts"
|
|
86
|
+
],
|
|
87
|
+
"sanitize/*": [
|
|
88
|
+
"dist/sanitize/*.d.ts"
|
|
89
|
+
],
|
|
66
90
|
"*": [
|
|
67
91
|
"dist/*"
|
|
68
92
|
]
|
|
@@ -81,7 +105,9 @@
|
|
|
81
105
|
"bench": "npm run build && node benchmarks/bench.mjs",
|
|
82
106
|
"release": "standard-version",
|
|
83
107
|
"test": "jest",
|
|
84
|
-
"prepublishOnly": "npm run test && npm run build"
|
|
108
|
+
"prepublishOnly": "npm run lint && npm run test && npm run build",
|
|
109
|
+
"preversion": "npm run lint && npm run test",
|
|
110
|
+
"postversion": "git push && git push --tags"
|
|
85
111
|
},
|
|
86
112
|
"repository": {
|
|
87
113
|
"type": "git",
|
|
@@ -91,6 +117,14 @@
|
|
|
91
117
|
"text",
|
|
92
118
|
"string",
|
|
93
119
|
"utilities",
|
|
120
|
+
"text-manipulation",
|
|
121
|
+
"sanitization",
|
|
122
|
+
"parsing",
|
|
123
|
+
"mentions",
|
|
124
|
+
"hashtags",
|
|
125
|
+
"readability",
|
|
126
|
+
"llm",
|
|
127
|
+
"prompt-hygiene",
|
|
94
128
|
"slugify",
|
|
95
129
|
"truncate",
|
|
96
130
|
"reading-time",
|
|
@@ -112,8 +146,8 @@
|
|
|
112
146
|
"homepage": "https://github.com/nyigoro/EchoKit#readme",
|
|
113
147
|
"devDependencies": {
|
|
114
148
|
"@types/jest": "^29.0.0",
|
|
115
|
-
"@typescript-eslint/eslint-plugin": "^
|
|
116
|
-
"@typescript-eslint/parser": "^
|
|
149
|
+
"@typescript-eslint/eslint-plugin": "^8.55.0",
|
|
150
|
+
"@typescript-eslint/parser": "^8.55.0",
|
|
117
151
|
"eslint": "^8.57.0",
|
|
118
152
|
"jest": "^29.0.0",
|
|
119
153
|
"prettier": "^3.2.5",
|