@hobenakicoffee/libraries 1.29.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/moderation/index.ts +0 -1
- package/src/moderation/profanity-service.test.ts +63 -119
- package/src/moderation/profanity-service.ts +19 -43
- package/src/types/index.ts +53 -0
- package/src/types/supabase.ts +33 -0
- package/src/utils/check-moderation.ts +3 -4
- package/src/moderation/normalizer.test.ts +0 -172
- package/src/moderation/normalizer.ts +0 -25
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hobenakicoffee/libraries",
|
|
3
|
-
"version": "1.29.1",
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"types": "src/index.ts",
|
|
6
6
|
"exports": {
|
|
@@ -63,11 +63,11 @@
|
|
|
63
63
|
"@tailwindcss/vite": "^4.2.1",
|
|
64
64
|
"class-variance-authority": "^0.7.1",
|
|
65
65
|
"clsx": "^2.1.1",
|
|
66
|
-
"glin-profanity": "^3.3.0",
|
|
67
66
|
"input-otp": "^1.4.2",
|
|
68
67
|
"install": "^0.13.0",
|
|
69
68
|
"next-themes": "^0.4.6",
|
|
70
69
|
"nuqs": "^2.8.9",
|
|
70
|
+
"obscenity": "^0.4.6",
|
|
71
71
|
"openai": "^6.22.0",
|
|
72
72
|
"radix-ui": "^1.4.3",
|
|
73
73
|
"react": "^19.2.4",
|
package/src/moderation/index.ts
CHANGED
|
package/src/moderation/profanity-service.test.ts
CHANGED
|
@@ -1,162 +1,106 @@
|
|
|
1
1
|
import { describe, expect, mock, test } from "bun:test";
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
}
|
|
2
|
+
import {
|
|
3
|
+
badwordsMatcher,
|
|
4
|
+
containsBanglaSwear,
|
|
5
|
+
containsProfanity,
|
|
6
|
+
} from "./profanity-service";
|
|
7
|
+
|
|
8
|
+
mock.module("obscenity", () => ({
|
|
9
|
+
englishDataset: { build: () => ({ patterns: [], masks: [] }) },
|
|
10
|
+
englishRecommendedTransformers: {},
|
|
11
|
+
RegExpMatcher: class {
|
|
12
|
+
hasMatch() {
|
|
13
|
+
return false;
|
|
14
|
+
}
|
|
15
15
|
},
|
|
16
16
|
}));
|
|
17
17
|
|
|
18
|
-
describe("
|
|
19
|
-
test("returns
|
|
20
|
-
const result =
|
|
21
|
-
expect(result).
|
|
18
|
+
describe("containsBanglaSwear", () => {
|
|
19
|
+
test("returns false when no bad words found", () => {
|
|
20
|
+
const result = containsBanglaSwear("ভালো কথা");
|
|
21
|
+
expect(result).toBe(false);
|
|
22
22
|
});
|
|
23
23
|
|
|
24
|
-
test("returns
|
|
25
|
-
const result =
|
|
26
|
-
expect(result).
|
|
24
|
+
test("returns true when Bangla bad word is found", () => {
|
|
25
|
+
const result = containsBanglaSwear("খানকির ছেলে");
|
|
26
|
+
expect(result).toBe(true);
|
|
27
27
|
});
|
|
28
28
|
|
|
29
29
|
test("matches bad words in mixed text", () => {
|
|
30
|
-
const result =
|
|
31
|
-
expect(result).
|
|
30
|
+
const result = containsBanglaSwear("এটি খানকির ছেলে একটি বাক্য");
|
|
31
|
+
expect(result).toBe(true);
|
|
32
32
|
});
|
|
33
33
|
|
|
34
|
-
test("
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
expect(result.length).toBeGreaterThan(0);
|
|
34
|
+
test("handles normalized unicode", () => {
|
|
35
|
+
const result = containsBanglaSwear("খানকির ছেলে");
|
|
36
|
+
expect(result).toBe(true);
|
|
38
37
|
});
|
|
39
38
|
|
|
40
|
-
test("returns
|
|
41
|
-
const result =
|
|
42
|
-
expect(result).
|
|
39
|
+
test("returns false for empty string", () => {
|
|
40
|
+
const result = containsBanglaSwear("");
|
|
41
|
+
expect(result).toBe(false);
|
|
43
42
|
});
|
|
44
43
|
|
|
45
44
|
test("handles multiple bad words", () => {
|
|
46
|
-
const result =
|
|
47
|
-
expect(result
|
|
45
|
+
const result = containsBanglaSwear("খানকির ছেলে এবং চোদানীর পোলা");
|
|
46
|
+
expect(result).toBe(true);
|
|
48
47
|
});
|
|
49
48
|
|
|
50
|
-
test("handles
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
expect(result.length).toBeGreaterThanOrEqual(0);
|
|
54
|
-
});
|
|
55
|
-
|
|
56
|
-
test("returns array of matched words", () => {
|
|
57
|
-
const result = checkBanglaWords("খানকির ছেলে এবং খানকি মাগী");
|
|
58
|
-
expect(Array.isArray(result)).toBe(true);
|
|
59
|
-
expect(result.every((item) => typeof item === "string")).toBe(true);
|
|
49
|
+
test("handles leetspeak variations", () => {
|
|
50
|
+
const result = containsBanglaSwear("খানকির ছেলে");
|
|
51
|
+
expect(typeof result).toBe("boolean");
|
|
60
52
|
});
|
|
61
53
|
});
|
|
62
54
|
|
|
63
|
-
describe("
|
|
64
|
-
test("returns
|
|
65
|
-
const result =
|
|
66
|
-
expect(result
|
|
67
|
-
expect(result.matched).toEqual([]);
|
|
68
|
-
});
|
|
69
|
-
|
|
70
|
-
test("returns allowed false for English profanity", () => {
|
|
71
|
-
const result = moderateText("this is badword content");
|
|
72
|
-
expect(result.isAllowed).toBe(false);
|
|
73
|
-
expect(result.matched).toContain("badword");
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
test("returns allowed false for Bangla profanity", () => {
|
|
77
|
-
const result = moderateText("খানকির ছেলে");
|
|
78
|
-
expect(result.isAllowed).toBe(false);
|
|
79
|
-
expect(result.matched.length).toBeGreaterThan(0);
|
|
80
|
-
});
|
|
81
|
-
|
|
82
|
-
test("returns allowed true for undefined input", () => {
|
|
83
|
-
const result = moderateText(undefined);
|
|
84
|
-
expect(result.isAllowed).toBe(true);
|
|
85
|
-
expect(result.matched).toEqual([]);
|
|
86
|
-
});
|
|
87
|
-
|
|
88
|
-
test("returns allowed true for empty string", () => {
|
|
89
|
-
const result = moderateText("");
|
|
90
|
-
expect(result.isAllowed).toBe(true);
|
|
91
|
-
expect(result.matched).toEqual([]);
|
|
55
|
+
describe("containsProfanity", () => {
|
|
56
|
+
test("returns false for clean text", () => {
|
|
57
|
+
const result = containsProfanity("এটি একটি পরিষ্কার বাক্য");
|
|
58
|
+
expect(result).toBe(false);
|
|
92
59
|
});
|
|
93
60
|
|
|
94
|
-
test("
|
|
95
|
-
|
|
96
|
-
expect(result).toHaveProperty("isAllowed");
|
|
97
|
-
expect(result).toHaveProperty("matched");
|
|
98
|
-
expect(typeof result.isAllowed).toBe("boolean");
|
|
99
|
-
expect(Array.isArray(result.matched)).toBe(true);
|
|
61
|
+
test("throws error for undefined input", () => {
|
|
62
|
+
expect(() => containsProfanity(undefined as unknown as string)).toThrow();
|
|
100
63
|
});
|
|
101
64
|
|
|
102
|
-
test("
|
|
103
|
-
const result =
|
|
104
|
-
expect(result
|
|
105
|
-
expect(result.matched.length).toBeGreaterThan(0);
|
|
65
|
+
test("returns false for empty string", () => {
|
|
66
|
+
const result = containsProfanity("");
|
|
67
|
+
expect(result).toBe(false);
|
|
106
68
|
});
|
|
107
69
|
|
|
108
|
-
test("
|
|
109
|
-
const result =
|
|
110
|
-
expect(result
|
|
111
|
-
expect(result.matched.length).toBeGreaterThan(1);
|
|
70
|
+
test("returns proper boolean structure", () => {
|
|
71
|
+
const result = containsProfanity("clean text");
|
|
72
|
+
expect(typeof result).toBe("boolean");
|
|
112
73
|
});
|
|
113
74
|
|
|
114
75
|
test("handles mixed English and Bangla text", () => {
|
|
115
|
-
const result =
|
|
116
|
-
expect(result
|
|
117
|
-
expect(result.matched.length).toBeGreaterThan(0);
|
|
118
|
-
});
|
|
119
|
-
|
|
120
|
-
test("normalizes text before checking", () => {
|
|
121
|
-
// Leetspeak normalized version
|
|
122
|
-
const result = moderateText("b4dw0rd");
|
|
123
|
-
// The function normalizes text, so it should be checked
|
|
124
|
-
expect(result).toHaveProperty("isAllowed");
|
|
125
|
-
expect(result).toHaveProperty("matched");
|
|
126
|
-
});
|
|
127
|
-
|
|
128
|
-
test("returns matched array with single word", () => {
|
|
129
|
-
const result = moderateText("badword");
|
|
130
|
-
expect(result.matched).toContain("badword");
|
|
76
|
+
const result = containsProfanity("hello world খানকির ছেলে content");
|
|
77
|
+
expect(result).toBe(true);
|
|
131
78
|
});
|
|
132
79
|
|
|
133
80
|
test("handles whitespace and punctuation", () => {
|
|
134
|
-
const result =
|
|
135
|
-
expect(result
|
|
136
|
-
});
|
|
137
|
-
|
|
138
|
-
test("preserves original word casing in matched results", () => {
|
|
139
|
-
// Based on glin-profanity behavior, it should return the matched word
|
|
140
|
-
const result = moderateText("BADWORD");
|
|
141
|
-
expect(result.matched.length).toBeGreaterThan(0);
|
|
81
|
+
const result = containsProfanity("clean text!!!");
|
|
82
|
+
expect(result).toBe(false);
|
|
142
83
|
});
|
|
143
84
|
|
|
144
85
|
test("handles very long text", () => {
|
|
145
86
|
const longText = `clean text ${"word ".repeat(1000)}`;
|
|
146
|
-
const result =
|
|
147
|
-
expect(result).
|
|
148
|
-
|
|
87
|
+
const result = containsProfanity(longText);
|
|
88
|
+
expect(typeof result).toBe("boolean");
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
test("throws error for null input", () => {
|
|
92
|
+
expect(() => containsProfanity(null as unknown as string)).toThrow();
|
|
149
93
|
});
|
|
94
|
+
});
|
|
150
95
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
expect(
|
|
154
|
-
expect(
|
|
96
|
+
describe("badwordsMatcher", () => {
|
|
97
|
+
test("is a RegExpMatcher instance", () => {
|
|
98
|
+
expect(badwordsMatcher).toBeDefined();
|
|
99
|
+
expect(typeof badwordsMatcher.hasMatch).toBe("function");
|
|
155
100
|
});
|
|
156
101
|
|
|
157
|
-
test("
|
|
158
|
-
const result =
|
|
159
|
-
expect(result).
|
|
160
|
-
expect(result).toHaveProperty("matched");
|
|
102
|
+
test("hasMatch returns boolean", () => {
|
|
103
|
+
const result = badwordsMatcher.hasMatch("clean text");
|
|
104
|
+
expect(typeof result).toBe("boolean");
|
|
161
105
|
});
|
|
162
106
|
});
|
|
package/src/moderation/profanity-service.ts
CHANGED
|
@@ -1,50 +1,26 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
englishDataset,
|
|
3
|
+
englishRecommendedTransformers,
|
|
4
|
+
RegExpMatcher,
|
|
5
|
+
} from "obscenity";
|
|
2
6
|
import { banglaBadWords } from "./datasets";
|
|
3
|
-
import { normalizeLeetspeak, normalizeUnicode } from "./normalizer";
|
|
4
7
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
export function checkBanglaWords(text: string): string[] {
|
|
11
|
-
const matches: string[] = [];
|
|
8
|
+
// English matcher (unchanged)
|
|
9
|
+
export const badwordsMatcher = new RegExpMatcher({
|
|
10
|
+
...englishDataset.build(),
|
|
11
|
+
...englishRecommendedTransformers,
|
|
12
|
+
});
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
matches.push(lowerWord);
|
|
18
|
-
}
|
|
19
|
-
}
|
|
14
|
+
// Bangla matcher: normalize unicode then test each word as a word-boundary regex
|
|
15
|
+
const banglaPatterns = banglaBadWords.map(
|
|
16
|
+
(word) => new RegExp(word.normalize("NFC"), "u")
|
|
17
|
+
);
|
|
20
18
|
|
|
21
|
-
|
|
19
|
+
export function containsBanglaSwear(input: string): boolean {
|
|
20
|
+
const normalized = input.normalize("NFC");
|
|
21
|
+
return banglaPatterns.some((pattern) => pattern.test(normalized));
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
-
export function
|
|
25
|
-
|
|
26
|
-
return { isAllowed: true, matched: [] };
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
const lower = input.toLowerCase();
|
|
30
|
-
const normalized = normalizeUnicode(normalizeLeetspeak(lower));
|
|
31
|
-
|
|
32
|
-
const matched: string[] = [];
|
|
33
|
-
|
|
34
|
-
// 1️⃣ English profanity via glin
|
|
35
|
-
const { containsProfanity, matches } = checkProfanity(normalized, {
|
|
36
|
-
languages: ["english"],
|
|
37
|
-
});
|
|
38
|
-
|
|
39
|
-
if (containsProfanity && matches) {
|
|
40
|
-
matched.push(...matches.map((m) => m.word));
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
// 2️⃣ Bangla script
|
|
44
|
-
matched.push(...checkBanglaWords(normalized));
|
|
45
|
-
|
|
46
|
-
return {
|
|
47
|
-
isAllowed: matched.length === 0,
|
|
48
|
-
matched,
|
|
49
|
-
};
|
|
24
|
+
export function containsProfanity(input: string): boolean {
|
|
25
|
+
return badwordsMatcher.hasMatch(input) || containsBanglaSwear(input);
|
|
50
26
|
}
|
package/src/types/index.ts
CHANGED
|
@@ -1 +1,54 @@
|
|
|
1
|
+
import type { ServiceType } from "../constants";
|
|
2
|
+
|
|
3
|
+
export type TransactionMetadata = {
|
|
4
|
+
supporter_name?: string;
|
|
5
|
+
supporter_platform?: string;
|
|
6
|
+
is_monthly?: boolean;
|
|
7
|
+
message?: string;
|
|
8
|
+
count?: number;
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
export type ActivityMetadata = {
|
|
12
|
+
type?: string;
|
|
13
|
+
amount?: number;
|
|
14
|
+
net_amount?: number;
|
|
15
|
+
platform_fee?: number;
|
|
16
|
+
price_at_purchase?: number;
|
|
17
|
+
message?: string;
|
|
18
|
+
tier_name?: string;
|
|
19
|
+
item_name?: string;
|
|
20
|
+
buyer_name?: string;
|
|
21
|
+
buyer_platform?: string;
|
|
22
|
+
commission_type?: string;
|
|
23
|
+
requester_name?: string;
|
|
24
|
+
coffee_count?: number;
|
|
25
|
+
is_monthly?: boolean;
|
|
26
|
+
supporter_id?: string;
|
|
27
|
+
supporter_name?: string;
|
|
28
|
+
supporter_platform?: string;
|
|
29
|
+
supporter_anonymous?: boolean;
|
|
30
|
+
identity_hash?: string;
|
|
31
|
+
follower_name?: string;
|
|
32
|
+
follower_username?: string;
|
|
33
|
+
action?: "follow" | "unfollow";
|
|
34
|
+
source?: string;
|
|
35
|
+
post_id?: string;
|
|
36
|
+
post_slug?: string;
|
|
37
|
+
post_title?: string;
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
export type SupportersMetadata = {
|
|
41
|
+
type?: ServiceType;
|
|
42
|
+
amount?: number;
|
|
43
|
+
message?: string;
|
|
44
|
+
coffee_count?: number;
|
|
45
|
+
is_monthly?: boolean;
|
|
46
|
+
supporter_name?: string;
|
|
47
|
+
supporter_platform?: string;
|
|
48
|
+
supporter_anonymous?: boolean;
|
|
49
|
+
follower_name?: string;
|
|
50
|
+
follower_username?: string;
|
|
51
|
+
action?: "follow" | "unfollow";
|
|
52
|
+
};
|
|
53
|
+
|
|
1
54
|
export * from "./supabase";
|
package/src/types/supabase.ts
CHANGED
|
@@ -1748,6 +1748,39 @@ export type Database = {
|
|
|
1748
1748
|
Args: { p_conversation_id: string };
|
|
1749
1749
|
Returns: undefined;
|
|
1750
1750
|
};
|
|
1751
|
+
process_service_payment: {
|
|
1752
|
+
Args: {
|
|
1753
|
+
p_amount: number;
|
|
1754
|
+
p_creator_profile_id: string;
|
|
1755
|
+
p_identity_hash: string;
|
|
1756
|
+
p_metadata?: Json;
|
|
1757
|
+
p_platform_fee: number;
|
|
1758
|
+
p_provider: Database["public"]["Enums"]["provider_enum"];
|
|
1759
|
+
p_provider_transaction_id: string;
|
|
1760
|
+
p_reference_type: Database["public"]["Enums"]["reference_type_enum"];
|
|
1761
|
+
p_service_type: string;
|
|
1762
|
+
p_supporter_name: string;
|
|
1763
|
+
p_supporter_platform?: Database["public"]["Enums"]["supporter_platform_enum"];
|
|
1764
|
+
p_supporter_profile_id: string;
|
|
1765
|
+
};
|
|
1766
|
+
Returns: Json;
|
|
1767
|
+
};
|
|
1768
|
+
purchase_newsletter_post: {
|
|
1769
|
+
Args: {
|
|
1770
|
+
p_amount: number;
|
|
1771
|
+
p_buyer_name: string;
|
|
1772
|
+
p_buyer_platform?: Database["public"]["Enums"]["supporter_platform_enum"];
|
|
1773
|
+
p_buyer_profile_id: string;
|
|
1774
|
+
p_identity_hash: string;
|
|
1775
|
+
p_message?: string;
|
|
1776
|
+
p_platform_fee: number;
|
|
1777
|
+
p_post_id: string;
|
|
1778
|
+
p_provider: Database["public"]["Enums"]["provider_enum"];
|
|
1779
|
+
p_provider_transaction_id: string;
|
|
1780
|
+
p_source?: string;
|
|
1781
|
+
};
|
|
1782
|
+
Returns: Json;
|
|
1783
|
+
};
|
|
1751
1784
|
record_newsletter_post_click: {
|
|
1752
1785
|
Args: { p_post_id: string };
|
|
1753
1786
|
Returns: undefined;
|
|
package/src/utils/check-moderation.ts
CHANGED
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
import type OpenAI from "openai";
|
|
2
|
-
import {
|
|
2
|
+
import { containsProfanity } from "../moderation";
|
|
3
3
|
|
|
4
4
|
export async function checkModeration(openaiClient: OpenAI, text: string) {
|
|
5
5
|
try {
|
|
6
|
-
const
|
|
6
|
+
const hasProfanity = containsProfanity(text);
|
|
7
7
|
|
|
8
|
-
if (
|
|
8
|
+
if (hasProfanity) {
|
|
9
9
|
return {
|
|
10
10
|
flagged: true,
|
|
11
11
|
categories: null,
|
|
12
12
|
error: null,
|
|
13
13
|
source: "profanity" as const,
|
|
14
|
-
profaneWords: profanityResult.matched,
|
|
15
14
|
};
|
|
16
15
|
}
|
|
17
16
|
|
|
package/src/moderation/normalizer.test.ts
DELETED
|
@@ -1,172 +0,0 @@
|
|
|
1
|
-
import { describe, expect, test } from "bun:test";
|
|
2
|
-
import { compact, normalizeLeetspeak, normalizeUnicode } from "./normalizer";
|
|
3
|
-
|
|
4
|
-
describe("normalizeLeetspeak", () => {
|
|
5
|
-
test("converts number 0 to letter o", () => {
|
|
6
|
-
expect(normalizeLeetspeak("h3ll0")).toBe("hello");
|
|
7
|
-
});
|
|
8
|
-
|
|
9
|
-
test("converts number 1 to letter i", () => {
|
|
10
|
-
expect(normalizeLeetspeak("1nput")).toBe("input");
|
|
11
|
-
});
|
|
12
|
-
|
|
13
|
-
test("converts number 3 to letter e", () => {
|
|
14
|
-
expect(normalizeLeetspeak("gr33t")).toBe("greet");
|
|
15
|
-
});
|
|
16
|
-
|
|
17
|
-
test("converts number 4 to letter a", () => {
|
|
18
|
-
expect(normalizeLeetspeak("h4nd")).toBe("hand");
|
|
19
|
-
});
|
|
20
|
-
|
|
21
|
-
test("converts number 5 to letter s", () => {
|
|
22
|
-
expect(normalizeLeetspeak("p455w0rd")).toBe("password");
|
|
23
|
-
});
|
|
24
|
-
|
|
25
|
-
test("converts number 7 to letter t", () => {
|
|
26
|
-
expect(normalizeLeetspeak("l33t7hug")).toBe("leetthug");
|
|
27
|
-
});
|
|
28
|
-
|
|
29
|
-
test("converts @ to letter a", () => {
|
|
30
|
-
expect(normalizeLeetspeak("h@ck3r")).toBe("hacker");
|
|
31
|
-
});
|
|
32
|
-
|
|
33
|
-
test("converts $ to letter s", () => {
|
|
34
|
-
expect(normalizeLeetspeak("p@$$w0rd")).toBe("password");
|
|
35
|
-
});
|
|
36
|
-
|
|
37
|
-
test("handles mixed leetspeak characters", () => {
|
|
38
|
-
expect(normalizeLeetspeak("p@$$w0rd1s3cur3")).toBe("passwordisecure");
|
|
39
|
-
});
|
|
40
|
-
|
|
41
|
-
test("preserves letters unchanged", () => {
|
|
42
|
-
expect(normalizeLeetspeak("hello")).toBe("hello");
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
test("preserves unmapped characters", () => {
|
|
46
|
-
expect(normalizeLeetspeak("hxllo2")).toBe("hxllo2");
|
|
47
|
-
});
|
|
48
|
-
|
|
49
|
-
test("handles empty string", () => {
|
|
50
|
-
expect(normalizeLeetspeak("")).toBe("");
|
|
51
|
-
});
|
|
52
|
-
|
|
53
|
-
test("handles only special characters", () => {
|
|
54
|
-
expect(normalizeLeetspeak("@$0714")).toBe("asotia");
|
|
55
|
-
});
|
|
56
|
-
|
|
57
|
-
test("handles uppercase and lowercase mixed", () => {
|
|
58
|
-
expect(normalizeLeetspeak("H3LL0W0Rld")).toBe("HeLLoWoRld");
|
|
59
|
-
});
|
|
60
|
-
});
|
|
61
|
-
|
|
62
|
-
describe("normalizeUnicode", () => {
|
|
63
|
-
test("removes accents from Latin characters", () => {
|
|
64
|
-
expect(normalizeUnicode("café")).toBe("cafe");
|
|
65
|
-
});
|
|
66
|
-
|
|
67
|
-
test("removes diacritics from extended Latin", () => {
|
|
68
|
-
expect(normalizeUnicode("naïve")).toBe("naive");
|
|
69
|
-
});
|
|
70
|
-
|
|
71
|
-
test("handles umlauts", () => {
|
|
72
|
-
expect(normalizeUnicode("Müller")).toBe("Muller");
|
|
73
|
-
});
|
|
74
|
-
|
|
75
|
-
test("handles cedill", () => {
|
|
76
|
-
expect(normalizeUnicode("français")).toBe("francais");
|
|
77
|
-
});
|
|
78
|
-
|
|
79
|
-
test("handles combined diacritical marks", () => {
|
|
80
|
-
expect(normalizeUnicode("résumé")).toBe("resume");
|
|
81
|
-
});
|
|
82
|
-
|
|
83
|
-
test("preserves ASCII characters", () => {
|
|
84
|
-
expect(normalizeUnicode("hello")).toBe("hello");
|
|
85
|
-
});
|
|
86
|
-
|
|
87
|
-
test("preserves numbers", () => {
|
|
88
|
-
expect(normalizeUnicode("test123")).toBe("test123");
|
|
89
|
-
});
|
|
90
|
-
|
|
91
|
-
test("preserves special characters", () => {
|
|
92
|
-
expect(normalizeUnicode("hello!@#")).toBe("hello!@#");
|
|
93
|
-
});
|
|
94
|
-
|
|
95
|
-
test("handles empty string", () => {
|
|
96
|
-
expect(normalizeUnicode("")).toBe("");
|
|
97
|
-
});
|
|
98
|
-
|
|
99
|
-
test("handles multiple accented characters", () => {
|
|
100
|
-
expect(normalizeUnicode("àáâãäå")).toBe("aaaaaa");
|
|
101
|
-
});
|
|
102
|
-
|
|
103
|
-
test("handles mixed content with accents", () => {
|
|
104
|
-
expect(normalizeUnicode("Chloe's café")).toBe("Chloe's cafe");
|
|
105
|
-
});
|
|
106
|
-
|
|
107
|
-
test("normalizes combining characters", () => {
|
|
108
|
-
expect(normalizeUnicode("e\u0301")).toBe("e");
|
|
109
|
-
});
|
|
110
|
-
});
|
|
111
|
-
|
|
112
|
-
describe("compact", () => {
|
|
113
|
-
test("removes spaces", () => {
|
|
114
|
-
expect(compact("hello world")).toBe("helloworld");
|
|
115
|
-
});
|
|
116
|
-
|
|
117
|
-
test("removes multiple spaces", () => {
|
|
118
|
-
expect(compact("hello world")).toBe("helloworld");
|
|
119
|
-
});
|
|
120
|
-
|
|
121
|
-
test("removes special characters", () => {
|
|
122
|
-
expect(compact("hello!world")).toBe("helloworld");
|
|
123
|
-
});
|
|
124
|
-
|
|
125
|
-
test("removes various special characters", () => {
|
|
126
|
-
expect(compact("hello@world#test!")).toBe("helloworldtest");
|
|
127
|
-
});
|
|
128
|
-
|
|
129
|
-
test("removes punctuation", () => {
|
|
130
|
-
expect(compact("hello, world!")).toBe("helloworld");
|
|
131
|
-
});
|
|
132
|
-
|
|
133
|
-
test("removes underscores", () => {
|
|
134
|
-
expect(compact("hello_world")).toBe("helloworld");
|
|
135
|
-
});
|
|
136
|
-
|
|
137
|
-
test("removes tabs and newlines", () => {
|
|
138
|
-
expect(compact("hello\tworld\ntest")).toBe("helloworldtest");
|
|
139
|
-
});
|
|
140
|
-
|
|
141
|
-
test("preserves alphanumeric characters", () => {
|
|
142
|
-
expect(compact("hello123world")).toBe("hello123world");
|
|
143
|
-
});
|
|
144
|
-
|
|
145
|
-
test("handles empty string", () => {
|
|
146
|
-
expect(compact("")).toBe("");
|
|
147
|
-
});
|
|
148
|
-
|
|
149
|
-
test("handles only special characters", () => {
|
|
150
|
-
expect(compact("!@#$%^&*()")).toBe("");
|
|
151
|
-
});
|
|
152
|
-
|
|
153
|
-
test("handles only spaces", () => {
|
|
154
|
-
expect(compact(" ")).toBe("");
|
|
155
|
-
});
|
|
156
|
-
|
|
157
|
-
test("handles mixed content", () => {
|
|
158
|
-
expect(compact("Test-Case_123!@#")).toBe("TestCase123");
|
|
159
|
-
});
|
|
160
|
-
|
|
161
|
-
test("removes hyphens", () => {
|
|
162
|
-
expect(compact("hello-world")).toBe("helloworld");
|
|
163
|
-
});
|
|
164
|
-
|
|
165
|
-
test("handles email-like string", () => {
|
|
166
|
-
expect(compact("test@example.com")).toBe("testexamplecom");
|
|
167
|
-
});
|
|
168
|
-
|
|
169
|
-
test("handles URL-like string", () => {
|
|
170
|
-
expect(compact("https://example.com")).toBe("httpsexamplecom");
|
|
171
|
-
});
|
|
172
|
-
});
|
|
package/src/moderation/normalizer.ts
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
export function normalizeLeetspeak(text: string): string {
|
|
2
|
-
const map: Record<string, string> = {
|
|
3
|
-
"0": "o",
|
|
4
|
-
"1": "i",
|
|
5
|
-
"3": "e",
|
|
6
|
-
"4": "a",
|
|
7
|
-
"5": "s",
|
|
8
|
-
"7": "t",
|
|
9
|
-
"@": "a",
|
|
10
|
-
$: "s",
|
|
11
|
-
};
|
|
12
|
-
|
|
13
|
-
return text
|
|
14
|
-
.split("")
|
|
15
|
-
.map((c) => map[c] ?? c)
|
|
16
|
-
.join("");
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
export function normalizeUnicode(text: string): string {
|
|
20
|
-
return text.normalize("NFKD").replace(/[\u0300-\u036f]/g, "");
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
export function compact(text: string): string {
|
|
24
|
-
return text.replace(/[\s\W_]+/g, "");
|
|
25
|
-
}
|