npm - @fldx/sopan - Versions diffs - 1.0.0 - Mend

@fldx/sopan 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Feildrix Liemdra
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,130 @@
+# sopan
+Small TypeScript-first profanity filter for Indonesian text.
+`sopan` is dependency-free at runtime and exposes a compact API for JavaScript and TypeScript projects.
+## Install
+```sh
+npm install @fldx/sopan
+```
+```sh
+pnpm add @fldx/sopan
+```
+```sh
+yarn add @fldx/sopan
+```
+```sh
+bun add @fldx/sopan
+```
+## Usage
+```ts
+import { addWords, clean, containsProfanity, findProfanity } from "@fldx/sopan";
+addWords(["kasar"]);
+containsProfanity("dasar t41"); // true
+findProfanity("A*N*J*I*N*G");
+clean("dasar t41"); // "dasar ***"
+clean("ka$aar!"); // "***!"
+```
+CommonJS is supported too:
+```js
+const { clean, containsProfanity } = require("@fldx/sopan");
+containsProfanity("ta1"); // true
+clean("ta1"); // "***"
+```
+## API
+### `addWords(words)`
+Adds words to the shared default filter once, so future calls to `containsProfanity`, `findProfanity`, and `clean` detect them automatically.
+```ts
+addWords(["kasar", "kata-baru"]);
+containsProfanity("ka$aar"); // true
+clean("kata-baru"); // "***"
+```
+### `clearWords()`
+Removes words previously registered through `addWords`.
+```ts
+clearWords();
+```
+### `containsProfanity(input, options)`
+Returns `true` when the input contains a profane word.
+```ts
+containsProfanity("ini santai"); // false
+containsProfanity("ini tai"); // true
+containsProfanity("ka$aar", { additionalWords: ["kasar"] }); // true
+```
+### `findProfanity(input, options)`
+Returns match details with the configured dictionary word, raw token, normalized token, and index.
+```ts
+findProfanity("halo t41");
+// [{ word: "tai", raw: "t41", normalized: "tai", index: 5 }]
+findProfanity("ka$aar", { additionalWords: ["kasar"] });
+// [{ word: "kasar", raw: "ka$aar", normalized: "kasar", index: 0 }]
+```
+### `clean(input, options)`
+Replaces profane words with `"***"` by default.
+```ts
+clean("dasar t41"); // "dasar ***"
+clean("dasar t41", { replacement: "[redacted]" });
+clean("dasar t41", { replacement: (match) => `[${match.word}]` });
+clean("ka$aar", { additionalWords: ["kasar"], replacement: "[custom]" });
+```
+### `createFilter(options)`
+Creates a custom filter with your own word list.
+```ts
+const filter = createFilter({
+  words: ["kasar", "contoh"] as const
+});
+filter.containsProfanity("ka$aar");
+```
+## Matching Behavior
+The default filter focuses on Indonesian profanity and normalizes:
+- mixed casing: `TAI`
+- simple leetspeak: `t41`, `ta1`
+- symbol separators: `a*n*j*i*n*g`
+- repeated letters: `annjiiinggg`
+Matching is token-based, so `tai` is detected while `santai` is not.
+The default Indonesian word list was expanded from public Indonesian rude-word references including Wiktionary's Indonesian "kata kasar" category, a translator-maintained Indonesian profanity list, a public GitHub gist, and selected regional Indonesian references for Sundanese, Javanese, Batak, and Medan usage. It intentionally keeps only single-token words in the core package.
+## Roadmap
+- Expand Indonesian dictionary coverage.
+- Add language packs without increasing the default bundle size.
+- Add optional phrase matching for multi-word profanity.

package/dist/cjs/index.js ADDED Viewed

@@ -0,0 +1,158 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.INDONESIAN_WORDS = void 0;
+exports.addWords = addWords;
+exports.clearWords = clearWords;
+exports.containsProfanity = containsProfanity;
+exports.findProfanity = findProfanity;
+exports.clean = clean;
+exports.createFilter = createFilter;
+const normalize_js_1 = require("./normalize.js");
+const words_js_1 = require("./words.js");
+Object.defineProperty(exports, "INDONESIAN_WORDS", { enumerable: true, get: function () { return words_js_1.INDONESIAN_WORDS; } });
+// Text substituted for each match when the caller supplies no replacement.
+const DEFAULT_REPLACEMENT = "***";
+// Filter built from the bundled Indonesian word list only; `clearWords`
+// points the cache back at this instance.
+const DEFAULT_FILTER = createFilter({
+    words: words_js_1.INDONESIAN_WORDS
+});
+// Words registered through `addWords`, keyed by their normalized form; the
+// value is the word exactly as it was supplied.
+const addedWords = new Map();
+// Filter currently served to the module-level helpers; rebuilt lazily by
+// `getSharedDefaultFilter`.
+let cachedDefaultFilter = DEFAULT_FILTER;
+// Set by `addWords`; tells `getSharedDefaultFilter` that the cache is stale.
+let isDefaultFilterDirty = false;
+/**
+ * Add words to the shared default filter used by `containsProfanity`,
+ * `findProfanity`, and `clean`.
+ *
+ * Use this during application startup when your project has product-specific
+ * words that should be detected everywhere. Words are normalized and deduped
+ * with the same rules as the built-in Indonesian dictionary.
+ */
+function addWords(words) {
+    for (const word of words) {
+        const normalized = (0, normalize_js_1.normalizeWord)(word);
+        if (normalized.length > 0) {
+            addedWords.set(normalized, word);
+        }
+    }
+    isDefaultFilterDirty = true;
+}
+/**
+ * Remove all words previously registered through `addWords`.
+ *
+ * This is useful for tests, worker reuse, or apps that need to rebuild their
+ * moderation policy at runtime.
+ */
+function clearWords() {
+    addedWords.clear();
+    cachedDefaultFilter = DEFAULT_FILTER;
+    isDefaultFilterDirty = false;
+}
+/**
+ * Returns `true` when input contains a word from the default Indonesian
+ * dictionary, shared words registered with `addWords`, or per-call
+ * `additionalWords`.
+ */
+function containsProfanity(input, options = {}) {
+    return getDefaultFilter(options.additionalWords).containsProfanity(input);
+}
+/**
+ * Finds profanity matches in input and returns match details, including the raw
+ * token, normalized token, matched dictionary word, and start index.
+ */
+function findProfanity(input, options = {}) {
+    return getDefaultFilter(options.additionalWords).findProfanity(input);
+}
+/**
+ * Replaces profanity in input. By default matches are replaced with `"***"`,
+ * but callers can provide a replacement string or callback.
+ */
+function clean(input, options = {}) {
+    const filter = getDefaultFilter(options.additionalWords);
+    if (options.replacement === undefined) {
+        return filter.clean(input);
+    }
+    return filter.clean(input, {
+        replacement: options.replacement
+    });
+}
+/**
+ * Creates an isolated reusable filter with its own word list.
+ *
+ * Prefer this when you need multiple independent dictionaries or literal
+ * TypeScript word types. Use `addWords` when you want to extend the package's
+ * shared default filter once for the whole app.
+ */
+function createFilter(options) {
+    const words = [...options.words];
+    const dictionary = createDictionary(words);
+    return {
+        words,
+        containsProfanity(input) {
+            return findMatches(input, dictionary).length > 0;
+        },
+        findProfanity(input) {
+            return findMatches(input, dictionary);
+        },
+        clean(input, cleanOptions = {}) {
+            return replaceMatches(input, findMatches(input, dictionary), cleanOptions.replacement);
+        }
+    };
+}
+function createDictionary(words) {
+    const dictionary = new Map();
+    for (const word of words) {
+        const normalized = (0, normalize_js_1.normalizeWord)(word);
+        if (normalized.length > 0) {
+            dictionary.set(normalized, word);
+        }
+    }
+    return dictionary;
+}
+function getDefaultFilter(additionalWords) {
+    const sharedFilter = getSharedDefaultFilter();
+    if (additionalWords === undefined || additionalWords.length === 0) {
+        return sharedFilter;
+    }
+    return createFilter({
+        words: [...sharedFilter.words, ...additionalWords]
+    });
+}
+function getSharedDefaultFilter() {
+    if (!isDefaultFilterDirty) {
+        return cachedDefaultFilter;
+    }
+    cachedDefaultFilter = createFilter({
+        words: [...words_js_1.INDONESIAN_WORDS, ...addedWords.values()]
+    });
+    isDefaultFilterDirty = false;
+    return cachedDefaultFilter;
+}
+function findMatches(input, dictionary) {
+    const matches = [];
+    for (const token of (0, normalize_js_1.tokenize)(input)) {
+        const word = dictionary.get(token.value);
+        if (word !== undefined) {
+            matches.push(toMatch(token, word));
+        }
+    }
+    return matches;
+}
+function toMatch(token, word) {
+    return {
+        word,
+        raw: token.raw,
+        normalized: token.value,
+        index: token.index
+    };
+}
+function replaceMatches(input, matches, replacement = DEFAULT_REPLACEMENT) {
+    if (matches.length === 0) {
+        return input;
+    }
+    let cursor = 0;
+    let output = "";
+    for (const match of matches) {
+        output += input.slice(cursor, match.index);
+        output += typeof replacement === "function" ? replacement(match) : replacement;
+        cursor = match.index + match.raw.length;
+    }
+    return output + input.slice(cursor);
+}

package/dist/cjs/normalize.js ADDED Viewed

@@ -0,0 +1,53 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.normalizeWord = normalizeWord;
+exports.tokenize = tokenize;
+// Look-alike digits and symbols mapped to the letter they stand in for.
+const LEET_CHARS = {
+    "0": "o",
+    "1": "i",
+    "!": "i",
+    "|": "i",
+    "3": "e",
+    "4": "a",
+    "@": "a",
+    "5": "s",
+    "$": "s",
+    "7": "t",
+    "+": "t",
+    "8": "b"
+};
+// Combining diacritical marks (U+0300-U+036F) left over after NFD decomposition.
+const COMBINING_MARKS = /[\u0300-\u036f]/g;
+// A run of the same letter or digit; `normalizeToken` collapses each run to
+// a single character.
+const REPEATED_CHARS = /([a-z0-9])\1+/g;
+// One token: runs of letters, digits, `@`, `#`, or `$`, optionally joined by
+// separator symbols (`!`, `|`, `+`, `*`, `.`, `_`, `~`, `-`).
+const TOKEN_CHARS = /[a-z0-9@#$]+(?:[!|+*._~-]*[a-z0-9@#$]+)*/g;
+/**
+ * Normalizes a dictionary word with the same rules that are applied to input
+ * tokens, so both sides of a lookup use the same key.
+ */
+function normalizeWord(word) {
+    return normalizeToken(word);
+}
+function tokenize(input) {
+    const normalizedInput = baseNormalize(input);
+    const tokens = [];
+    for (const match of normalizedInput.matchAll(TOKEN_CHARS)) {
+        const raw = match[0] ?? "";
+        const value = normalizeToken(raw);
+        if (value.length > 0) {
+            tokens.push({
+                value,
+                raw,
+                index: match.index ?? 0
+            });
+        }
+    }
+    return tokens;
+}
+function baseNormalize(input) {
+    return input.normalize("NFD").replace(COMBINING_MARKS, "").toLowerCase();
+}
+function normalizeToken(token) {
+    let normalized = "";
+    for (const char of baseNormalize(token)) {
+        const mapped = LEET_CHARS[char] ?? char;
+        if (/[a-z0-9]/.test(mapped)) {
+            normalized += mapped;
+        }
+    }
+    return normalized.replace(REPEATED_CHARS, "$1");
+}

package/dist/cjs/package.json ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"type":"commonjs"}

package/dist/cjs/words.js ADDED Viewed

@@ -0,0 +1,182 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.INDONESIAN_WORDS = void 0;
+exports.INDONESIAN_WORDS = [
+    "anjay",
+    "anjing",
+    "anjink",
+    "anjir",
+    "anjrit",
+    "anying",
+    "asu",
+    "babi",
+    "babatok",
+    "bacot",
+    "bagudung",
+    "bagong",
+    "bagoy",
+    "bajilak",
+    "bajindul",
+    "bajing",
+    "bajingak",
+    "bajingan",
+    "bajingseng",
+    "banci",
+    "bangsat",
+    "bangkawarah",
+    "bebel",
+    "bejad",
+    "bego",
+    "begok",
+    "belegug",
+    "belekok",
+    "bencong",
+    "berak",
+    "beungeut",
+    "bispak",
+    "bloon",
+    "bodat",
+    "bodo",
+    "bodoh",
+    "boloho",
+    "bolokotondo",
+    "brengsek",
+    "budeg",
+    "budheg",
+    "bujang",
+    "bujanginam",
+    "burit",
+    "cangcut",
+    "cangkem",
+    "cangkeman",
+    "cangkeme",
+    "celeng",
+    "celsit",
+    "cocot",
+    "cocote",
+    "cocotmu",
+    "cok",
+    "congor",
+    "congore",
+    "congormu",
+    "dancuk",
+    "damput",
+    "entot",
+    "ewe",
+    "gendeng",
+    "gendheng",
+    "geblek",
+    "gembel",
+    "gila",
+    "goblog",
+    "goblok",
+    "gundulmu",
+    "heang",
+    "hencet",
+    "henceut",
+    "herek",
+    "heunceut",
+    "idiot",
+    "itil",
+    "jablay",
+    "jamban",
+    "jampurut",
+    "jamput",
+    "jancok",
+    "jancuk",
+    "jangkrik",
+    "jembut",
+    "kampret",
+    "kampang",
+    "kancut",
+    "kanjut",
+    "kehed",
+    "keparat",
+    "kenthir",
+    "kimak",
+    "kimbek",
+    "kirik",
+    "kontol",
+    "kopet",
+    "koplak",
+    "koplok",
+    "kunyuk",
+    "lonte",
+    "loak",
+    "maho",
+    "maling",
+    "mampus",
+    "mampos",
+    "matamu",
+    "matane",
+    "mbacot",
+    "mbahmu",
+    "mbathang",
+    "mbladhog",
+    "memek",
+    "memex",
+    "meki",
+    "micek",
+    "modar",
+    "moddar",
+    "monyet",
+    "mripatmu",
+    "munyuk",
+    "ndase",
+    "ndasmu",
+    "ndlogok",
+    "ngaceng",
+    "ngendog",
+    "ngentot",
+    "nggapleki",
+    "nggateli",
+    "nggilani",
+    "ngewe",
+    "ngocor",
+    "ngocok",
+    "nguntal",
+    "ngurek",
+    "njeplak",
+    "njir",
+    "nyocot",
+    "ontohod",
+    "palkon",
+    "pantat",
+    "pantek",
+    "pecun",
+    "peju",
+    "pelacur",
+    "peler",
+    "pekok",
+    "pentil",
+    "pepek",
+    "perek",
+    "pethuk",
+    "picek",
+    "puki",
+    "pukimak",
+    "raimu",
+    "sarap",
+    "sempak",
+    "semprul",
+    "sial",
+    "sialan",
+    "silit",
+    "sinting",
+    "setan",
+    "soblog",
+    "sundal",
+    "taek",
+    "taik",
+    "tahi",
+    "tai",
+    "telek",
+    "tetek",
+    "titit",
+    "tokai",
+    "toket",
+    "tolol",
+    "udelmu",
+    "wedhus",
+    "zakar"
+];

package/dist/esm/index.js ADDED Viewed

@@ -0,0 +1,149 @@
+import { tokenize, normalizeWord } from "./normalize.js";
+import { INDONESIAN_WORDS } from "./words.js";
+// Text substituted for each match when the caller supplies no replacement.
+const DEFAULT_REPLACEMENT = "***";
+// Filter built from the bundled Indonesian word list only; `clearWords`
+// points the cache back at this instance.
+const DEFAULT_FILTER = createFilter({
+    words: INDONESIAN_WORDS
+});
+// Words registered through `addWords`, keyed by their normalized form; the
+// value is the word exactly as it was supplied.
+const addedWords = new Map();
+// Filter currently served to the module-level helpers; rebuilt lazily by
+// `getSharedDefaultFilter`.
+let cachedDefaultFilter = DEFAULT_FILTER;
+// Set by `addWords`; tells `getSharedDefaultFilter` that the cache is stale.
+let isDefaultFilterDirty = false;
+/**
+ * Add words to the shared default filter used by `containsProfanity`,
+ * `findProfanity`, and `clean`.
+ *
+ * Use this during application startup when your project has product-specific
+ * words that should be detected everywhere. Words are normalized and deduped
+ * with the same rules as the built-in Indonesian dictionary.
+ */
+export function addWords(words) {
+    for (const word of words) {
+        const normalized = normalizeWord(word);
+        if (normalized.length > 0) {
+            addedWords.set(normalized, word);
+        }
+    }
+    isDefaultFilterDirty = true;
+}
+/**
+ * Remove all words previously registered through `addWords`.
+ *
+ * This is useful for tests, worker reuse, or apps that need to rebuild their
+ * moderation policy at runtime.
+ */
+export function clearWords() {
+    addedWords.clear();
+    cachedDefaultFilter = DEFAULT_FILTER;
+    isDefaultFilterDirty = false;
+}
+/**
+ * Returns `true` when input contains a word from the default Indonesian
+ * dictionary, shared words registered with `addWords`, or per-call
+ * `additionalWords`.
+ */
+export function containsProfanity(input, options = {}) {
+    return getDefaultFilter(options.additionalWords).containsProfanity(input);
+}
+/**
+ * Finds profanity matches in input and returns match details, including the raw
+ * token, normalized token, matched dictionary word, and start index.
+ */
+export function findProfanity(input, options = {}) {
+    return getDefaultFilter(options.additionalWords).findProfanity(input);
+}
+/**
+ * Replaces profanity in input. By default matches are replaced with `"***"`,
+ * but callers can provide a replacement string or callback.
+ */
+export function clean(input, options = {}) {
+    const filter = getDefaultFilter(options.additionalWords);
+    if (options.replacement === undefined) {
+        return filter.clean(input);
+    }
+    return filter.clean(input, {
+        replacement: options.replacement
+    });
+}
+/**
+ * Creates an isolated reusable filter with its own word list.
+ *
+ * Prefer this when you need multiple independent dictionaries or literal
+ * TypeScript word types. Use `addWords` when you want to extend the package's
+ * shared default filter once for the whole app.
+ */
+export function createFilter(options) {
+    const words = [...options.words];
+    const dictionary = createDictionary(words);
+    return {
+        words,
+        containsProfanity(input) {
+            return findMatches(input, dictionary).length > 0;
+        },
+        findProfanity(input) {
+            return findMatches(input, dictionary);
+        },
+        clean(input, cleanOptions = {}) {
+            return replaceMatches(input, findMatches(input, dictionary), cleanOptions.replacement);
+        }
+    };
+}
+function createDictionary(words) {
+    const dictionary = new Map();
+    for (const word of words) {
+        const normalized = normalizeWord(word);
+        if (normalized.length > 0) {
+            dictionary.set(normalized, word);
+        }
+    }
+    return dictionary;
+}
+function getDefaultFilter(additionalWords) {
+    const sharedFilter = getSharedDefaultFilter();
+    if (additionalWords === undefined || additionalWords.length === 0) {
+        return sharedFilter;
+    }
+    return createFilter({
+        words: [...sharedFilter.words, ...additionalWords]
+    });
+}
+function getSharedDefaultFilter() {
+    if (!isDefaultFilterDirty) {
+        return cachedDefaultFilter;
+    }
+    cachedDefaultFilter = createFilter({
+        words: [...INDONESIAN_WORDS, ...addedWords.values()]
+    });
+    isDefaultFilterDirty = false;
+    return cachedDefaultFilter;
+}
+function findMatches(input, dictionary) {
+    const matches = [];
+    for (const token of tokenize(input)) {
+        const word = dictionary.get(token.value);
+        if (word !== undefined) {
+            matches.push(toMatch(token, word));
+        }
+    }
+    return matches;
+}
+function toMatch(token, word) {
+    return {
+        word,
+        raw: token.raw,
+        normalized: token.value,
+        index: token.index
+    };
+}
+function replaceMatches(input, matches, replacement = DEFAULT_REPLACEMENT) {
+    if (matches.length === 0) {
+        return input;
+    }
+    let cursor = 0;
+    let output = "";
+    for (const match of matches) {
+        output += input.slice(cursor, match.index);
+        output += typeof replacement === "function" ? replacement(match) : replacement;
+        cursor = match.index + match.raw.length;
+    }
+    return output + input.slice(cursor);
+}
+export { INDONESIAN_WORDS };

package/dist/esm/normalize.js ADDED Viewed

@@ -0,0 +1,49 @@
+// Look-alike digits and symbols mapped to the letter they stand in for.
+const LEET_CHARS = {
+    "0": "o",
+    "1": "i",
+    "!": "i",
+    "|": "i",
+    "3": "e",
+    "4": "a",
+    "@": "a",
+    "5": "s",
+    "$": "s",
+    "7": "t",
+    "+": "t",
+    "8": "b"
+};
+// Combining diacritical marks (U+0300-U+036F) left over after NFD decomposition.
+const COMBINING_MARKS = /[\u0300-\u036f]/g;
+// A run of the same letter or digit; `normalizeToken` collapses each run to
+// a single character.
+const REPEATED_CHARS = /([a-z0-9])\1+/g;
+// One token: runs of letters, digits, `@`, `#`, or `$`, optionally joined by
+// separator symbols (`!`, `|`, `+`, `*`, `.`, `_`, `~`, `-`).
+const TOKEN_CHARS = /[a-z0-9@#$]+(?:[!|+*._~-]*[a-z0-9@#$]+)*/g;
+/**
+ * Normalizes a dictionary word with the same rules that are applied to input
+ * tokens, so both sides of a lookup use the same key.
+ */
+export function normalizeWord(word) {
+    return normalizeToken(word);
+}
+export function tokenize(input) {
+    const normalizedInput = baseNormalize(input);
+    const tokens = [];
+    for (const match of normalizedInput.matchAll(TOKEN_CHARS)) {
+        const raw = match[0] ?? "";
+        const value = normalizeToken(raw);
+        if (value.length > 0) {
+            tokens.push({
+                value,
+                raw,
+                index: match.index ?? 0
+            });
+        }
+    }
+    return tokens;
+}
+function baseNormalize(input) {
+    return input.normalize("NFD").replace(COMBINING_MARKS, "").toLowerCase();
+}
+function normalizeToken(token) {
+    let normalized = "";
+    for (const char of baseNormalize(token)) {
+        const mapped = LEET_CHARS[char] ?? char;
+        if (/[a-z0-9]/.test(mapped)) {
+            normalized += mapped;
+        }
+    }
+    return normalized.replace(REPEATED_CHARS, "$1");
+}

package/dist/esm/words.js ADDED Viewed

@@ -0,0 +1,179 @@
+export const INDONESIAN_WORDS = [
+    "anjay",
+    "anjing",
+    "anjink",
+    "anjir",
+    "anjrit",
+    "anying",
+    "asu",
+    "babi",
+    "babatok",
+    "bacot",
+    "bagudung",
+    "bagong",
+    "bagoy",
+    "bajilak",
+    "bajindul",
+    "bajing",
+    "bajingak",
+    "bajingan",
+    "bajingseng",
+    "banci",
+    "bangsat",
+    "bangkawarah",
+    "bebel",
+    "bejad",
+    "bego",
+    "begok",
+    "belegug",
+    "belekok",
+    "bencong",
+    "berak",
+    "beungeut",
+    "bispak",
+    "bloon",
+    "bodat",
+    "bodo",
+    "bodoh",
+    "boloho",
+    "bolokotondo",
+    "brengsek",
+    "budeg",
+    "budheg",
+    "bujang",
+    "bujanginam",
+    "burit",
+    "cangcut",
+    "cangkem",
+    "cangkeman",
+    "cangkeme",
+    "celeng",
+    "celsit",
+    "cocot",
+    "cocote",
+    "cocotmu",
+    "cok",
+    "congor",
+    "congore",
+    "congormu",
+    "dancuk",
+    "damput",
+    "entot",
+    "ewe",
+    "gendeng",
+    "gendheng",
+    "geblek",
+    "gembel",
+    "gila",
+    "goblog",
+    "goblok",
+    "gundulmu",
+    "heang",
+    "hencet",
+    "henceut",
+    "herek",
+    "heunceut",
+    "idiot",
+    "itil",
+    "jablay",
+    "jamban",
+    "jampurut",
+    "jamput",
+    "jancok",
+    "jancuk",
+    "jangkrik",
+    "jembut",
+    "kampret",
+    "kampang",
+    "kancut",
+    "kanjut",
+    "kehed",
+    "keparat",
+    "kenthir",
+    "kimak",
+    "kimbek",
+    "kirik",
+    "kontol",
+    "kopet",
+    "koplak",
+    "koplok",
+    "kunyuk",
+    "lonte",
+    "loak",
+    "maho",
+    "maling",
+    "mampus",
+    "mampos",
+    "matamu",
+    "matane",
+    "mbacot",
+    "mbahmu",
+    "mbathang",
+    "mbladhog",
+    "memek",
+    "memex",
+    "meki",
+    "micek",
+    "modar",
+    "moddar",
+    "monyet",
+    "mripatmu",
+    "munyuk",
+    "ndase",
+    "ndasmu",
+    "ndlogok",
+    "ngaceng",
+    "ngendog",
+    "ngentot",
+    "nggapleki",
+    "nggateli",
+    "nggilani",
+    "ngewe",
+    "ngocor",
+    "ngocok",
+    "nguntal",
+    "ngurek",
+    "njeplak",
+    "njir",
+    "nyocot",
+    "ontohod",
+    "palkon",
+    "pantat",
+    "pantek",
+    "pecun",
+    "peju",
+    "pelacur",
+    "peler",
+    "pekok",
+    "pentil",
+    "pepek",
+    "perek",
+    "pethuk",
+    "picek",
+    "puki",
+    "pukimak",
+    "raimu",
+    "sarap",
+    "sempak",
+    "semprul",
+    "sial",
+    "sialan",
+    "silit",
+    "sinting",
+    "setan",
+    "soblog",
+    "sundal",
+    "taek",
+    "taik",
+    "tahi",
+    "tai",
+    "telek",
+    "tetek",
+    "titit",
+    "tokai",
+    "toket",
+    "tolol",
+    "udelmu",
+    "wedhus",
+    "zakar"
+];

package/dist/types/index.d.ts ADDED Viewed

@@ -0,0 +1,93 @@
+import { INDONESIAN_WORDS, type IndonesianWord } from "./words.js";
+export type SupportedLanguage = "id";
+/**
+ * A detected profanity token and its location in the original input.
+ */
+export interface ProfanityMatch<Word extends string = string> {
+    /** The dictionary word that matched after normalization. */
+    readonly word: Word;
+    /** The original token from the input, before normalization. */
+    readonly raw: string;
+    /** The normalized token used for dictionary lookup. */
+    readonly normalized: string;
+    /** Zero-based character index where the raw token starts. */
+    readonly index: number;
+}
+/**
+ * Options shared by read-only detection helpers.
+ */
+export interface ProfanityOptions<AdditionalWord extends string = never> {
+    /** Extra words to merge with the default dictionary for this call only. */
+    readonly additionalWords?: readonly AdditionalWord[];
+}
+/**
+ * Options for replacing detected profanity.
+ */
+export interface CleanOptions<AdditionalWord extends string = never> extends ProfanityOptions<AdditionalWord> {
+    /** Replacement string or callback. Defaults to `"***"`. */
+    readonly replacement?: string | ((match: ProfanityMatch<DefaultWord<AdditionalWord>>) => string);
+}
+/**
+ * Options for creating an isolated reusable profanity filter.
+ */
+export interface FilterOptions<Word extends string = IndonesianWord> {
+    /** Words that should be detected by this filter. */
+    readonly words: readonly Word[];
+    /**
+     * Replacement string or callback for each match. `ProfanityFilter.clean`
+     * accepts it per call; `createFilter` itself only takes `words`.
+     */
+    readonly replacement?: string | ((match: ProfanityMatch<Word>) => string);
+}
+/**
+ * A reusable profanity filter with its own dictionary.
+ */
+export interface ProfanityFilter<Word extends string = IndonesianWord> {
+    /** The words configured for this filter. */
+    readonly words: readonly Word[];
+    /** Returns whether the input contains at least one configured word. */
+    containsProfanity(input: string): boolean;
+    /** Returns all configured words found in the input. */
+    findProfanity(input: string): readonly ProfanityMatch<Word>[];
+    /** Returns input with configured words replaced. */
+    clean(input: string, options?: Pick<FilterOptions<Word>, "replacement">): string;
+}
+/**
+ * Word type reported by the module-level helpers. Because the union includes
+ * `string`, it widens to plain `string`.
+ */
+type DefaultWord<AdditionalWord extends string = never> = IndonesianWord | string | AdditionalWord;
+/**
+ * Add words to the shared default filter used by `containsProfanity`,
+ * `findProfanity`, and `clean`.
+ *
+ * Use this during application startup when your project has product-specific
+ * words that should be detected everywhere. Words are normalized and deduped
+ * with the same rules as the built-in Indonesian dictionary.
+ */
+export declare function addWords(words: readonly string[]): void;
+/**
+ * Remove all words previously registered through `addWords`.
+ *
+ * This is useful for tests, worker reuse, or apps that need to rebuild their
+ * moderation policy at runtime.
+ */
+export declare function clearWords(): void;
+/**
+ * Returns `true` when input contains a word from the default Indonesian
+ * dictionary, shared words registered with `addWords`, or per-call
+ * `additionalWords`.
+ */
+export declare function containsProfanity<AdditionalWord extends string = never>(input: string, options?: ProfanityOptions<AdditionalWord>): boolean;
+/**
+ * Finds profanity matches in input and returns match details, including the raw
+ * token, normalized token, matched dictionary word, and start index.
+ */
+export declare function findProfanity<AdditionalWord extends string = never>(input: string, options?: ProfanityOptions<AdditionalWord>): readonly ProfanityMatch<DefaultWord<AdditionalWord>>[];
+/**
+ * Replaces profanity in input. By default matches are replaced with `"***"`,
+ * but callers can provide a replacement string or callback.
+ */
+export declare function clean<AdditionalWord extends string = never>(input: string, options?: CleanOptions<AdditionalWord>): string;
+/**
+ * Creates an isolated reusable filter with its own word list.
+ *
+ * Prefer this when you need multiple independent dictionaries or literal
+ * TypeScript word types. Use `addWords` when you want to extend the package's
+ * shared default filter once for the whole app.
+ */
+export declare function createFilter<Word extends string>(options: Pick<FilterOptions<Word>, "words">): ProfanityFilter<Word>;
+export { INDONESIAN_WORDS };
+export type { IndonesianWord };

package/dist/types/normalize.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+/**
+ * One token produced by `tokenize`.
+ */
+export interface NormalizedToken {
+    /** Normalized form of the token, used as the dictionary lookup key. */
+    readonly value: string;
+    /** Token text as matched, before leetspeak mapping and repeat collapsing. */
+    readonly raw: string;
+    /** Zero-based offset at which the token starts. */
+    readonly index: number;
+}
+/** Normalizes a dictionary word with the same rules that are applied to input tokens. */
+export declare function normalizeWord(word: string): string;
+/** Splits input into tokens, skipping any token whose normalized form is empty. */
+export declare function tokenize(input: string): readonly NormalizedToken[];

package/dist/types/words.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export declare const INDONESIAN_WORDS: readonly ["anjay", "anjing", "anjink", "anjir", "anjrit", "anying", "asu", "babi", "babatok", "bacot", "bagudung", "bagong", "bagoy", "bajilak", "bajindul", "bajing", "bajingak", "bajingan", "bajingseng", "banci", "bangsat", "bangkawarah", "bebel", "bejad", "bego", "begok", "belegug", "belekok", "bencong", "berak", "beungeut", "bispak", "bloon", "bodat", "bodo", "bodoh", "boloho", "bolokotondo", "brengsek", "budeg", "budheg", "bujang", "bujanginam", "burit", "cangcut", "cangkem", "cangkeman", "cangkeme", "celeng", "celsit", "cocot", "cocote", "cocotmu", "cok", "congor", "congore", "congormu", "dancuk", "damput", "entot", "ewe", "gendeng", "gendheng", "geblek", "gembel", "gila", "goblog", "goblok", "gundulmu", "heang", "hencet", "henceut", "herek", "heunceut", "idiot", "itil", "jablay", "jamban", "jampurut", "jamput", "jancok", "jancuk", "jangkrik", "jembut", "kampret", "kampang", "kancut", "kanjut", "kehed", "keparat", "kenthir", "kimak", "kimbek", "kirik", "kontol", "kopet", "koplak", "koplok", "kunyuk", "lonte", "loak", "maho", "maling", "mampus", "mampos", "matamu", "matane", "mbacot", "mbahmu", "mbathang", "mbladhog", "memek", "memex", "meki", "micek", "modar", "moddar", "monyet", "mripatmu", "munyuk", "ndase", "ndasmu", "ndlogok", "ngaceng", "ngendog", "ngentot", "nggapleki", "nggateli", "nggilani", "ngewe", "ngocor", "ngocok", "nguntal", "ngurek", "njeplak", "njir", "nyocot", "ontohod", "palkon", "pantat", "pantek", "pecun", "peju", "pelacur", "peler", "pekok", "pentil", "pepek", "perek", "pethuk", "picek", "puki", "pukimak", "raimu", "sarap", "sempak", "semprul", "sial", "sialan", "silit", "sinting", "setan", "soblog", "sundal", "taek", "taik", "tahi", "tai", "telek", "tetek", "titit", "tokai", "toket", "tolol", "udelmu", "wedhus", "zakar"];
2	+ export type IndonesianWord = (typeof INDONESIAN_WORDS)[number];

package/package.json ADDED Viewed

@@ -0,0 +1,64 @@
+{
+  "name": "@fldx/sopan",
+  "version": "1.0.0",
+  "description": "Small TypeScript-first profanity filter for Indonesian text.",
+  "homepage": "https://github.com/feildrixliemdra/sopan",
+  "license": "MIT",
+  "author": "fldx",
+  "type": "module",
+  "sideEffects": false,
+  "files": [
+    "dist",
+    "README.md",
+    "LICENSE"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git+ssh://git@github.com:feildrixliemdra/sopan.git"
+  },
+  "bugs": {
+    "url": "https://github.com/feildrixliemdra/sopan/issues"
+  },
+  "main": "./dist/cjs/index.js",
+  "module": "./dist/esm/index.js",
+  "types": "./dist/types/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/types/index.d.ts",
+      "import": "./dist/esm/index.js",
+      "require": "./dist/cjs/index.js"
+    }
+  },
+  "scripts": {
+    "build": "npm run clean && npm run build:esm && npm run build:cjs && npm run build:types && node scripts/rename-cjs.mjs",
+    "build:esm": "tsc -p tsconfig.esm.json",
+    "build:cjs": "tsc -p tsconfig.cjs.json",
+    "build:types": "tsc -p tsconfig.types.json",
+    "clean": "node scripts/clean.mjs",
+    "test": "npm run build && node --test test/*.test.js",
+    "typecheck": "tsc -p tsconfig.json --noEmit",
+    "lint": "eslint .",
+    "lint:fix": "eslint . --fix",
+    "prepublishOnly": "npm run lint && npm test"
+  },
+  "keywords": [
+    "profanity",
+    "filter",
+    "indonesian",
+    "words",
+    "censor",
+    "swearing",
+    "badwords",
+    "sensor"
+  ],
+  "devDependencies": {
+    "@eslint/js": "^10.0.1",
+    "eslint": "^10.4.1",
+    "globals": "^15.14.0",
+    "typescript": "^5.4.0",
+    "typescript-eslint": "^8.61.0"
+  },
+  "engines": {
+    "node": ">=18"
+  }
+}