@oomfware/lang-detect 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/LICENSE +14 -0
  2. package/README.md +68 -0
  3. package/dist/eval.d.ts +8 -0
  4. package/dist/eval.d.ts.map +1 -0
  5. package/dist/eval.js +145 -0
  6. package/dist/eval.js.map +1 -0
  7. package/dist/index.d.ts +3 -0
  8. package/dist/index.d.ts.map +1 -0
  9. package/dist/index.js +20 -0
  10. package/dist/index.js.map +1 -0
  11. package/dist/lite.d.ts +3 -0
  12. package/dist/lite.d.ts.map +1 -0
  13. package/dist/lite.js +20 -0
  14. package/dist/lite.js.map +1 -0
  15. package/dist/nn/detect.d.ts +25 -0
  16. package/dist/nn/detect.d.ts.map +1 -0
  17. package/dist/nn/detect.js +209 -0
  18. package/dist/nn/detect.js.map +1 -0
  19. package/dist/nn/forward.d.ts +38 -0
  20. package/dist/nn/forward.d.ts.map +1 -0
  21. package/dist/nn/forward.js +154 -0
  22. package/dist/nn/forward.js.map +1 -0
  23. package/dist/nn/groups.d.ts +23 -0
  24. package/dist/nn/groups.d.ts.map +1 -0
  25. package/dist/nn/groups.js +81 -0
  26. package/dist/nn/groups.js.map +1 -0
  27. package/dist/nn/load.d.ts +15 -0
  28. package/dist/nn/load.d.ts.map +1 -0
  29. package/dist/nn/load.js +21 -0
  30. package/dist/nn/load.js.map +1 -0
  31. package/dist/nn/load.node.d.ts +15 -0
  32. package/dist/nn/load.node.d.ts.map +1 -0
  33. package/dist/nn/load.node.js +23 -0
  34. package/dist/nn/load.node.js.map +1 -0
  35. package/dist/nn/normalize.d.ts +17 -0
  36. package/dist/nn/normalize.d.ts.map +1 -0
  37. package/dist/nn/normalize.js +34 -0
  38. package/dist/nn/normalize.js.map +1 -0
  39. package/package.json +61 -0
  40. package/src/eval.ts +173 -0
  41. package/src/index.ts +22 -0
  42. package/src/lite.ts +25 -0
  43. package/src/nn/detect.ts +309 -0
  44. package/src/nn/forward.ts +181 -0
  45. package/src/nn/load.node.ts +24 -0
  46. package/src/nn/load.ts +21 -0
  47. package/src/nn/normalize.ts +38 -0
  48. package/weights/lite/arabic.bin +0 -0
  49. package/weights/lite/arabic.json +1 -0
  50. package/weights/lite/cyrillic.bin +5 -0
  51. package/weights/lite/cyrillic.json +1 -0
  52. package/weights/lite/devanagari.bin +0 -0
  53. package/weights/lite/devanagari.json +1 -0
  54. package/weights/lite/latin.bin +5 -0
  55. package/weights/lite/latin.json +1 -0
  56. package/weights/standard/arabic.bin +0 -0
  57. package/weights/standard/arabic.json +1 -0
  58. package/weights/standard/cyrillic.bin +0 -0
  59. package/weights/standard/cyrillic.json +1 -0
  60. package/weights/standard/devanagari.bin +9 -0
  61. package/weights/standard/devanagari.json +1 -0
  62. package/weights/standard/latin.bin +0 -0
  63. package/weights/standard/latin.json +1 -0
package/LICENSE ADDED
@@ -0,0 +1,14 @@
1
+ BSD Zero Clause License
2
+
3
+ Copyright (c) 2026 Mary
4
+
5
+ Permission to use, copy, modify, and/or distribute this software for any
6
+ purpose with or without fee is hereby granted.
7
+
8
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
9
+ REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10
+ AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
11
+ INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
13
+ OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14
+ PERFORMANCE OF THIS SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,68 @@
1
+ # @oomfware/lang-detect
2
+
3
+ natural language detection library.
4
+
5
+ ```sh
6
+ npm install @oomfware/lang-detect
7
+ ```
8
+
9
+ ## usage
10
+
11
+ call `initialize()` once to load model weights via `fetch()`, then use `detect()` synchronously on
12
+ any text. results are sorted by probability, highest first.
13
+
14
+ ```ts
15
+ import { initialize, detect } from '@oomfware/lang-detect';
16
+
17
+ await initialize();
18
+
19
+ const results = detect('the quick brown fox jumps over the lazy dog');
20
+ console.log(results[0]); // -> ['eng', 0.98]
21
+ ```
22
+
23
+ mixed-script text returns detections for each script present, with probabilities scaled by
24
+ proportion:
25
+
26
+ ```ts
27
+ const results = detect('Hello Мир');
28
+ // -> [['eng', ...], ['rus', ...]]
29
+ ```
30
+
31
+ ### variants
32
+
33
+ two variants are available, trading accuracy for smaller weights:
34
+
35
+ | subpath | weights | accuracy |
36
+ | --------- | ------- | -------- |
37
+ | (default) | 57.4 KB | 95.2% |
38
+ | `/lite` | 43.1 KB | 95.1% |
39
+
40
+ ```ts
41
+ import { initialize, detect } from '@oomfware/lang-detect/lite';
42
+ ```
43
+
44
+ ### supported languages
45
+
46
+ 50 languages across Latin, Cyrillic, Arabic, Devanagari, CJK, and unique-script families.
47
+
48
+ | code | language | code | language | code | language |
49
+ | ----- | ---------------- | ----- | ---------------- | ----- | ---------- |
50
+ | `afr` | Afrikaans | `hau` | Hausa | `por` | Portuguese |
51
+ | `ara` | Arabic | `heb` | Hebrew | `ron` | Romanian |
52
+ | `aze` | Azerbaijani | `hin` | Hindi | `run` | Rundi |
53
+ | `bel` | Belarusian | `hrv` | Croatian | `rus` | Russian |
54
+ | `ben` | Bengali | `hun` | Hungarian | `slk` | Slovak |
55
+ | `bul` | Bulgarian | `hye` | Armenian | `spa` | Spanish |
56
+ | `cat` | Catalan | `ind` | Indonesian | `srp` | Serbian |
57
+ | `ces` | Czech | `isl` | Icelandic | `swe` | Swedish |
58
+ | `ckb` | Central Kurdish | `ita` | Italian | `tgl` | Tagalog |
59
+ | `cmn` | Mandarin Chinese | `jpn` | Japanese | `tur` | Turkish |
60
+ | `dan` | Danish | `kat` | Georgian | `ukr` | Ukrainian |
61
+ | `deu` | German | `kaz` | Kazakh | `vie` | Vietnamese |
62
+ | `ell` | Greek | `kor` | Korean | | |
63
+ | `eng` | English | `lit` | Lithuanian | | |
64
+ | `est` | Estonian | `mar` | Marathi | | |
65
+ | `eus` | Basque | `mkd` | Macedonian | | |
66
+ | `fin` | Finnish | `nld` | Dutch | | |
67
+ | `fra` | French | `nob` | Norwegian Bokmål | | |
68
+ | `pes` | Persian | `pol` | Polish | | |
package/dist/eval.d.ts ADDED
@@ -0,0 +1,8 @@
1
+ /**
2
+ * evaluate detection accuracy against the UDHR dataset.
3
+ *
4
+ * usage:
5
+ * node --conditions source src/eval.ts [--lite] [--lande]
6
+ */
7
+ export {};
8
+ //# sourceMappingURL=eval.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../src/eval.ts"],"names":[],"mappings":"AAAA;;;;;GAKG"}
package/dist/eval.js ADDED
@@ -0,0 +1,145 @@
1
+ /**
2
+ * evaluate detection accuracy against the UDHR dataset.
3
+ *
4
+ * usage:
5
+ * node --conditions source src/eval.ts [--lite] [--lande]
6
+ */
7
+ import fs from 'node:fs';
8
+ import path from 'node:path';
9
+ import { parseArgs } from 'node:util';
10
+ import { create } from "./nn/detect.js";
11
// command-line flags: `--lite` evaluates the 6-bit weights instead of the
// 8-bit ones, `--lande` additionally evaluates the `lande` package.
const { values: args } = parseArgs({
	options: {
		lite: { type: 'boolean', default: false },
		lande: { type: 'boolean', default: false },
	},
});
const variant = args.lite ? 'lite' : 'standard';
const quantBits = args.lite ? 6 : 8;
/**
 * builds the weight/metadata URLs for one group of the selected variant.
 *
 * the URLs are resolved against import.meta.url instead of gluing `file://`
 * onto a filesystem path: a path containing `#`, `?` or `%` would otherwise
 * be parsed as URL syntax and point at the wrong file.
 *
 * @param group group name, also the file stem inside the weights directory
 * @returns the group's weight and metadata URLs
 */
const groupSource = (group) => ({
	weights: new URL(`../weights/${variant}/${group}.bin`, import.meta.url),
	meta: new URL(`../weights/${variant}/${group}.json`, import.meta.url),
});
const { initialize, detect } = create({
	cyrillic: groupSource('cyrillic'),
	arabic: groupSource('arabic'),
	devanagari: groupSource('devanagari'),
	latin: groupSource('latin'),
}, quantBits);
38
// ── UDHR code → ISO 639-3 mapping ──
/**
 * maps each UDHR declaration file stem to the language code the detector is
 * expected to return for it.
 *
 * every stem starts with its three-letter language code, optionally followed
 * by a variant suffix (`por_BR`, `srp_cyrl`, …), so the value is derived from
 * the stem. the list order is the order in which declarations are read.
 */
const UDHR_CODE_TO_LANG = Object.fromEntries([
	'afr', 'bel', 'ben', 'bul', 'cat', 'ces', 'ckb',
	'cmn_hans', 'dan', 'deu_1996', 'ell_monotonic', 'eng', 'eus', 'fin',
	'fra', 'hau_NG', 'heb', 'hin', 'hrv', 'hun', 'hye',
	'ind', 'isl', 'ita', 'jpn', 'kat', 'kaz', 'kor',
	'lit', 'mar', 'mkd', 'nld', 'nob', 'pes_1', 'pol',
	'por_BR', 'por_PT', 'ron_2006', 'run', 'rus', 'slk', 'spa',
	'srp_cyrl', 'srp_latn', 'swe', 'tgl', 'tur', 'ukr', 'vie',
].map((code) => [code, code.slice(0, 3)]));
90
/** matches any HTML tag remaining inside a paragraph. */
const TAG_RE = /<[^>]+>/g;
// ── load UDHR sentences ──
const declDir = path.resolve(import.meta.dirname, '..', 'train', 'resources', 'udhr', 'declaration');
/** every usable paragraph, paired with the language it is expected to be detected as. */
const sentences = [];
for (const code of Object.keys(UDHR_CODE_TO_LANG)) {
	const lang = UDHR_CODE_TO_LANG[code];
	const htmlFile = path.join(declDir, `${code}.html`);
	// declarations that are not present on disk are skipped without complaint
	if (fs.existsSync(htmlFile)) {
		const html = fs.readFileSync(htmlFile, 'utf-8');
		// take the inner text of each <p>…</p>, with nested tags removed
		const paragraphs = Array.from(html.matchAll(/<p>(.*?)<\/p>/gs), (found) => found[1].replace(TAG_RE, '').trim());
		for (const text of paragraphs) {
			// paragraphs shorter than 10 characters are dropped
			if (text.length >= 10) {
				sentences.push({ lang, text });
			}
		}
	}
}
108
/**
 * runs a detector over every loaded UDHR sentence and prints an accuracy report.
 *
 * the report lists the sentence and language counts, the overall accuracy, and
 * then every language that scored below 100%, least accurate first.
 *
 * @param name label printed in the report header
 * @param detectFn returns the top language code for a text (or undefined)
 */
const evaluate = (name, detectFn) => {
	// declaration files that are missing are skipped while loading, so the
	// sentence list can be empty; report that instead of printing 0 / 0 = NaN%
	if (sentences.length === 0) {
		console.log(`\n=== ${name} ===`);
		console.log(`0 sentences, 0 languages`);
		console.log(`overall accuracy: n/a (no UDHR sentences were loaded)`);
		return;
	}
	const perLang = {};
	let totalPass = 0;
	for (const { lang, text } of sentences) {
		perLang[lang] ??= { pass: 0, total: 0 };
		perLang[lang].total++;
		if (detectFn(text) === lang) {
			perLang[lang].pass++;
			totalPass++;
		}
	}
	const overallAcc = (totalPass / sentences.length) * 100;
	console.log(`\n=== ${name} ===`);
	console.log(`${sentences.length} sentences, ${Object.keys(perLang).length} languages`);
	console.log(`overall accuracy: ${overallAcc.toFixed(2)}%`);
	// least accurate languages first; perfect scores are left out of the listing
	const sorted = Object.entries(perLang).sort((a, b) => a[1].pass / a[1].total - b[1].pass / b[1].total);
	for (const [lang, stats] of sorted) {
		const acc = (stats.pass / stats.total) * 100;
		if (acc < 100) {
			console.log(`  ${lang}: ${acc.toFixed(1)}% (${stats.total})`);
		}
	}
};
131
// ── evaluate ──
// weights must be loaded before detect() can be called
await initialize();
evaluate(`UDHR: ${variant} (${quantBits}-bit)`, (text) => detect(text)[0]?.[0]);
if (args.lande) {
	// `lande` is only imported when the --lande flag asks for the comparison
	// eslint-disable-next-line @typescript-eslint/no-require-imports
	const landeModule = await import('lande');
	const lande = landeModule.default;
	evaluate('UDHR: lande', (text) => lande(text)?.[0]?.[0]);
}
145
+ //# sourceMappingURL=eval.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval.js","sourceRoot":"","sources":["../src/eval.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAExC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,SAAS,CAAC;IAClC,OAAO,EAAE;QACR,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE;QACzC,KAAK,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE;KAC1C;CACD,CAAC,CAAC;AAEH,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC;AAChD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAEpC,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAQ,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;AAChF,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,MAAM,CACpC;IACC,QAAQ,EAAE;QACT,OAAO,EAAE,IAAI,GAAG,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,cAAc,CAAC,EAAE,CAAC;QACnE,IAAI,EAAE,IAAI,GAAG,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,eAAe,CAAC,EAAE,CAAC;KACjE;IACD,MAAM,EAAE;QACP,OAAO,EAAE,IAAI,GAAG,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,YAAY,CAAC,EAAE,CAAC;QACjE,IAAI,EAAE,IAAI,GAAG,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,aAAa,CAAC,EAAE,CAAC;KAC/D;IACD,UAAU,EAAE;QACX,OAAO,EAAE,IAAI,GAAG,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,gBAAgB,CAAC,EAAE,CAAC;QACrE,IAAI,EAAE,IAAI,GAAG,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,iBAAiB,CAAC,EAAE,CAAC;KACnE;IACD,KAAK,EAAE;QACN,OAAO,EAAE,IAAI,GAAG,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,WAAW,CAAC,EAAE,CAAC;QAChE,IAAI,EAAE,IAAI,GAAG,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,YAAY,CAAC,EAAE,CAAC;KAC9D;CACD,EACD,SAAS,CACT,CAAC;AAEF,sCAAsC;AAEtC,MAAM,iBAAiB,GAA2B;IACjD,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,QAAQ,EAAE,KAAK;IACf,GAAG,EAAE,KAAK;IACV,QAAQ,EAAE,KAAK;IACf,aAAa,EAAE,KAAK;IACpB,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,MAAM,EAAE,KAAK;IACb,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAA
K;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,KAAK,EAAE,KAAK;IACZ,GAAG,EAAE,KAAK;IACV,MAAM,EAAE,KAAK;IACb,MAAM,EAAE,KAAK;IACb,QAAQ,EAAE,KAAK;IACf,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,QAAQ,EAAE,KAAK;IACf,QAAQ,EAAE,KAAK;IACf,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;IACV,GAAG,EAAE,KAAK;CACV,CAAC;AAEF,MAAM,MAAM,GAAG,UAAU,CAAC;AAE1B,4BAA4B;AAE5B,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,aAAa,CAAC,CAAC;AACtG,MAAM,SAAS,GAAqC,EAAE,CAAC;AAEvD,KAAK,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,iBAAiB,CAAC,EAAE,CAAC;IAC9D,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,IAAI,OAAO,CAAC,CAAC;IACpD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC9B,SAAS;IACV,CAAC;IAED,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACnD,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC;QACzD,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QACjD,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACtB,SAAS;QACV,CAAC;QACD,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;IAChC,CAAC;AACF,CAAC;AAMD,MAAM,QAAQ,GAAG,CAAC,IAAY,EAAE,QAA8C,EAAE,EAAE;IACjF,MAAM,OAAO,GAA0B,EAAE,CAAC;IAC1C,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,SAAS,EAAE,CAAC;QACxC,OAAO,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;QACxC,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;QAEtB,IAAI,QAAQ,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;YAC7B,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YACrB,SAAS,EAAE,CAAC;QACb,CAAC;IACF,CAAC;IAED,MAAM,UAAU,GAAG,CAAC,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC;IAExD,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,MAAM,CAAC,CAAC;IACjC,OA
AO,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,MAAM,eAAe,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,YAAY,CAAC,CAAC;IACvF,OAAO,CAAC,GAAG,CAAC,qBAAqB,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAE3D,MAAM,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;IACvG,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;QACpC,MAAM,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC;QAC7C,IAAI,GAAG,GAAG,GAAG,EAAE,CAAC;YACf,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,KAAK,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC;QAC/D,CAAC;IACF,CAAC;AACF,CAAC,CAAC;AAEF,iBAAiB;AAEjB,MAAM,UAAU,EAAE,CAAC;AAEnB,QAAQ,CAAC,SAAS,OAAO,KAAK,SAAS,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;IACxD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;IAC5B,OAAO,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;AACvB,CAAC,CAAC,CAAC;AAEH,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;IAChB,iEAAiE;IACjE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAC;IACjD,QAAQ,CAAC,aAAa,EAAE,CAAC,IAAI,EAAE,EAAE;QAChC,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC;QAC3B,OAAO,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC,CAAC,CAAC;AACJ,CAAC"}
@@ -0,0 +1,3 @@
1
+ export type { Detection } from './nn/detect.ts';
2
+ export declare const initialize: () => Promise<void>, detect: (text: string) => import("./nn/detect.ts").Detection[];
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,YAAY,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAEhD,eAAO,MAAQ,UAAU,uBAAE,MAAM,wDAiB/B,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,20 @@
1
+ import { create } from "./nn/detect.js";
2
/** weight and metadata file locations of the standard variant, keyed by group name. */
const sources = {
	cyrillic: {
		weights: new URL('../weights/standard/cyrillic.bin', import.meta.url),
		meta: new URL('../weights/standard/cyrillic.json', import.meta.url),
	},
	arabic: {
		weights: new URL('../weights/standard/arabic.bin', import.meta.url),
		meta: new URL('../weights/standard/arabic.json', import.meta.url),
	},
	devanagari: {
		weights: new URL('../weights/standard/devanagari.bin', import.meta.url),
		meta: new URL('../weights/standard/devanagari.json', import.meta.url),
	},
	latin: {
		weights: new URL('../weights/standard/latin.bin', import.meta.url),
		meta: new URL('../weights/standard/latin.json', import.meta.url),
	},
};
// no bit width is passed, so create() falls back to its default
const detector = create(sources);
/** loads the standard weights; call once before detect(). */
export const initialize = detector.initialize;
/** detects the languages of a text using the standard weights. */
export const detect = detector.detect;
20
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAIxC,MAAM,CAAC,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC;IAC5C,QAAQ,EAAE;QACT,OAAO,EAAE,IAAI,GAAG,CAAC,kCAAkC,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;QACrE,IAAI,EAAE,IAAI,GAAG,CAAC,mCAAmC,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;KACnE;IACD,MAAM,EAAE;QACP,OAAO,EAAE,IAAI,GAAG,CAAC,gCAAgC,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;QACnE,IAAI,EAAE,IAAI,GAAG,CAAC,iCAAiC,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;KACjE;IACD,UAAU,EAAE;QACX,OAAO,EAAE,IAAI,GAAG,CAAC,oCAAoC,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;QACvE,IAAI,EAAE,IAAI,GAAG,CAAC,qCAAqC,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;KACrE;IACD,KAAK,EAAE;QACN,OAAO,EAAE,IAAI,GAAG,CAAC,+BAA+B,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;QAClE,IAAI,EAAE,IAAI,GAAG,CAAC,gCAAgC,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;KAChE;CACD,CAAC,CAAC"}
package/dist/lite.d.ts ADDED
@@ -0,0 +1,3 @@
1
+ export type { Detection } from './nn/detect.ts';
2
+ export declare const initialize: () => Promise<void>, detect: (text: string) => import("./nn/detect.ts").Detection[];
3
+ //# sourceMappingURL=lite.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"lite.d.ts","sourceRoot":"","sources":["../src/lite.ts"],"names":[],"mappings":"AAEA,YAAY,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAEhD,eAAO,MAAQ,UAAU,uBAAE,MAAM,wDAoBhC,CAAC"}
package/dist/lite.js ADDED
@@ -0,0 +1,20 @@
1
+ import { create } from "./nn/detect.js";
2
/** weight and metadata file locations of the lite variant, keyed by group name. */
const sources = {
	cyrillic: {
		weights: new URL('../weights/lite/cyrillic.bin', import.meta.url),
		meta: new URL('../weights/lite/cyrillic.json', import.meta.url),
	},
	arabic: {
		weights: new URL('../weights/lite/arabic.bin', import.meta.url),
		meta: new URL('../weights/lite/arabic.json', import.meta.url),
	},
	devanagari: {
		weights: new URL('../weights/lite/devanagari.bin', import.meta.url),
		meta: new URL('../weights/lite/devanagari.json', import.meta.url),
	},
	latin: {
		weights: new URL('../weights/lite/latin.bin', import.meta.url),
		meta: new URL('../weights/lite/latin.json', import.meta.url),
	},
};
// the lite weights are created with a 6-bit quantization width
const detector = create(sources, 6);
/** loads the lite weights; call once before detect(). */
export const initialize = detector.initialize;
/** detects the languages of a text using the lite weights. */
export const detect = detector.detect;
20
+ //# sourceMappingURL=lite.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"lite.js","sourceRoot":"","sources":["../src/lite.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAIxC,MAAM,CAAC,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,MAAM,CAC3C;IACC,QAAQ,EAAE;QACT,OAAO,EAAE,IAAI,GAAG,CAAC,8BAA8B,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;QACjE,IAAI,EAAE,IAAI,GAAG,CAAC,+BAA+B,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;KAC/D;IACD,MAAM,EAAE;QACP,OAAO,EAAE,IAAI,GAAG,CAAC,4BAA4B,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;QAC/D,IAAI,EAAE,IAAI,GAAG,CAAC,6BAA6B,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;KAC7D;IACD,UAAU,EAAE;QACX,OAAO,EAAE,IAAI,GAAG,CAAC,gCAAgC,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;QACnE,IAAI,EAAE,IAAI,GAAG,CAAC,iCAAiC,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;KACjE;IACD,KAAK,EAAE;QACN,OAAO,EAAE,IAAI,GAAG,CAAC,2BAA2B,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;QAC9D,IAAI,EAAE,IAAI,GAAG,CAAC,4BAA4B,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC;KAC5D;CACD,EACD,CAAC,CACD,CAAC"}
@@ -0,0 +1,25 @@
1
+ /** a single detection result: ISO 639-3 language code and its probability. */
2
+ export type Detection = [lang: string, probability: number];
3
+ /** URLs for a single group's weight + metadata files. */
4
+ type GroupSource = {
5
+ weights: URL;
6
+ meta: URL;
7
+ };
8
+ /** returned by {@link create} — call initialize() once, then detect() synchronously. */
9
+ type Detector = {
10
+ initialize: () => Promise<void>;
11
+ detect: (text: string) => Detection[];
12
+ };
13
+ /**
14
+ * creates a detector for a specific weight variant.
15
+ *
16
+ * call initialize() once to load and dequantize weights via fetch(), then
17
+ * call detect() synchronously for each input text.
18
+ *
19
+ * @param sources record of group names to their weight/meta file URLs
20
+ * @param quantBits quantization bit width (default 8)
21
+ * @returns detector with initialize() and detect() methods
22
+ */
23
+ export declare const create: (sources: Record<string, GroupSource>, quantBits?: number) => Detector;
24
+ export {};
25
+ //# sourceMappingURL=detect.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"detect.d.ts","sourceRoot":"","sources":["../../src/nn/detect.ts"],"names":[],"mappings":"AAOA,8EAA8E;AAC9E,MAAM,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;AAE5D,yDAAyD;AACzD,KAAK,WAAW,GAAG;IAClB,OAAO,EAAE,GAAG,CAAC;IACb,IAAI,EAAE,GAAG,CAAC;CACV,CAAC;AAwBF,wFAAwF;AACxF,KAAK,QAAQ,GAAG;IACf,UAAU,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IAChC,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,SAAS,EAAE,CAAC;CACtC,CAAC;AA+IF;;;;;;;;;GASG;AACH,eAAO,MAAM,MAAM,GAAI,SAAS,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,EAAE,kBAAa,KAAG,QAmF5E,CAAC"}
@@ -0,0 +1,209 @@
1
+ import { loadBinary, loadJson } from '#load';
2
+ import { forward, loadWeights, loadWeights6 } from "./forward.js";
3
+ import { normalize, extractNgrams } from "./normalize.js";
4
/**
 * inclusive codepoint ranges of every recognized script family.
 *
 * none of the ranges overlap, so a codepoint matches at most one family.
 */
const SCRIPT_RANGES = [
	// unique scripts
	{ family: 'korean', ranges: [[0xac00, 0xd7af], [0x1100, 0x11ff]] },
	{ family: 'georgian', ranges: [[0x10a0, 0x10ff], [0x2d00, 0x2d2f]] },
	{ family: 'armenian', ranges: [[0x0530, 0x058f]] },
	{ family: 'bengali', ranges: [[0x0980, 0x09ff]] },
	{ family: 'greek', ranges: [[0x0370, 0x03ff], [0x1f00, 0x1fff]] },
	{ family: 'hebrew', ranges: [[0x0590, 0x05ff]] },
	// CJK
	{ family: 'cjk_kana', ranges: [[0x3040, 0x309f], [0x30a0, 0x30ff]] },
	{ family: 'cjk_han', ranges: [[0x4e00, 0x9fff], [0x3400, 0x4dbf]] },
	// NN groups
	{ family: 'cyrillic', ranges: [[0x0400, 0x04ff]] },
	{ family: 'arabic', ranges: [[0x0600, 0x06ff], [0x0750, 0x077f]] },
	{ family: 'devanagari', ranges: [[0x0900, 0x097f]] },
	{ family: 'latin', ranges: [[0x0041, 0x005a], [0x0061, 0x007a], [0x00c0, 0x024f]] },
];
/**
 * looks up which script family a Unicode codepoint belongs to.
 *
 * @param cp the codepoint to classify
 * @returns the script family, or `null` if the codepoint is in none of the ranges
 */
const classifyCodepoint = (cp) => {
	for (const { family, ranges } of SCRIPT_RANGES) {
		for (const [lo, hi] of ranges) {
			if (cp >= lo && cp <= hi) {
				return family;
			}
		}
	}
	return null;
};
52
/**
 * script families that are written by exactly one supported language, mapped
 * to that language's ISO 639-3 code.
 */
const UNIQUE_SCRIPT_MAP = Object.fromEntries([
	['korean', 'kor'],
	['georgian', 'kat'],
	['armenian', 'hye'],
	['bengali', 'ben'],
	['greek', 'ell'],
	['hebrew', 'heb'],
]);
/**
 * script families that are resolved by a neural-network group, mapped to the
 * name of that group. each of these families uses its own name as group name.
 */
const SCRIPT_TO_GROUP = Object.fromEntries(
	['cyrillic', 'arabic', 'devanagari', 'latin'].map((family) => [family, family]),
);
68
+ // #endregion
69
+ // #region inference helpers
70
/**
 * turns normalized text into the feature vector a group model consumes.
 *
 * the vector holds one slot per vocabulary entry, laid out as all unigrams,
 * then all bigrams, trigrams and quadgrams. each slot carries the value
 * extractNgrams() reports for that ngram, or 0 when the text lacks it.
 *
 * @param text normalized text
 * @param ngrams the group's ngram vocabulary
 * @returns float32 input vector matching the model's expected layout
 */
const buildInput = (text, ngrams) => {
	// vocabulary lists paired with what the text yields for the same ngram size
	const sections = [
		[ngrams.unigrams, extractNgrams(text, 1)],
		[ngrams.bigrams, extractNgrams(text, 2)],
		[ngrams.trigrams, extractNgrams(text, 3)],
		[ngrams.quadgrams, extractNgrams(text, 4)],
	];
	const width = sections.reduce((sum, [vocabulary]) => sum + vocabulary.length, 0);
	const input = new Float32Array(width);
	let slot = 0;
	for (const [vocabulary, found] of sections) {
		for (const gram of vocabulary) {
			input[slot++] = found[gram] || 0;
		}
	}
	return input;
};
90
+ // #endregion
91
+ // #region weight loading
92
/**
 * fetches one group's binary weights and JSON metadata, then dequantizes the weights.
 *
 * both files are requested concurrently. the 6-bit loader is used when
 * quantBits is 6; every other value goes through the default loader.
 *
 * @param source URLs for the group's weight and metadata files
 * @param quantBits quantization bit width (8 or 6)
 * @returns the loaded model ready for inference
 */
const loadGroup = async (source, quantBits) => {
	const pendingBinary = loadBinary(source.weights);
	const pendingMeta = loadJson(source.meta);
	const [bin, meta] = await Promise.all([pendingBinary, pendingMeta]);
	const dequantize = quantBits === 6 ? loadWeights6 : loadWeights;
	return { meta, weights: dequantize(bin, meta.inputSize, meta.outputSize) };
};
106
+ // #endregion
107
+ // #region detection
108
/**
 * creates a detector for a specific weight variant.
 *
 * call initialize() once to load and dequantize weights via fetch(), then
 * call detect() synchronously for each input text. detect() throws if
 * initialize() has not completed, and always returns its detections sorted
 * by probability, highest first.
 *
 * @param sources record of group names to their weight/meta file URLs
 * @param quantBits quantization bit width (default 8)
 * @returns detector with initialize() and detect() methods
 */
export const create = (sources, quantBits = 8) => {
	let models = null;
	/** sorts detections in place by probability, highest first. */
	const rank = (detections) => detections.sort((a, b) => b[1] - a[1]);
	const initialize = async () => {
		const entries = Object.entries(sources);
		// all groups are fetched concurrently
		const loaded = await Promise.all(entries.map(([, source]) => loadGroup(source, quantBits)));
		models = {};
		for (let i = 0; i < entries.length; i++) {
			models[entries[i][0]] = loaded[i];
		}
	};
	const detect = (text) => {
		if (!models) {
			throw new Error(`call initialize() first`);
		}
		// classify characters by script family; for...of walks whole code
		// points, so a surrogate pair is counted as a single character
		const scriptCounts = new Map();
		let totalClassified = 0;
		for (const char of text) {
			const family = classifyCodepoint(char.codePointAt(0));
			if (family) {
				scriptCounts.set(family, (scriptCounts.get(family) || 0) + 1);
				totalClassified++;
			}
		}
		// no classified characters — fallback to latin. the group output comes
		// back in model order, so it is ranked here like every other result
		if (totalClassified === 0) {
			return rank(detectGroup(text, 'latin', models));
		}
		const results = [];
		for (const [family, count] of scriptCounts) {
			const proportion = count / totalClassified;
			// unique script languages — use proportion directly as probability
			const uniqueLang = UNIQUE_SCRIPT_MAP[family];
			if (uniqueLang) {
				results.push([uniqueLang, proportion]);
				continue;
			}
			// CJK — kana implies Japanese, Han-only implies Chinese
			if (family === 'cjk_kana') {
				results.push(['jpn', proportion]);
				continue;
			}
			if (family === 'cjk_han') {
				// only count as Chinese if no kana detected (otherwise Han is part of Japanese)
				// NOTE(review): when kana is present the Han share is dropped
				// rather than added to 'jpn' — confirm that this is intended
				if (!scriptCounts.has('cjk_kana')) {
					results.push(['cmn', proportion]);
				}
				continue;
			}
			// NN group — run model and scale by proportion
			const groupName = SCRIPT_TO_GROUP[family];
			if (groupName && models[groupName]) {
				results.push(...detectGroup(text, groupName, models, proportion));
			}
		}
		// nothing was produced (e.g. the only scripts found belong to groups
		// that were not loaded) — fallback to latin, ranked as well
		if (results.length === 0) {
			return rank(detectGroup(text, 'latin', models));
		}
		return rank(results);
	};
	return { initialize, detect };
};
188
/**
 * scores the input text with one group's model.
 *
 * the text is normalized, converted to the group's feature vector and passed
 * through the model; every output is multiplied by `proportion`. detections
 * are returned in the order of the group's language list, not by probability.
 *
 * @param text raw input text
 * @param groupName key into the loaded models
 * @param models loaded model records
 * @param proportion script proportion to scale probabilities by
 * @returns detections for this group
 */
const detectGroup = (text, groupName, models, proportion = 1) => {
	const model = models[groupName];
	if (!model) {
		throw new Error(`weights not loaded for group '${groupName}'`);
	}
	const { meta, weights } = model;
	const scores = forward(buildInput(normalize(text), meta.ngrams), weights);
	return meta.langs.map((lang, index) => [lang, scores[index] * proportion]);
};
208
+ // #endregion
209
+ //# sourceMappingURL=detect.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"detect.js","sourceRoot":"","sources":["../../src/nn/detect.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,OAAO,CAAC;AAE7C,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,YAAY,EAAqB,MAAM,cAAc,CAAC;AACrF,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AA4D1D;;;;;GAKG;AACH,MAAM,iBAAiB,GAAG,CAAC,EAAU,EAAuB,EAAE;IAC7D,iBAAiB;IACjB,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,EAAE,CAAC;QACtE,OAAO,QAAQ,CAAC;IACjB,CAAC;IACD,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,EAAE,CAAC;QACtE,OAAO,UAAU,CAAC;IACnB,CAAC;IACD,IAAI,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QAClC,OAAO,UAAU,CAAC;IACnB,CAAC;IACD,IAAI,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QAClC,OAAO,SAAS,CAAC;IAClB,CAAC;IACD,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,EAAE,CAAC;QACtE,OAAO,OAAO,CAAC;IAChB,CAAC;IACD,IAAI,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QAClC,OAAO,QAAQ,CAAC;IACjB,CAAC;IAED,MAAM;IACN,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,EAAE,CAAC;QACtE,OAAO,UAAU,CAAC;IACnB,CAAC;IACD,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,EAAE,CAAC;QACtE,OAAO,SAAS,CAAC;IAClB,CAAC;IAED,YAAY;IACZ,IAAI,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QAClC,OAAO,UAAU,CAAC;IACnB,CAAC;IACD,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,EAAE,CAAC;QACtE,OAAO,QAAQ,CAAC;IACjB,CAAC;IACD,IAAI,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,EAAE,CAAC;QAClC,OAAO,YAAY,CAAC;IACrB,CAAC;IACD,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,EAAE,CAAC;QACxG,OAAO,OAAO,CAAC;IAChB,CAAC;IAED,OAAO,IAAI,CAAC;AACb,CAAC,CAAC;AAEF,oEAAoE;AACpE,MAAM,iBAAiB,GAA0C;IAChE,MAAM,EAAE,KAAK;IACb,QAAQ,EAAE,KAAK;IACf,Q
AAQ,EAAE,KAAK;IACf,OAAO,EAAE,KAAK;IACd,KAAK,EAAE,KAAK;IACZ,MAAM,EAAE,KAAK;CACb,CAAC;AAEF,8CAA8C;AAC9C,MAAM,eAAe,GAA0C;IAC9D,QAAQ,EAAE,UAAU;IACpB,MAAM,EAAE,QAAQ;IAChB,UAAU,EAAE,YAAY;IACxB,KAAK,EAAE,OAAO;CACd,CAAC;AAEF,aAAa;AAEb,4BAA4B;AAE5B;;;;;;GAMG;AACH,MAAM,UAAU,GAAG,CAAC,IAAY,EAAE,MAAmB,EAAgB,EAAE;IACtE,MAAM,QAAQ,GAAG,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IACxC,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IACvC,MAAM,QAAQ,GAAG,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IACxC,MAAM,SAAS,GAAG,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAEzC,MAAM,MAAM,GAAG;QACd,GAAG,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC/C,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC7C,GAAG,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC/C,GAAG,MAAM,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;KACjD,CAAC;IAEF,OAAO,IAAI,YAAY,CAAC,MAAM,CAAC,CAAC;AACjC,CAAC,CAAC;AAEF,aAAa;AAEb,yBAAyB;AAEzB;;;;;;GAMG;AACH,MAAM,SAAS,GAAG,KAAK,EAAE,MAAmB,EAAE,SAAiB,EAAuB,EAAE;IACvF,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC9F,MAAM,IAAI,GAAG,OAAoB,CAAC;IAElC,MAAM,IAAI,GAAG,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,WAAW,CAAC;IAC1D,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IAE3D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;AAC1B,CAAC,CAAC;AAEF,aAAa;AAEb,oBAAoB;AAEpB;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,OAAoC,EAAE,SAAS,GAAG,CAAC,EAAY,EAAE;IACvF,IAAI,MAAM,GAAsC,IAAI,CAAC;IAErD,MAAM,UAAU,GAAG,KAAK,IAAI,EAAE;QAC7B,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC;QAE5F,MAAM,GAAG,EAAE,CAAC;QACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CA
AC,EAAE,EAAE,CAAC;YACzC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACnC,CAAC;IACF,CAAC,CAAC;IAEF,MAAM,MAAM,GAAG,CAAC,IAAY,EAAe,EAAE;QAC5C,IAAI,CAAC,MAAM,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC5C,CAAC;QAED,uCAAuC;QACvC,MAAM,YAAY,GAAG,IAAI,GAAG,EAAwB,CAAC;QACrD,IAAI,eAAe,GAAG,CAAC,CAAC;QAExB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAE,CAAC;YAChC,wCAAwC;YACxC,IAAI,EAAE,GAAG,MAAM,EAAE,CAAC;gBACjB,CAAC,EAAE,CAAC;YACL,CAAC;YACD,MAAM,MAAM,GAAG,iBAAiB,CAAC,EAAE,CAAC,CAAC;YACrC,IAAI,MAAM,EAAE,CAAC;gBACZ,YAAY,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC9D,eAAe,EAAE,CAAC;YACnB,CAAC;QACF,CAAC;QAED,+CAA+C;QAC/C,IAAI,eAAe,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,WAAW,CAAC,IAAI,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;QAC3C,CAAC;QAED,MAAM,OAAO,GAAgB,EAAE,CAAC;QAEhC,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,YAAY,EAAE,CAAC;YAC5C,MAAM,UAAU,GAAG,KAAK,GAAG,eAAe,CAAC;YAE3C,mEAAmE;YACnE,MAAM,UAAU,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC;YAC7C,IAAI,UAAU,EAAE,CAAC;gBAChB,OAAO,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC;gBACvC,SAAS;YACV,CAAC;YAED,wDAAwD;YACxD,IAAI,MAAM,KAAK,UAAU,EAAE,CAAC;gBAC3B,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC;gBAClC,SAAS;YACV,CAAC;YACD,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;gBAC1B,gFAAgF;gBAChF,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;oBACnC,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC;gBACnC,CAAC;gBACD,SAAS;YACV,CAAC;YAED,+CAA+C;YAC/C,MAAM,SAAS,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;YAC1C,IAAI,SAAS,IAAI,MAAM,CAAC,SAAS,CAAC,EAAE,CAAC;gBACpC,MAAM,YAAY,GAAG,WAAW,CAAC,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;gBACtE,OAAO,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;YAC/B,CAAC;QACF,CAAC;QAED,4EAA4E;QAC5E,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,WAAW,CAAC,IAAI,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;QAC3C,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,
CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,OAAO,OAAO,CAAC;IAChB,CAAC,CAAC;IAEF,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC;AAC/B,CAAC,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,WAAW,GAAG,CACnB,IAAY,EACZ,SAAiB,EACjB,MAAkC,EAClC,UAAU,GAAG,CAAC,EACA,EAAE;IAChB,MAAM,KAAK,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC;IAChC,IAAI,CAAC,KAAK,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,iCAAiC,SAAS,GAAG,CAAC,CAAC;IAChE,CAAC;IAED,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,KAAK,GAAG,UAAU,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACxD,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;IAE7C,MAAM,OAAO,GAAgB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC;IAC/F,OAAO,OAAO,CAAC;AAChB,CAAC,CAAC;AAEF,aAAa"}
@@ -0,0 +1,38 @@
1
/**
 * float32 parameters of a linear model (a dense layer followed by softmax).
 */
export type ModelWeights = {
	/** dense layer weights. presumably `inputSize × outputSize` values — TODO confirm layout */
	w: Float32Array;
	/** dense layer biases. presumably one value per output class — TODO confirm */
	b: Float32Array;
	/** number of input features */
	inputSize: number;
	/** number of output classes */
	outputSize: number;
};
8
/**
 * reads int8 quantized weights out of a binary buffer and dequantizes them to float32.
 *
 * buffer layout: a header of 2 × f32 scales (wScale, bScale), followed by the
 * weight bytes, followed by the bias bytes.
 *
 * @param bin raw binary weight data
 * @param inputSize number of input features
 * @param outputSize number of output classes
 * @returns dequantized model weights
 */
export declare const loadWeights: (
	bin: ArrayBuffer,
	inputSize: number,
	outputSize: number,
) => ModelWeights;
19
/**
 * reads int6 packed quantized weights out of a binary buffer and dequantizes them to float32.
 *
 * the header matches the int8 format (2 × f32 scales); only the payload differs,
 * being 6-bit packed.
 *
 * @param bin raw binary weight data
 * @param inputSize number of input features
 * @param outputSize number of output classes
 * @returns dequantized model weights
 */
export declare const loadWeights6: (
	bin: ArrayBuffer,
	inputSize: number,
	outputSize: number,
) => ModelWeights;
30
/**
 * evaluates a linear model on one input vector: dense layer, then softmax.
 *
 * @param input input feature vector (ngram frequencies)
 * @param m model weights
 * @returns output probabilities (one per language in the group)
 */
export declare const forward: (
	input: Float32Array,
	m: ModelWeights,
) => Float32Array;
38
+ //# sourceMappingURL=forward.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"forward.d.ts","sourceRoot":"","sources":["../../src/nn/forward.ts"],"names":[],"mappings":"AAEA,4DAA4D;AAC5D,MAAM,MAAM,YAAY,GAAG;IAC1B,CAAC,EAAE,YAAY,CAAC;IAChB,CAAC,EAAE,YAAY,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACnB,CAAC;AAmEF;;;;;;;;;GASG;AACH,eAAO,MAAM,WAAW,GAAI,KAAK,WAAW,EAAE,WAAW,MAAM,EAAE,YAAY,MAAM,KAAG,YAerF,CAAC;AAEF;;;;;;;;;GASG;AACH,eAAO,MAAM,YAAY,GAAI,KAAK,WAAW,EAAE,WAAW,MAAM,EAAE,YAAY,MAAM,KAAG,YAkBtF,CAAC;AA4BF;;;;;;GAMG;AACH,eAAO,MAAM,OAAO,GAAI,OAAO,YAAY,EAAE,GAAG,YAAY,KAAG,YAa9D,CAAC"}