npm - @oomfware/lang-detect - Versions diffs - 0.1.0 - Mend

@oomfware/lang-detect 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

package/LICENSE +14 -0
package/README.md +68 -0
package/dist/eval.d.ts +8 -0
package/dist/eval.d.ts.map +1 -0
package/dist/eval.js +145 -0
package/dist/eval.js.map +1 -0
package/dist/index.d.ts +3 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +20 -0
package/dist/index.js.map +1 -0
package/dist/lite.d.ts +3 -0
package/dist/lite.d.ts.map +1 -0
package/dist/lite.js +20 -0
package/dist/lite.js.map +1 -0
package/dist/nn/detect.d.ts +25 -0
package/dist/nn/detect.d.ts.map +1 -0
package/dist/nn/detect.js +209 -0
package/dist/nn/detect.js.map +1 -0
package/dist/nn/forward.d.ts +38 -0
package/dist/nn/forward.d.ts.map +1 -0
package/dist/nn/forward.js +154 -0
package/dist/nn/forward.js.map +1 -0
package/dist/nn/groups.d.ts +23 -0
package/dist/nn/groups.d.ts.map +1 -0
package/dist/nn/groups.js +81 -0
package/dist/nn/groups.js.map +1 -0
package/dist/nn/load.d.ts +15 -0
package/dist/nn/load.d.ts.map +1 -0
package/dist/nn/load.js +21 -0
package/dist/nn/load.js.map +1 -0
package/dist/nn/load.node.d.ts +15 -0
package/dist/nn/load.node.d.ts.map +1 -0
package/dist/nn/load.node.js +23 -0
package/dist/nn/load.node.js.map +1 -0
package/dist/nn/normalize.d.ts +17 -0
package/dist/nn/normalize.d.ts.map +1 -0
package/dist/nn/normalize.js +34 -0
package/dist/nn/normalize.js.map +1 -0
package/package.json +61 -0
package/src/eval.ts +173 -0
package/src/index.ts +22 -0
package/src/lite.ts +25 -0
package/src/nn/detect.ts +309 -0
package/src/nn/forward.ts +181 -0
package/src/nn/load.node.ts +24 -0
package/src/nn/load.ts +21 -0
package/src/nn/normalize.ts +38 -0
package/weights/lite/arabic.bin +0 -0
package/weights/lite/arabic.json +1 -0
package/weights/lite/cyrillic.bin +5 -0
package/weights/lite/cyrillic.json +1 -0
package/weights/lite/devanagari.bin +0 -0
package/weights/lite/devanagari.json +1 -0
package/weights/lite/latin.bin +5 -0
package/weights/lite/latin.json +1 -0
package/weights/standard/arabic.bin +0 -0
package/weights/standard/arabic.json +1 -0
package/weights/standard/cyrillic.bin +0 -0
package/weights/standard/cyrillic.json +1 -0
package/weights/standard/devanagari.bin +9 -0
package/weights/standard/devanagari.json +1 -0
package/weights/standard/latin.bin +0 -0
package/weights/standard/latin.json +1 -0

package/src/index.ts ADDED Viewed

@@ -0,0 +1,22 @@
+import { create } from './nn/detect.ts';
+export type { Detection } from './nn/detect.ts';
/**
 * weight and metadata locations for the standard variant, one entry per
 * neural-network script group. every URL is resolved relative to this module.
 */
const standardSources = {
	cyrillic: {
		weights: new URL('../weights/standard/cyrillic.bin', import.meta.url),
		meta: new URL('../weights/standard/cyrillic.json', import.meta.url),
	},
	arabic: {
		weights: new URL('../weights/standard/arabic.bin', import.meta.url),
		meta: new URL('../weights/standard/arabic.json', import.meta.url),
	},
	devanagari: {
		weights: new URL('../weights/standard/devanagari.bin', import.meta.url),
		meta: new URL('../weights/standard/devanagari.json', import.meta.url),
	},
	latin: {
		weights: new URL('../weights/standard/latin.bin', import.meta.url),
		meta: new URL('../weights/standard/latin.json', import.meta.url),
	},
};
/** standard detector; relies on create()'s default quantization width. */
export const { initialize, detect } = create(standardSources);

package/src/lite.ts ADDED Viewed

@@ -0,0 +1,25 @@
+import { create } from './nn/detect.ts';
+export type { Detection } from './nn/detect.ts';
/**
 * weight and metadata locations for the lite variant, one entry per
 * neural-network script group. every URL is resolved relative to this module.
 */
const liteSources = {
	cyrillic: {
		weights: new URL('../weights/lite/cyrillic.bin', import.meta.url),
		meta: new URL('../weights/lite/cyrillic.json', import.meta.url),
	},
	arabic: {
		weights: new URL('../weights/lite/arabic.bin', import.meta.url),
		meta: new URL('../weights/lite/arabic.json', import.meta.url),
	},
	devanagari: {
		weights: new URL('../weights/lite/devanagari.bin', import.meta.url),
		meta: new URL('../weights/lite/devanagari.json', import.meta.url),
	},
	latin: {
		weights: new URL('../weights/lite/latin.bin', import.meta.url),
		meta: new URL('../weights/lite/latin.json', import.meta.url),
	},
};
/** quantization bit width passed to create() for the lite weight files. */
const LITE_QUANT_BITS = 6;
/** lite detector built from the 6-bit weight files. */
export const { initialize, detect } = create(liteSources, LITE_QUANT_BITS);

package/src/nn/detect.ts ADDED Viewed

@@ -0,0 +1,309 @@
+import { loadBinary, loadJson } from '#load';
+import { forward, loadWeights, loadWeights6, type ModelWeights } from './forward.ts';
+import { normalize, extractNgrams } from './normalize.ts';
+// #region types
+/**
+ * a single detection result: ISO 639-3 language code and its probability.
+ *
+ * for languages identified purely by their script, the second element is the
+ * share of classified characters written in that script; for neural-network
+ * groups it is the model output scaled by that share.
+ */
+export type Detection = [lang: string, probability: number];
+/**
+ * URLs for a single group's weight + metadata files.
+ *
+ * `weights` is read as binary and `meta` as JSON when the group is loaded.
+ */
+type GroupSource = {
+	weights: URL;
+	meta: URL;
+};
+/**
+ * ngram vocabulary lists that define the input vector layout for a group model.
+ *
+ * the input vector is the concatenation of the four lists in the order
+ * declared here, one slot per vocabulary entry.
+ */
+type GroupNgrams = {
+	unigrams: string[];
+	bigrams: string[];
+	trigrams: string[];
+	quadgrams: string[];
+};
+/**
+ * weight metadata loaded from a group's .json file.
+ *
+ * `langs` names the model outputs by position; `inputSize` and `outputSize`
+ * are handed to the weight loader to size the weight and bias arrays.
+ */
+type GroupMeta = {
+	langs: string[];
+	ngrams: GroupNgrams;
+	inputSize: number;
+	outputSize: number;
+};
+/** a loaded group model ready for inference: its metadata plus dequantized weights. */
+type ReadyModel = {
+	meta: GroupMeta;
+	weights: ModelWeights;
+};
+/**
+ * returned by {@link create} — call initialize() once, then detect() synchronously.
+ *
+ * detect() throws if it is called before initialize() has completed.
+ */
+type Detector = {
+	initialize: () => Promise<void>;
+	detect: (text: string) => Detection[];
+};
+// #endregion
+// #region script classification
/** script family identifiers for character classification. */
type ScriptFamily =
	| 'korean'
	| 'georgian'
	| 'armenian'
	| 'bengali'
	| 'greek'
	| 'hebrew'
	| 'cjk_kana'
	| 'cjk_han'
	| 'cyrillic'
	| 'arabic'
	| 'devanagari'
	| 'latin';
/** an inclusive codepoint range and the script family it belongs to. */
type ScriptRange = readonly [from: number, to: number, family: ScriptFamily];
/**
 * codepoint ranges recognized by {@link classifyCodepoint}.
 *
 * the ranges do not overlap, so the order only affects lookup speed.
 */
const SCRIPT_RANGES: readonly ScriptRange[] = [
	// unique scripts
	[0xac00, 0xd7af, 'korean'],
	[0x1100, 0x11ff, 'korean'],
	[0x10a0, 0x10ff, 'georgian'],
	[0x2d00, 0x2d2f, 'georgian'],
	[0x0530, 0x058f, 'armenian'],
	[0x0980, 0x09ff, 'bengali'],
	[0x0370, 0x03ff, 'greek'],
	[0x1f00, 0x1fff, 'greek'],
	[0x0590, 0x05ff, 'hebrew'],
	// CJK
	[0x3040, 0x309f, 'cjk_kana'],
	[0x30a0, 0x30ff, 'cjk_kana'],
	[0x4e00, 0x9fff, 'cjk_han'],
	[0x3400, 0x4dbf, 'cjk_han'],
	// NN groups
	[0x0400, 0x04ff, 'cyrillic'],
	[0x0600, 0x06ff, 'arabic'],
	[0x0750, 0x077f, 'arabic'],
	// Arabic Presentation Forms-B; stops at U+FEFC so the byte order mark
	// (U+FEFF) is not counted as Arabic
	[0xfe70, 0xfefc, 'arabic'],
	[0x0900, 0x097f, 'devanagari'],
	[0x0041, 0x005a, 'latin'],
	[0x0061, 0x007a, 'latin'],
	// Latin-1 letters, split around U+00D7 (multiplication sign) and
	// U+00F7 (division sign), which are symbols rather than letters
	[0x00c0, 0x00d6, 'latin'],
	[0x00d8, 0x00f6, 'latin'],
	[0x00f8, 0x024f, 'latin'],
	// Latin Extended Additional (e.g. Vietnamese letters with stacked diacritics)
	[0x1e00, 0x1eff, 'latin'],
];
/**
 * classifies a character's Unicode codepoint into a script family.
 *
 * @param cp the codepoint to classify
 * @returns the script family, or `null` if not recognized
 */
const classifyCodepoint = (cp: number): ScriptFamily | null => {
	for (let i = 0; i < SCRIPT_RANGES.length; i++) {
		const range = SCRIPT_RANGES[i];
		if (cp >= range[0] && cp <= range[1]) {
			return range[2];
		}
	}
	return null;
};
+/**
+ * maps unique script families to their ISO 639-3 language code.
+ *
+ * a family listed here is reported by detect() without running a model: the
+ * share of classified characters in that script is used as its probability.
+ */
+const UNIQUE_SCRIPT_MAP: Partial<Record<ScriptFamily, string>> = {
+	korean: 'kor',
+	georgian: 'kat',
+	armenian: 'hye',
+	bengali: 'ben',
+	greek: 'ell',
+	hebrew: 'heb',
+};
+/**
+ * maps script families to NN group names.
+ *
+ * the group name is the key used to look up a loaded model, so it has to
+ * match a key of the sources record the detector was created with.
+ */
+const SCRIPT_TO_GROUP: Partial<Record<ScriptFamily, string>> = {
+	cyrillic: 'cyrillic',
+	arabic: 'arabic',
+	devanagari: 'devanagari',
+	latin: 'latin',
+};
+// #endregion
+// #region inference helpers
/**
 * turns normalized text into the feature vector a group model consumes.
 *
 * the vector holds one slot per vocabulary entry, laid out as unigrams,
 * bigrams, trigrams and quadgrams in that order. each slot is the relative
 * frequency of that ngram in the text, or 0 when the ngram does not occur.
 *
 * @param text normalized text
 * @param ngrams the group's ngram vocabulary
 * @returns float32 input vector matching the model's expected layout
 */
const buildInput = (text: string, ngrams: GroupNgrams): Float32Array => {
	const layout: [vocabulary: string[], size: number][] = [
		[ngrams.unigrams, 1],
		[ngrams.bigrams, 2],
		[ngrams.trigrams, 3],
		[ngrams.quadgrams, 4],
	];
	let width = 0;
	for (const [vocabulary] of layout) {
		width += vocabulary.length;
	}
	const vector = new Float32Array(width);
	let cursor = 0;
	for (const [vocabulary, size] of layout) {
		const frequencies = extractNgrams(text, size);
		for (const gram of vocabulary) {
			vector[cursor] = frequencies[gram] || 0;
			cursor += 1;
		}
	}
	return vector;
};
+// #endregion
+// #region weight loading
/**
 * fetches one group's binary weights and JSON metadata, then dequantizes the
 * weights into a model that is ready for inference.
 *
 * both files are requested together. a `quantBits` of 6 selects the packed
 * 6-bit loader; every other value uses the int8 loader.
 *
 * @param source URLs for the group's weight and metadata files
 * @param quantBits quantization bit width (8 or 6)
 * @returns the loaded model ready for inference
 */
const loadGroup = async (source: GroupSource, quantBits: number): Promise<ReadyModel> => {
	const pending = [loadBinary(source.weights), loadJson(source.meta)] as const;
	const [bin, rawMeta] = await Promise.all(pending);
	const meta = rawMeta as GroupMeta;
	let weights: ModelWeights;
	if (quantBits === 6) {
		weights = loadWeights6(bin, meta.inputSize, meta.outputSize);
	} else {
		weights = loadWeights(bin, meta.inputSize, meta.outputSize);
	}
	return { meta, weights };
};
+// #endregion
+// #region detection
/**
 * creates a detector for a specific weight variant.
 *
 * call initialize() once to load and dequantize every group's weights, then
 * call detect() synchronously for each input text.
 *
 * detect() counts the characters of each recognized script and combines three
 * kinds of evidence, each weighted by that script's share of the classified
 * characters:
 * - scripts used by a single supported language map straight to that language
 * - kana marks the text as Japanese, and Han characters are credited to
 *   Japanese when kana is present and to Chinese otherwise
 * - scripts shared by several languages are resolved by the group's model
 *
 * @param sources record of group names to their weight/meta file URLs
 * @param quantBits quantization bit width (default 8)
 * @returns detector with initialize() and detect() methods
 */
export const create = (sources: Record<string, GroupSource>, quantBits = 8): Detector => {
	let models: Record<string, ReadyModel> | null = null;
	const initialize = async (): Promise<void> => {
		const entries = Object.entries(sources);
		const loaded = await Promise.all(entries.map(([, source]) => loadGroup(source, quantBits)));
		// publish only after every group has loaded, so detect() never sees a partial set
		models = Object.fromEntries(entries.map(([name], i) => [name, loaded[i]]));
	};
	const detect = (text: string): Detection[] => {
		if (!models) {
			throw new Error(`call initialize() first`);
		}
		const ready = models;
		// classify characters by script family; iterating the string walks it by
		// code point, so astral characters are visited once
		const scriptCounts = new Map<ScriptFamily, number>();
		let totalClassified = 0;
		for (const char of text) {
			const cp = char.codePointAt(0);
			if (cp === undefined) {
				continue;
			}
			const family = classifyCodepoint(cp);
			if (family) {
				scriptCounts.set(family, (scriptCounts.get(family) ?? 0) + 1);
				totalClassified++;
			}
		}
		// no classified characters — fallback to latin
		if (totalClassified === 0) {
			return detectGroup(text, 'latin', ready);
		}
		const hasKana = scriptCounts.has('cjk_kana');
		const hanCount = scriptCounts.get('cjk_han') ?? 0;
		const results: Detection[] = [];
		for (const [family, count] of scriptCounts) {
			const proportion = count / totalClassified;
			// unique script languages — use proportion directly as probability
			const uniqueLang = UNIQUE_SCRIPT_MAP[family];
			if (uniqueLang) {
				results.push([uniqueLang, proportion]);
				continue;
			}
			// CJK — kana implies Japanese, Han-only implies Chinese
			if (family === 'cjk_kana') {
				// Han characters next to kana are part of the Japanese text, so their
				// share is credited to Japanese instead of being dropped
				results.push(['jpn', (count + hanCount) / totalClassified]);
				continue;
			}
			if (family === 'cjk_han') {
				// with kana present the Han share was already added to Japanese above
				if (!hasKana) {
					results.push(['cmn', proportion]);
				}
				continue;
			}
			// NN group — run model and scale by proportion
			const groupName = SCRIPT_TO_GROUP[family];
			if (groupName && ready[groupName]) {
				results.push(...detectGroup(text, groupName, ready, proportion));
			}
		}
		// every classified script belonged to a group without loaded weights — fallback to latin
		if (results.length === 0) {
			return detectGroup(text, 'latin', ready);
		}
		results.sort((a, b) => b[1] - a[1]);
		return results;
	};
	return { initialize, detect };
};
/**
 * scores the text against one group's model.
 *
 * the text is normalized, converted to the group's ngram feature vector and
 * passed through the model; each output is multiplied by `proportion` and
 * paired with the language at the same index in the group's metadata.
 *
 * @param text raw input text
 * @param groupName key into the loaded models
 * @param models loaded model records
 * @param proportion script proportion to scale probabilities by
 * @returns detections for this group
 */
const detectGroup = (
	text: string,
	groupName: string,
	models: Record<string, ReadyModel>,
	proportion = 1,
): Detection[] => {
	const model = models[groupName];
	if (!model) {
		throw new Error(`weights not loaded for group '${groupName}'`);
	}
	const { meta, weights } = model;
	const probabilities = forward(buildInput(normalize(text), meta.ngrams), weights);
	const detections: Detection[] = [];
	for (let i = 0; i < meta.langs.length; i++) {
		detections.push([meta.langs[i], probabilities[i] * proportion]);
	}
	return detections;
};
+// #endregion

package/src/nn/forward.ts ADDED Viewed

@@ -0,0 +1,181 @@
+// #region types
+/**
+ * float32 weights for a linear model (dense → softmax).
+ *
+ * `w` is stored row by row: the weights for output `i` occupy the `inputSize`
+ * consecutive entries starting at `i * inputSize`. `b` holds one bias per output.
+ */
+export type ModelWeights = {
+	w: Float32Array;
+	b: Float32Array;
+	inputSize: number;
+	outputSize: number;
+};
+// #endregion
+// #region dequantization
/**
 * converts quantized int8 values back to float32 by dividing each one by the
 * scale that was applied during quantization.
 *
 * @param data quantized int8 values
 * @param scale the scale factor used during quantization (scaleMax / absmax)
 * @returns dequantized float32 array
 */
const dequantize = (data: Int8Array, scale: number): Float32Array => {
	return Float32Array.from(data, (quantized) => quantized / scale);
};
/**
 * expands 6-bit packed bytes into signed int8 values.
 *
 * every 3 bytes carry 4 values of 6 bits each, stored most significant bits
 * first and offset by +31 so that they are unsigned:
 * byte0 = (u0 << 2) | (u1 >> 4)
 * byte1 = ((u1 & 0x0F) << 4) | (u2 >> 2)
 * byte2 = ((u2 & 0x03) << 6) | u3
 *
 * a trailing partial group of 1-3 values uses the same layout and reads only
 * the bytes those values touch.
 *
 * @param packed packed 6-bit data
 * @param count number of original values
 * @returns signed int8 values in [-31, 31]
 */
const unpack6 = (packed: Uint8Array, count: number): Int8Array => {
	const values = new Int8Array(count);
	for (let index = 0; index < count; index++) {
		// first byte of the 3-byte group this value lives in
		const base = (index >> 2) * 3;
		let unsigned: number;
		switch (index & 3) {
			case 0:
				unsigned = packed[base] >> 2;
				break;
			case 1:
				unsigned = ((packed[base] & 0x03) << 4) | (packed[base + 1] >> 4);
				break;
			case 2:
				unsigned = ((packed[base + 1] & 0x0f) << 2) | (packed[base + 2] >> 6);
				break;
			default:
				unsigned = packed[base + 2] & 0x3f;
		}
		values[index] = unsigned - 31;
	}
	return values;
};
/**
 * reads int8 quantized weights from a binary buffer and dequantizes them.
 *
 * layout: two little-endian f32 scales (weights, then biases), followed by
 * `outputSize * inputSize` weight bytes and `outputSize` bias bytes.
 *
 * @param bin raw binary weight data
 * @param inputSize number of input features
 * @param outputSize number of output classes
 * @returns dequantized model weights
 */
export const loadWeights = (bin: ArrayBuffer, inputSize: number, outputSize: number): ModelWeights => {
	const header = new DataView(bin);
	const weightScale = header.getFloat32(0, true);
	const biasScale = header.getFloat32(4, true);
	// the payload starts right after the 8-byte header
	const weightCount = outputSize * inputSize;
	const quantizedWeights = new Int8Array(bin, 8, weightCount);
	const quantizedBiases = new Int8Array(bin, 8 + weightCount, outputSize);
	const w = dequantize(quantizedWeights, weightScale);
	const b = dequantize(quantizedBiases, biasScale);
	return { w, b, inputSize, outputSize };
};
/**
 * reads 6-bit packed quantized weights from a binary buffer and dequantizes them.
 *
 * the header matches the int8 format (two little-endian f32 scales). the
 * weights and the biases are packed separately, each taking
 * ceil(count * 3 / 4) bytes.
 *
 * @param bin raw binary weight data
 * @param inputSize number of input features
 * @param outputSize number of output classes
 * @returns dequantized model weights
 */
export const loadWeights6 = (bin: ArrayBuffer, inputSize: number, outputSize: number): ModelWeights => {
	const header = new DataView(bin);
	const weightScale = header.getFloat32(0, true);
	const biasScale = header.getFloat32(4, true);
	const weightCount = outputSize * inputSize;
	const weightBytes = Math.ceil((weightCount * 3) / 4);
	const biasBytes = Math.ceil((outputSize * 3) / 4);
	// the payload starts right after the 8-byte header
	const packedWeights = new Uint8Array(bin, 8, weightBytes);
	const packedBiases = new Uint8Array(bin, 8 + weightBytes, biasBytes);
	const w = dequantize(unpack6(packedWeights, weightCount), weightScale);
	const b = dequantize(unpack6(packedBiases, outputSize), biasScale);
	return { w, b, inputSize, outputSize };
};
+// #endregion
+// #region forward pass
/**
 * replaces the logits in `output` with their softmax probabilities.
 *
 * the largest logit is subtracted before exponentiating so that large inputs
 * do not overflow.
 *
 * @param output logit array to convert to probabilities
 */
const softmax = (output: Float32Array): void => {
	let peak = -Infinity;
	for (const logit of output) {
		if (logit > peak) {
			peak = logit;
		}
	}
	let total = 0;
	output.forEach((logit, i) => {
		output[i] = Math.exp(logit - peak);
		// add the value as stored (float32), not the unrounded exponential
		total += output[i];
	});
	output.forEach((weight, i) => {
		output[i] = weight / total;
	});
};
/**
 * runs the linear model: one dense layer followed by softmax.
 *
 * each output starts from its bias and adds the dot product of the input with
 * that output's row of weights; the resulting logits are then normalized.
 *
 * @param input input feature vector (ngram frequencies)
 * @param m model weights
 * @returns output probabilities (one per language in the group)
 */
export const forward = (input: Float32Array, m: ModelWeights): Float32Array => {
	const { w, b, inputSize, outputSize } = m;
	const logits = new Float32Array(outputSize);
	for (let row = 0; row < outputSize; row++) {
		const rowStart = row * inputSize;
		let acc = b[row];
		for (let col = 0; col < inputSize; col++) {
			acc += input[col] * w[rowStart + col];
		}
		logits[row] = acc;
	}
	softmax(logits);
	return logits;
};
+// #endregion

package/src/nn/load.node.ts ADDED Viewed

@@ -0,0 +1,24 @@
import { readFileSync } from 'node:fs';
import { readFile } from 'node:fs/promises';
import { fileURLToPath } from 'node:url';
/**
 * loads binary data from a file URL using node:fs.
 *
 * the file is read asynchronously so that several loads started together can
 * overlap instead of blocking the event loop one after another.
 *
 * @param url file URL to load
 * @returns the file contents as an ArrayBuffer
 */
export const loadBinary = async (url: URL): Promise<ArrayBuffer> => {
	const buffer = await readFile(fileURLToPath(url));
	// a Buffer may be a view into a larger shared allocation, so copy out only its own bytes
	return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
};
/**
 * loads and parses JSON from a file URL using node:fs.
 *
 * the file is read asynchronously as UTF-8 text; a missing file or malformed
 * JSON rejects the returned promise.
 *
 * @param url file URL to load
 * @returns the parsed JSON value
 */
export const loadJson = async (url: URL): Promise<unknown> => {
	const content = await readFile(fileURLToPath(url), 'utf-8');
	return JSON.parse(content);
};

package/src/nn/load.ts ADDED Viewed

@@ -0,0 +1,21 @@
/**
 * loads binary data from a URL via fetch.
 *
 * an unsuccessful HTTP status rejects with an error instead of handing the
 * error page's bytes to the caller as if they were the requested file.
 *
 * @param url URL to fetch
 * @returns the response body as an ArrayBuffer
 * @throws if the response status is not successful
 */
export const loadBinary = async (url: URL): Promise<ArrayBuffer> => {
	const response = await fetch(url);
	if (!response.ok) {
		throw new Error(`failed to load ${url.href}: ${response.status} ${response.statusText}`);
	}
	return response.arrayBuffer();
};
/**
 * loads and parses JSON from a URL via fetch.
 *
 * an unsuccessful HTTP status rejects with an error that names the URL and
 * status, rather than failing later while parsing an error page as JSON.
 *
 * @param url URL to fetch
 * @returns the parsed JSON value
 * @throws if the response status is not successful
 */
export const loadJson = async (url: URL): Promise<unknown> => {
	const response = await fetch(url);
	if (!response.ok) {
		throw new Error(`failed to load ${url.href}: ${response.status} ${response.statusText}`);
	}
	return response.json();
};

package/src/nn/normalize.ts ADDED Viewed

@@ -0,0 +1,38 @@
const HYPHEN_RE = /-+/g;
const NON_LETTER_RE = /[^\p{L}\p{M}\s]/gu;
const MULTI_SPACE_RE = /\s{2,}/g;
/**
 * prepares text for ngram extraction.
 *
 * steps, in order: runs of hyphens become a single space, everything that is
 * not a letter, a combining mark or whitespace is removed, runs of two or more
 * whitespace characters collapse to one space, the text is lowercased and
 * trimmed, and finally one space is added on each side.
 *
 * @param text raw input text
 * @returns normalized text padded with leading/trailing spaces
 */
export const normalize = (text: string): string => {
	const dehyphenated = text.replace(HYPHEN_RE, ' ');
	const lettersOnly = dehyphenated.replace(NON_LETTER_RE, '');
	const collapsed = lettersOnly.replace(MULTI_SPACE_RE, ' ');
	const core = collapsed.toLowerCase().trim();
	return ' ' + core + ' ';
};
/**
 * counts every ngram of the given length in the text and converts the counts
 * to relative frequencies.
 *
 * a window of `length` characters slides over the text one position at a
 * time; text shorter than `length` yields an empty result.
 *
 * @param text normalized text (from {@link normalize})
 * @param length ngram length (1 for unigrams, 2 for bigrams, etc.)
 * @returns map of ngram string to its relative frequency (count / total)
 */
export const extractNgrams = (text: string, length: number): Record<string, number> => {
	const frequencies: Record<string, number> = {};
	const windows = text.length - length + 1;
	let seen = 0;
	for (let start = 0; start < windows; start++) {
		const gram = text.slice(start, start + length);
		frequencies[gram] = (frequencies[gram] || 0) + 1;
		seen += 1;
	}
	for (const gram of Object.keys(frequencies)) {
		frequencies[gram] = frequencies[gram] / seen;
	}
	return frequencies;
};

package/weights/lite/arabic.bin ADDED Viewed

Binary file

package/weights/lite/arabic.json ADDED Viewed

@@ -0,0 +1 @@

+ {"langs":["ara","ckb","pes"],"ngrams":{"unigrams":[" ","ە","ا","ل","ی","ن","ي","و","د","م","ت","ر","ب","ک","ه","ع","ێ","س","ك","ۆ","ش","أ","ئ","ز","ة","گ","خ","ف","پ","ق","ح","ڵ","آ","ج","ڕ","چ","ّ","e","ط","ى","ژ","ص","ذ","a","غ","إ","i","ض","َ","n","ظ","ث","r","ٔ","ء","ê","ً","ُ","k","ِ","ْ","ؤ","ٍ","b","o","ٌ","ھ","m","ـ","d","t","l","y","s","c","h","â","p","w","u","ٱ","ۀ","ٓ","v","ā","g","f","j","x","z","q","ﻻ","ڤ","ٰ","ۜ","あ","š","а","ﻹ",""],"bigrams":["ال","ە ","ی "," ا","د ","م ","ه ","ة "," د","ي ","ر "," أ"," ل","می","ل ","دە","ند"," ي","ای"," ئ","ست"," ر","ى ","ەم","لە","وو"," آ","بە"," ك","ز ","به","أن","ەر","ید","نە","کا","از","ەن","ري","یا","که","ێت","در","تە","ك ","مە","ب ","هە","ین"," إ","ێک","کن","كا","او","يا","خو","ود","ين","رە","ری"," ی","ده","ير","ەت","تا","اه","ع ","سە","ني","لت","ۆ ","دي","ش ","ول","ەس","نه","هم","ائ","بۆ","بی","لق","دن","ێ ","ەب","یر","كن"," ڕ","یش","بي"," گ","گر","ێن","یم","ۆر","سي","هی","ء ","بێ","تن","تێ","رة","زا","لی","لن","خۆ","شە","شد","ذا","فت","هر","شو","اء","اڵ","نێ","مل","ئا","مه","ەز","یس","فر","رێ","ته","ەد","يم","ێر","عن","نو","ێش","زن","لد","لێ","قد","شم","کس","يس","یگ","رف","ً ","ڕا","يو","خە","گا","ەخ","چە","عد","وز","ۆش","إن","کو","مت"],"trigrams":[" ال"," دە"," می"," لە","ند ","ست "," را","را "," بە"," به","که ","به ","ن ا","است"," اس"," هە"," که","لە ","ێت ","از ","كان"," در"," تۆ"," از","ما ","ای ","ید ","ەی ","در ","ها "," خو"," كا"," دا","ه ا","ين ","می ","نا "," پێ"," بر","ین ","وو ","ود "," بۆ"," یک"," نا"," آن","بوو","یک ","ه ب","امي","رة ","یەک"," کن","کرد"," ای","ا ا","ی ا","دار","ەکا","ه م","سام","ار ","ني ","یت ","ده ","های"," او","این","ت ا","ێک ","د ب","انە"," لي"," لم","بۆ "," ان","ذا ","ر ا","رای","یان"," هم","یم ","کات","دن ","اء ","آن ","بال"," نم","ري ","ەر ","ی م"," هذ","ا م","کان"," عن","ال ","با ","م ا"," نە","دة ","سەر","ی ئ","ارە"," تا","ة م","رە ","میک","نمی","ۆر "," لل","نە ","ير "," سە","ه د","ي م","ەند"," دو"," تو","الب","ێکی","ن ر","اً ","نم ","الن"," ئا","تر 
","ایە","نند","زان","ا د","اد ","مة ","واه","اوە","بود","ی ل","ی ر"," خۆ","ول "," رو","هم ","دوو"," ام","الإ","تە ","اید","خود","كل "," هر","د م"],"quadgrams":[" لە "," را "," الم"," به "," که ","ن ال"," است","است "," بۆ "," از "," در "," یک "," كان"," می ","ئەوە"," این"," کرد"," او "," آن ","برای","كان "," زۆر","رای ","امي ","توان","سامي"," بوو"," سام"," دار","زۆر ","ا ال","ەکان"," نمی"," با ","ت ال","بوو ","را ب","ل ال","های "," الت","ێکی ","این "," میک"," الج","یان ","خواه"," بال","نند ","ارد ","م ال"," خود","ن را","ه ال"," بود"," لم ","مان ","اید "," ما ","دارد"," الب"," الن","د ال","ی دە","میتو","کات ","یتوا"," چیز","ر ال","ویست","ر می","ایە "," الإ","توم ","کرد ","ا می"," توم","انم ","ا به","باید"," هست","ی را"," الش","انە ","شود ","ووە ","ب ال","ه با","ی کن"," سەر"," هیچ"," الا"," بای","بود "," باش"," الو"," هل ","ە کە"," شما","نید ","و ال"]},"inputSize":500,"outputSize":3}

package/weights/lite/cyrillic.bin ADDED Viewed

@@ -0,0 +1,5 @@
+�o�?�INB�'u��~'�q��v\�WЁן��ى�)u�aiz`M�RvSe�T}Ԛz�uu�Ug�}�a�Şy�u�ݍ��~'�a�^e��m��e��u��q�u����u�y�y��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}�ڍ���!u5�9��n'!�'aix[m�b��nc^X��_�D�`�^y��^(I�u�fI�^��n&މ�[��zZ(��YigU�`uuPbYqŕv!Q��}�d�U�qVZnH�]�f����ށ��gz&`��Y~f6ana�4օ��m�qhj^�8�}�i��Y��]�a�c�H~W��\�'ڂ)`U�`��T}֥y�ۖG܅�ډf�y� j
+Zy�cU��~X��m�m���y�_���uؖ�j�m���ؕ�獖�m��Yf�i��e�"��`Q�ׂ�Y��y��a����݂'Y��"e��zc}����}�^b� V��'��$u��e����9�u��G�u��y�a�y߆����q�ۉ�"y��e�ߍ���z�mǜ�ޅ�am�Z�硉�υכy�^y��]G�}�!nF�}��n7�u�Q��q� y�Wnj$u����V���������[�����Y��&�e�am�Չ�i�u� e��z$�q暅�i�a���g"e7u�}�]�r%��቙#m�m���}ƙ}�^�x`���r_u&䉖�}��y�$}��n��"���q��ך~$yh���y��u�Xv&�a��q��nv�b8}e^��Xm��]Ơm��q֢z'�jG�GXvG�r7���"a�_r�rށ�Z~b~�z_y��e�"��ug$�v��VrV�}g��ށ�`y7]y��~����h�"y瀉���cvT�tv�L�\A֔m�m�Zq��}��d�\e�]�`r^y�\��ru�)��v�q�u�}�v�y�y�u�y�y��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��]��]�Y��R7�~G�u����=Wm��vWm�WY��m��}����V�x���BG^n�^^Yi���q���cu�au�_a��jW؎2be�#Y��m�Xy7�������TzT؍ i�^m��~�(&yv�v�V�^	 yɝ�gnc]�蠥��m��e�N��E�h����6�~	�bV�Yz!��dy���F�ziR�w`^Wc��Xz�~g����֚m�T}�X�֠r���&ݮ�UX�~Tוg�nV�f�z6!y��w�aGڂt��_�����$^ ]ؠ}�ҍɟI�e�ȟ��]�[a��z\y�zQq��J:!t�ڂ�&�A�XyiX���mX!�נf(rV�q�am6ߒ~V_Y� �ّ�ނ' �%�yǟy�!}h��(^5�a��\U���'�V��3���Zm�^�7��5�����W���bW�iե�h��W ���I�uٟ�'�m�lf(}�݁��f7�a��iؘ�9Vf�^W!b�nVh��z���q�Z�b�Ɯ��eU��f����i�Yq�]����h���ar�e�~8%mԎQHq��M�["ԙ��&Zf(�vG�|�m^8 �ݦۍ�ݒ�^v7�����B�jS�b�v(�v'����I��a����6�u��u�feT�j�!���y��\vڥh�}7aVH�qȠr��q���VW!�h%��"~xQV�m�[��r�a�d�7 �g֑6ܞw�N�䢉Y�'}����~X eh�}�q�)������Q�r'�y�]���uU�u�\q��e��~�ev�W��+uD�m�y(�vuqّuצa�^�g��w��מ���瞅�z���z�y��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}�X���iW�v9ny�Xm��6u�^���y���^mלu��~'�nG����}�&u����]m�}v��Ɨa��u�#�&�~G�e��m��vj�e�g~�y�Xa���zw_q��aǗ�&u� ]�`��^��bm�c~��vS�E_i��m�}W�r7VyƜu�ډƜU�ލW�iV]��a�Y��m٘VU�V��_U��Y�Yr5�r7�y�v$�~�I�n6ߍ�����y�a��b���z�}��awYy�q矅�~�e��}�ڑ�}�i�q�^y�a�#n$r7�WY}ǢuG�iu^��y�ז'�y�\�&[m��iv}��a�\^5�m�}� ]�&v��$Y�]y�[qvY]�^B�Y��nv�u�Wfj�u��m�mלq��y� ���e؛��`�W`uv!y��u��u��q�m�ۍ�Z��Z%�M��q��q�i�\���]��Iu�i����vEm�cF!m�#v�a��ug�}�BVq�a�֞a��z�q��j}vq硁�]���M�܅�du���h�}�_�n�ncu�d}���ay�\m�^q�]j�a��y�z&��!z6�Z(���ߕ�ގ}ǚ��ׁ��uvۅǡvF�vGi�#n�Y؟��r7�֙e� ���i�#�(��azb��c��څ�`�ןv܊!��߆Gq�$zI�]uv�i�]�&�}棅��i�_���vfނj�W�za}��}� 
}���\5���V]~�y��Ux�m��iY]��q��q�\y�݂i7�q�[���}�^z\u�^q�_uǠ��z'�p��u��u�u�y�y�u�y�y��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��ew�n&"���u�"z)"i������]j`���r�y�!e�}ș�w u�O�(�m��q�_m��E�\�^&�z'm��U]yfcz�m��ٍw~F�vgZ'Z����g[y�_v}�]�]b7�u��R r�m��y���(��ނ�B��a�d�&��#Y�T��FWy��q8҆$�N7�� vw]�ՍH��F�a��}�❇�q�U���xjQfbQ֘�f�jW�N'��y*�u�jh���fw!u�\}����vg[eXV���ǡ��_�)�x�_��b�0Ւ6����Ǟm�\^�]�]��&fH�u����rfZ���n'��\y)[�Ȟ�X�y��b(q��^ڄ���v&ee�����r& m����ߝ�y�\�Ȣ�7ˉ��zH�z&]��bG���^�
+Zr(Xb'e�~`��q�Xi��q�"uF}��zXݙ��� zڍ��z9�'�e�e�^��a�G�i蚆�v5frVf'�vHr7!�����m螊"]� v4WY�0n'��؝Y�[v�ףu�X��ej6���#t����9�hU�^��!�	�'��&�^��'��u	�5u�>`i�`ifz'V�^��m��	be�]j�擮G�ja�e�Y�a���G��vd^y�fZa��f� ��І&VRXfv'�v:ev:����H�}��i�b�X`~(^u��iؙ���e�ߊfXyw�e�>'�qU��֊F�u�"�� �'�z��\}Ȕ~fq�}�SJ z8]`��JuXa�u�Ս�[uơ�X��i�b�n���\u�_q�^uן1�^v�m�uמu�q�y�y�q�y�y��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}�݆'؉��e�&bM(\�v�f�mX�qW�]������a�\��^q�%b'�E�N�nJT~#vH!��bvaa�#f��4���\]�_e�b^&�VXҊ&�}��q�b�m�gmu!v(Tb:WY��Tm�z�~u����Iءn9�I(dFfa�����%ى�Yj8ֵ�TrG�]�m�����ե����~'ݝ�&Y��e�\eơm�YqXXfI^z�b6�����m�"mf�Mמr��Y��]�_my�a�aq�#��Wuh���m�$M�Z���r}螕��^F�r	�]����vwc��fe�ci�!64a��[��%Q�Z:!z�q��Uw#q�aJ7���}�"m�)i��T���M��Vۑ����\e��}��w�Yȡ�d^v��^hyv߉��j	b�E�`i�q�cu�br	%^'�uS"fUUz�e�\�'���y�ㅦ���rX$�g���Se��n&%^'�uG#j5�i��z(�U����ܕg���܎WI�ai�Xn8]r�e�"a�u����(�W�yȢ�)~'�y�Yr9_xY!q���~�ev΢Vr�v�E��]�`�"��yd\��g�]�ba�U�`�V�i8�ju���c��#9��u�b��aG�u�[�i �&W�u�E�^�h^v��U$y�߉���\egYm�ayw�����Y�`r'E���X�Q��n���Wa�Ff8"r6�u�e�"n�v�a�_yY!qXc���}� r(�����u�N�}Hq�)n�Y��6�z�aauaN�&�e�߅�]�h!gX�=�Sbi՞z6�uy^q�"qǘ��^bg�s�f'�v�q��u�y�u�y�y��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}�^iw�j	�~Su��y�m�\a��o\u���]Q�cj%�r7�i�^q�܅����Ԏ'�v���m��Z��q��ywX}��}g�q��Sq�a4ԗq�؉�z7v6�V'�Yɡu�^m؜m��r6Zm��y�&^�ra��y�]����zRtee��}�U�c]��q�]�"z9Yu�؁�^e�=ge�^�Ǚ���q�܆��Y}Ve�mؕ��Y����v]G�M�!��Zy�!��Zu��u��^U�d^�qx҆5 ��y�]��~��}��}ԝu�u�Xu�!r6����&�%��qHT}֠��_�h[��u�aq�z&Zi�}� q�a��q��r�\R'�}(]i���U����e�[��╷�J!�蚉xY}��G��Yu�&u�����u�a�Y�m�#d��i�]jby�}Wv��y�y��]r&�z7����q��~'���ۖaE�!zd~8�y��}��Gm��rG�q��a��z%�u�\u�anG�n9!jт7���^��ߝ�^m��iأb
+vc��ނ6�~��6ȉ��]���BO��n[�X�q��}i�[��m�ցwSu��I6�U�^I�ԉ�$�������$��\R��[��$�4e��j�~j%��ZjG!u��}�� �'�q梞FYa��u��N7���q�bnGea�cm�q��u�u�#a��m��}�nGaq���~fy�!}� qz)`}��}��~'���R}�SV!v!����q��ię��u�v�U7y��f}��y�u��}֛�מ:�y�uhy���rv�y� ���!���}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}��}� bc�(�!�c�6����a�aq��u7��bQ���c���fx$����_���Y�։�#q���HRF�v)u�v`m��~W��re��Sm�u�a�բ��Zm��q�Yy�b�\���^X^gi��u�Q�h�m���Wr��a����Z#b��~_}�VV4ڑ��s#���^�v�i�NS�z_BG�f��آrei�de�j�q�Z�W�qɢ���m�]�Ǘ��!ugdq�)n\y� z]�ߊᩨci��m֗mfւhnq�!a�Yu��rfa�UjFZq��f(ҙ�ߑ� ��Y�vUN��m�dj�rߩ��6`��%Eמ���h�m�}��m��qf`Yiz��U��a�ەv��q��VY�e'�x����}��q�gu�_Y���m��m���!y��}z���h�ٜ��ށ��]֞�~Ӊ���Ve�(�������r6u��z }ǧq���u��a�y�a�ەgQ����!R`q�Wy�\Jhr^�E\�Yq�"b&zDX��^�^��UZ	�a�juJ5�z
+�N�R&W�f�iG�Q6}�uw�:8]ix�y�_e�SuG�6ڎ"�hv'���QnU��֦:H�j ��ga}ד���v6Wiy`r�Xz^�ɛ~'!�������%Q�aqV��B	"��e��r4�}��g\e8�m���x�m�m��u��}	ڊV��4�f8bR �_�~��