ne-lid 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ne_lid-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,68 @@
1
+ Metadata-Version: 2.4
2
+ Name: ne-lid
3
+ Version: 1.0.0
4
+ Summary: Language identification for Northeast Indian languages
5
+ Author-email: MWire Labs <connect@mwirelabs.com>
6
+ Project-URL: Homepage, https://mwirelabs.com
7
+ Project-URL: HuggingFace, https://huggingface.co/MWirelabs/ne-lid
8
+ Keywords: language-identification,nlp,northeast-india,low-resource,fasttext
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
+ Classifier: License :: Free To Use But Restricted
12
+ Requires-Python: >=3.8
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: fasttext-wheel>=0.9.2
15
+ Requires-Dist: huggingface-hub>=0.20.0
16
+
17
+ # ne-lid
18
+
19
+ Language identification for Northeast Indian languages.
20
+
21
+ **11 languages · 99.09% accuracy · fastText · CC-BY-4.0**
22
+
23
+ ## Install
24
+
25
+ ```bash
26
+ pip install ne-lid
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ from ne_lid import NELID
33
+
34
+ model = NELID() # downloads from HuggingFace on first run
35
+
36
+ # Single prediction
37
+ result = model.predict('Ki paidbah shnong ki la ia shim bynta')
38
+ print(result) # {'lang': 'kha', 'score': 0.9999}
39
+
40
+ # Top-3 predictions
41
+ results = model.predict('Ka sngi ka lieh', k=3)
42
+ print(results)
43
+
44
+ # List supported languages
45
+ model.languages()
46
+ ```
47
+
48
+ ## Supported Languages
49
+
50
+ | Code | Language | Accuracy |
51
+ |------|----------|----------|
52
+ | asm | Assamese | 100% |
53
+ | brx | Bodo | 99% |
54
+ | eng | English | 98% |
55
+ | grt | Garo | 100% |
56
+ | hin | Hindi | 97% |
57
+ | kha | Khasi | 99% |
58
+ | trp | Kokborok | 100% |
59
+ | mni | Meitei | 100% |
60
+ | lus | Mizo | 99% |
61
+ | nag | Nagamese | 100% |
62
+ | njz | Nyishi | 99% |
63
+
64
+ Overall test accuracy: **99.09%**
65
+
66
+ ## Links
67
+ - [HuggingFace](https://huggingface.co/MWirelabs/ne-lid)
68
+ - [MWire Labs](https://mwirelabs.com)
ne_lid-1.0.0/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # ne-lid
2
+
3
+ Language identification for Northeast Indian languages.
4
+
5
+ **11 languages · 99.09% accuracy · fastText · CC-BY-4.0**
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install ne-lid
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```python
16
+ from ne_lid import NELID
17
+
18
+ model = NELID() # downloads from HuggingFace on first run
19
+
20
+ # Single prediction
21
+ result = model.predict('Ki paidbah shnong ki la ia shim bynta')
22
+ print(result) # {'lang': 'kha', 'score': 0.9999}
23
+
24
+ # Top-3 predictions
25
+ results = model.predict('Ka sngi ka lieh', k=3)
26
+ print(results)
27
+
28
+ # Print the supported language codes and names
29
+ model.languages()
30
+ ```
31
+
32
+ ## Supported Languages
33
+
34
+ | Code | Language | Accuracy |
35
+ |------|----------|----------|
36
+ | asm | Assamese | 100% |
37
+ | brx | Bodo | 99% |
38
+ | eng | English | 98% |
39
+ | grt | Garo | 100% |
40
+ | hin | Hindi | 97% |
41
+ | kha | Khasi | 99% |
42
+ | trp | Kokborok | 100% |
43
+ | mni | Meitei | 100% |
44
+ | lus | Mizo | 99% |
45
+ | nag | Nagamese | 100% |
46
+ | njz | Nyishi | 99% |
47
+
48
+ Overall test accuracy: **99.09%**
49
+
50
+ ## Links
51
+ - [HuggingFace](https://huggingface.co/MWirelabs/ne-lid)
52
+ - [MWire Labs](https://mwirelabs.com)
@@ -0,0 +1,4 @@
1
+ from .model import NELID, LANGUAGES
2
+
3
# Package version string.
__version__ = "1.0.0"
# Names exported by ``from ne_lid import *``.
__all__ = ["NELID", "LANGUAGES"]
@@ -0,0 +1,41 @@
1
+ import fasttext
2
+ import os
3
+ from huggingface_hub import hf_hub_download
4
+
5
# Hugging Face Hub repository id and the model file name inside it.
MODEL_REPO, MODEL_FILE = "MWirelabs/ne-lid", "ne_lid.bin"

# Three-letter language code -> human-readable language name.
LANGUAGES = dict(
    asm="Assamese",
    brx="Bodo",
    eng="English",
    grt="Garo",
    hin="Hindi",
    kha="Khasi",
    trp="Kokborok",
    mni="Meitei",
    lus="Mizo",
    nag="Nagamese (Naga)",
    njz="Nyishi",
)
21
+
22
class NELID:
    """Language identifier backed by a fastText model.

    Creating an instance resolves ``MODEL_FILE`` from the Hugging Face
    repository ``MODEL_REPO`` with ``hf_hub_download`` and loads it with
    ``fasttext.load_model``; the loaded model is kept in ``self.model``.
    """

    def __init__(self):
        """Fetch the model file and load it, printing progress to stdout."""
        print("Loading NE-LID...")
        model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
        self.model = fasttext.load_model(model_path)
        print("Ready.")

    def predict(self, text: str, k: int = 1):
        """Identify the language of *text*.

        Args:
            text: Input text. Surrounding whitespace is stripped and every
                newline is replaced by a space before the model is called,
                because fastText predicts one line at a time.
            k: Number of labels requested; forwarded unchanged to the
                model. fastText interprets ``-1`` as "all labels".

        Returns:
            For ``k == 1`` a single ``{"lang": code, "score": prob}`` dict.
            For any other ``k`` a list of such dicts, in the order the
            model returned them. Scores are rounded to 4 decimals.

        Raises:
            IndexError: If ``k == 1`` and the model returns no label.
        """
        prefix = "__label__"
        text = text.strip().replace("\n", " ")
        labels, scores = self.model.predict(text, k=k)
        results = [
            {
                # Strip only the leading fastText label marker.
                "lang": label[len(prefix):] if label.startswith(prefix) else label,
                "score": round(float(score), 4),
            }
            for label, score in zip(labels, scores)
        ]
        # Only the default single-label request yields a bare dict; every
        # other k (including -1, "all labels") yields the full list.
        return results[0] if k == 1 else results

    @staticmethod
    def languages():
        """Print one line per entry of ``LANGUAGES`` (code, then name)."""
        for code, name in LANGUAGES.items():
            print(f" {code} {name}")
@@ -0,0 +1,68 @@
1
+ Metadata-Version: 2.4
2
+ Name: ne-lid
3
+ Version: 1.0.0
4
+ Summary: Language identification for Northeast Indian languages
5
+ Author-email: MWire Labs <connect@mwirelabs.com>
6
+ Project-URL: Homepage, https://mwirelabs.com
7
+ Project-URL: HuggingFace, https://huggingface.co/MWirelabs/ne-lid
8
+ Keywords: language-identification,nlp,northeast-india,low-resource,fasttext
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
+ Classifier: License :: Free To Use But Restricted
12
+ Requires-Python: >=3.8
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: fasttext-wheel>=0.9.2
15
+ Requires-Dist: huggingface-hub>=0.20.0
16
+
17
+ # ne-lid
18
+
19
+ Language identification for Northeast Indian languages.
20
+
21
+ **11 languages · 99.09% accuracy · fastText · CC-BY-4.0**
22
+
23
+ ## Install
24
+
25
+ ```bash
26
+ pip install ne-lid
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ from ne_lid import NELID
33
+
34
+ model = NELID() # downloads from HuggingFace on first run
35
+
36
+ # Single prediction
37
+ result = model.predict('Ki paidbah shnong ki la ia shim bynta')
38
+ print(result) # {'lang': 'kha', 'score': 0.9999}
39
+
40
+ # Top-3 predictions
41
+ results = model.predict('Ka sngi ka lieh', k=3)
42
+ print(results)
43
+
44
+ # List supported languages
45
+ model.languages()
46
+ ```
47
+
48
+ ## Supported Languages
49
+
50
+ | Code | Language | Accuracy |
51
+ |------|----------|----------|
52
+ | asm | Assamese | 100% |
53
+ | brx | Bodo | 99% |
54
+ | eng | English | 98% |
55
+ | grt | Garo | 100% |
56
+ | hin | Hindi | 97% |
57
+ | kha | Khasi | 99% |
58
+ | trp | Kokborok | 100% |
59
+ | mni | Meitei | 100% |
60
+ | lus | Mizo | 99% |
61
+ | nag | Nagamese | 100% |
62
+ | njz | Nyishi | 99% |
63
+
64
+ Overall test accuracy: **99.09%**
65
+
66
+ ## Links
67
+ - [HuggingFace](https://huggingface.co/MWirelabs/ne-lid)
68
+ - [MWire Labs](https://mwirelabs.com)
@@ -0,0 +1,9 @@
1
+ README.md
2
+ pyproject.toml
3
+ ne_lid/__init__.py
4
+ ne_lid/model.py
5
+ ne_lid.egg-info/PKG-INFO
6
+ ne_lid.egg-info/SOURCES.txt
7
+ ne_lid.egg-info/dependency_links.txt
8
+ ne_lid.egg-info/requires.txt
9
+ ne_lid.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ fasttext-wheel>=0.9.2
2
+ huggingface-hub>=0.20.0
@@ -0,0 +1 @@
1
+ ne_lid
@@ -0,0 +1,25 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ne-lid"
7
+ version = "1.0.0"
8
+ description = "Language identification for Northeast Indian languages"
9
+ readme = "README.md"
10
+ authors = [{ name = "MWire Labs", email = "connect@mwirelabs.com" }]
11
+ keywords = ["language-identification", "nlp", "northeast-india", "low-resource", "fasttext"]
12
+ classifiers = [
13
+ "Programming Language :: Python :: 3",
14
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
15
+ "License :: Free To Use But Restricted",
16
+ ]
17
+ requires-python = ">=3.8"
18
+ dependencies = [
19
+ "fasttext-wheel>=0.9.2",
20
+ "huggingface-hub>=0.20.0",
21
+ ]
22
+
23
+ [project.urls]
24
+ Homepage = "https://mwirelabs.com"
25
+ HuggingFace = "https://huggingface.co/MWirelabs/ne-lid"
ne_lid-1.0.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+