ne-agent 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ne_agent/__init__.py +4 -0
- ne_agent/core.py +24 -0
- ne_agent/lid.py +9 -0
- ne_agent/llm.py +24 -0
- ne_agent/retriever.py +38 -0
- ne_agent/tui.py +69 -0
- ne_agent-0.1.1.dist-info/METADATA +77 -0
- ne_agent-0.1.1.dist-info/RECORD +10 -0
- ne_agent-0.1.1.dist-info/WHEEL +5 -0
- ne_agent-0.1.1.dist-info/top_level.txt +1 -0
ne_agent/__init__.py
ADDED
ne_agent/core.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from .lid import LanguageIdentifier
|
|
2
|
+
from .retriever import Retriever
|
|
3
|
+
from .llm import OllamaLLM
|
|
4
|
+
|
|
5
|
+
class NEAgent:
    """Question-answering pipeline: language ID, retrieval, then generation.

    Attributes:
        lid: ``LanguageIdentifier`` used to detect the language of a query.
        retriever: ``Retriever`` that holds the searchable corpus.
        llm: ``OllamaLLM`` client that writes the final answer.
    """

    def __init__(self, model: str = "qwen2.5:1.5b"):
        """Create the three pipeline stages.

        Args:
            model: Model name forwarded to ``OllamaLLM``.
        """
        self.lid = LanguageIdentifier()
        self.retriever = Retriever()
        self.llm = OllamaLLM(model=model)

    def load_corpus(self, corpus: dict, n: int = 500):
        """Hand *corpus* to the retriever so it can build its index.

        Args:
            corpus: Passed unchanged to ``Retriever.build``.
            n: Passed to ``Retriever.build`` as the keyword argument ``n``.
        """
        self.retriever.build(corpus, n=n)

    def run(self, query: str) -> dict:
        """Answer *query* and report every intermediate result.

        Args:
            query: The user's text.

        Returns:
            A dict with the keys ``query``, ``detected_lang``, ``lang_score``,
            ``retrieved`` and ``answer`` (in that order).
        """
        # predict() is unpacked as a (language, score) pair.
        detected, confidence = self.lid.predict(query)
        context = self.retriever.retrieve(query, detected, top_k=3)
        outcome = dict(
            query=query,
            detected_lang=detected,
            lang_score=confidence,
            retrieved=context,
        )
        outcome["answer"] = self.llm.generate(query, context)
        return outcome
|
ne_agent/lid.py
ADDED
ne_agent/llm.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
|
|
3
|
+
class OllamaLLM:
    """Small client for an Ollama server's ``/api/generate`` endpoint.

    Attributes:
        model: Model name sent with every request.
        host: Base URL of the server, stored without a trailing slash.
        timeout: Seconds passed to ``requests`` as the request timeout.
    """

    def __init__(self, model: str = "qwen2.5:1.5b", host: str = "http://127.0.0.1:11434",
                 timeout: float = 300.0):
        """Store the connection settings; no request is made here.

        Args:
            model: Model name to request.
            host: Base URL of the Ollama server.
            timeout: Seconds to wait on the server before giving up. The
                default is generous because generation can be slow.
        """
        self.model = model
        # Avoid "//api/generate" when the caller passes a trailing slash.
        self.host = host.rstrip("/")
        self.timeout = timeout

    def generate(self, query: str, context_docs: list) -> str:
        """Ask the model to answer *query* using the retrieved documents.

        Args:
            query: The user's question, inserted verbatim into the prompt.
            context_docs: Dicts that each provide ``"lang"`` and ``"text"``;
                one prompt line is rendered per document.

        Returns:
            The ``"response"`` field of the server's JSON reply.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                non-2xx HTTP status.
        """
        context = "\n".join(f"[{d['lang']}] {d['text']}" for d in context_docs)
        # The continuation lines are flush-left on purpose: any indentation
        # here would become part of the prompt text.
        prompt = f"""You are NE-Agent, an AI assistant for Northeast Indian languages.
Use the following retrieved context to answer the query.

Context:
{context}

Query: {query}

Answer:"""
        response = requests.post(
            f"{self.host}/api/generate",
            json={
                "model": self.model,
                "prompt": prompt,
                "stream": False,
            },
            # Without a timeout a stalled server would block the caller forever.
            timeout=self.timeout,
        )
        # Surface HTTP failures directly instead of as a later KeyError on
        # the missing "response" field.
        response.raise_for_status()
        return response.json()["response"]
|
ne_agent/retriever.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import faiss
|
|
2
|
+
import numpy as np
|
|
3
|
+
from ne_embed import NEEmbed
|
|
4
|
+
|
|
5
|
+
class Retriever:
    """Language-filtered nearest-neighbour search over embedded sentences.

    Attributes:
        model: ``NEEmbed`` encoder used for both corpus and queries.
        index: FAISS ``IndexFlatIP`` over the stored sentences, or ``None``
            until ``build`` has run.
        sentences: Indexed sentences; position ``i`` is FAISS id ``i``.
        langs: Language label of each entry in ``sentences``.
    """

    # How many neighbours are requested from FAISS before results are
    # filtered by language.
    _CANDIDATE_POOL = 50

    def __init__(self):
        """Load the embedding model and start with an empty corpus."""
        self.model = NEEmbed()
        self.index = None
        self.sentences = []
        self.langs = []

    def build(self, corpus: dict, n: int = 500):
        """Sample sentences from *corpus*, embed them and (re)build the index.

        Sentences are appended to those already stored, so calling this more
        than once grows the corpus; the index is rebuilt over everything.

        Args:
            corpus: Maps a language label to an iterable of sentences.
                Non-string items and sentences of 10 or fewer characters
                (after stripping) are dropped.
            n: Maximum number of sentences sampled per language.
        """
        import random

        # A private generator gives the same seed-42 sample as before without
        # reseeding the process-wide ``random`` module.
        rng = random.Random(42)
        for lang, sents in corpus.items():
            cleaned = [s.strip() for s in sents if isinstance(s, str) and len(s.strip()) > 10]
            sampled = rng.sample(cleaned, min(n, len(cleaned)))
            self.sentences.extend(sampled)
            self.langs.extend([lang] * len(sampled))
        embeddings = self.model.encode(self.sentences, batch_size=64).astype(np.float32)
        self.index = faiss.IndexFlatIP(embeddings.shape[1])
        self.index.add(embeddings)
        print(f"Index built: {self.index.ntotal} vectors")

    def retrieve(self, query: str, lang: str, top_k: int = 3) -> list:
        """Return the best-scoring stored sentences whose language is *lang*.

        Args:
            query: Text to embed and search for.
            lang: Only sentences carrying this label are returned.
            top_k: Stop once this many matches have been collected.

        Returns:
            A list of ``{"text", "lang", "score"}`` dicts in the order FAISS
            returned them; ``score`` is rounded to 4 decimals. The list may
            hold fewer than ``top_k`` entries.

        Raises:
            RuntimeError: If ``build`` has not been called yet.
        """
        if self.index is None:
            raise RuntimeError("Retriever.build() must be called before retrieve()")

        # Never request more neighbours than the index holds: FAISS pads the
        # missing slots with id -1, which would index the last sentence.
        k = min(max(self._CANDIDATE_POOL, top_k), self.index.ntotal)
        if k <= 0:
            return []

        qe = self.model.encode([query]).astype(np.float32)
        scores, indices = self.index.search(qe, k)
        results = []
        for score, idx in zip(scores[0], indices[0]):
            idx = int(idx)
            if idx < 0:
                # Defensive: skip padding ids should FAISS still emit any.
                continue
            if self.langs[idx] == lang:
                results.append({
                    "text": self.sentences[idx],
                    "lang": self.langs[idx],
                    "score": round(float(score), 4),
                })
            if len(results) == top_k:
                break
        return results
|
ne_agent/tui.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from rich.console import Console
|
|
3
|
+
from rich.text import Text
|
|
4
|
+
from rich.align import Align
|
|
5
|
+
from rich.panel import Panel
|
|
6
|
+
from rich.rule import Rule
|
|
7
|
+
|
|
8
|
+
# Shared rich console; every print in this module goes through it.
console = Console()

# (phrase, language label) pairs that boot_screen() prints one per line.
# NOTE(review): the phrases themselves have not been checked for linguistic
# accuracy — confirm with native speakers before relying on them.
GREETINGS = [
    ("Kumno ngi tip u ia u?", "khasi"),
    ("Angni manda?", "garo"),
    ("Eng nge nia?", "mizo"),
    ("Nongshitla?", "meitei"),
    ("Kemon acho?", "assamese"),
    ("Nangse ema?", "bodo"),
]
|
|
18
|
+
|
|
19
|
+
def load_banner() -> str:
    """Return the banner text, falling back to a plain title.

    Reads ``assets/banner.txt`` relative to the current working directory,
    decoded as UTF-8.

    Returns:
        The file's contents, or ``"NE-AGENT"`` when the file is missing,
        unreadable, or not valid UTF-8.
    """
    try:
        # An explicit encoding keeps non-ASCII banner art from depending on
        # the platform's default codec.
        with open("assets/banner.txt", encoding="utf-8") as f:
            return f.read()
    except (OSError, UnicodeDecodeError):
        # Best-effort only: the banner is decoration. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit propagate.
        return "NE-AGENT"
|
|
25
|
+
|
|
26
|
+
def boot_screen():
    """Clear the terminal and draw the start-up splash.

    Prints the centred banner and tagline, then each entry of ``GREETINGS``
    with a 0.2 second pause after it, and finishes with a horizontal rule.
    """
    console.clear()

    banner = Text(load_banner(), style="bold orange1")
    console.print(Align.center(banner))
    tagline = Text("Speaks Northeast.", style="bold white")
    console.print(Align.center(tagline))
    console.print()

    for phrase, lang in GREETINGS:
        # Phrase in white, followed by its language tag in bold blue.
        greeting = Text.assemble(
            (f"{phrase}", "white"),
            (f" [{lang}]", "bold blue"),
        )
        console.print(Align.center(greeting))
        time.sleep(0.2)

    console.print()
    console.print(Rule(style="orange1"))
    console.print()
|
|
40
|
+
|
|
41
|
+
def print_status(index_size: int, model: str):
    """Print a centred one-line component summary followed by a rule.

    Args:
        index_size: Number shown as the FAISS index document count.
        model: Name shown next to "Ollama".
    """
    # NOTE(review): the diff rendering this was recovered from may have
    # collapsed runs of spaces inside these literals — confirm the separator
    # widths against the packaged file.
    segments = (
        "● NE-LID ",
        "● NE-Embed ",
        f"● Ollama {model} ",
        f"● FAISS index: {index_size} docs",
    )
    status = Text()
    for segment in segments:
        status.append(segment, style="orange1")

    console.print(Align.center(status))
    console.print()
    console.print(Rule(style="orange1"))
    console.print()
|
|
51
|
+
|
|
52
|
+
def print_result(result: dict):
    """Render one agent result: language line, retrieved snippets, answer.

    Args:
        result: Dict providing ``detected_lang``, ``lang_score``,
            ``retrieved`` (dicts with a ``text`` key) and ``answer``.
    """
    header = Text()
    for fragment, look in (
        ("LANG ", "dim white"),
        (result["detected_lang"].upper(), "bold blue"),
        (f" {result['lang_score']}", "dim white"),
    ):
        header.append(fragment, style=look)
    console.print(header)
    console.print()

    retrieved = result["retrieved"]
    if retrieved:
        console.print(Text("RETRIEVED", style="dim orange1"))
        for doc in retrieved:
            # Only the first 70 characters of each snippet are shown.
            console.print(Text(f" · {doc['text'][:70]}", style="dim white"))
        console.print()

    answer_panel = Panel(
        Text(result["answer"], style="white"),
        border_style="orange1",
        title=Text("NE-AGENT", style="bold orange1"),
    )
    console.print(answer_panel)
    console.print()
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ne-agent
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: First open-source AI agent for Northeast Indian languages
|
|
5
|
+
Home-page: https://github.com/MWirelabs/ne-agent
|
|
6
|
+
Author: MWire Labs
|
|
7
|
+
Author-email: connect@mwirelabs.com
|
|
8
|
+
License: CC-BY-4.0
|
|
9
|
+
Requires-Python: >=3.9
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: ne-lid
|
|
12
|
+
Requires-Dist: ne-embed
|
|
13
|
+
Requires-Dist: faiss-cpu
|
|
14
|
+
Requires-Dist: rich
|
|
15
|
+
Requires-Dist: requests
|
|
16
|
+
Requires-Dist: pandas
|
|
17
|
+
Requires-Dist: openpyxl
|
|
18
|
+
Dynamic: author
|
|
19
|
+
Dynamic: author-email
|
|
20
|
+
Dynamic: description
|
|
21
|
+
Dynamic: description-content-type
|
|
22
|
+
Dynamic: home-page
|
|
23
|
+
Dynamic: license
|
|
24
|
+
Dynamic: requires-dist
|
|
25
|
+
Dynamic: requires-python
|
|
26
|
+
Dynamic: summary
|
|
27
|
+
|
|
28
|
+
# NE-Agent
|
|
29
|
+
|
|
30
|
+
**First open-source AI agent for Northeast Indian languages.**
|
|
31
|
+
|
|
32
|
+
Built on the NE-Stack by [MWire Labs](https://mwirelabs.com), Shillong.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## What is NE-Agent?
|
|
37
|
+
|
|
38
|
+
NE-Agent is a terminal-based agentic assistant that speaks Northeast India's indigenous languages. It automatically detects the input language, retrieves relevant context from a multilingual corpus, and generates responses — all running locally with no API keys required.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Powered by NE-Stack
|
|
43
|
+
|
|
44
|
+
| Component | Role | Model |
|
|
45
|
+
|---|---|---|
|
|
46
|
+
| NE-LID | Language identification | fastText, 11 languages, 99.09% accuracy |
|
|
47
|
+
| NE-Embed | Multilingual embeddings | LaBSE fine-tuned, 768-dim |
|
|
48
|
+
| FAISS | Vector retrieval | IndexFlatIP |
|
|
49
|
+
| Ollama | Local LLM | qwen2.5:1.5b (default) |
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Supported Languages
|
|
54
|
+
|
|
55
|
+
Assamese · Khasi · Garo · Mizo · Meitei · Bodo · Kokborok · Nyishi · Nagamese · English
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Quickstart
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
pip install ne-agent
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Add your corpus to `data/` and run:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
python run.py
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Type in any Northeast Indian language. Type `exit` or `:q` to quit.
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## License
|
|
76
|
+
|
|
77
|
+
CC-BY-4.0 — MWire Labs, Shillong, Meghalaya.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
ne_agent/__init__.py,sha256=J9k624HHKv0Co_yoUohRV-skRq0iKobgdylU8bGtfK0,71
|
|
2
|
+
ne_agent/core.py,sha256=w2ON_OrXv7gKLcuGn4q_piXmL_1RTvYYKUY85q65Osc,760
|
|
3
|
+
ne_agent/lid.py,sha256=Y-s61pm7NDoFt_mvlKyZCxeYDGKf0gqcoGaiGQyuPsk,260
|
|
4
|
+
ne_agent/llm.py,sha256=0FnO19WQol_DrBHrXT8v0ik55oQH-yZJtQz1jaR54eQ,729
|
|
5
|
+
ne_agent/retriever.py,sha256=ApIUw_yp9JB-57KqrNmUtOe-vHPme8LBqP71kFWewdw,1431
|
|
6
|
+
ne_agent/tui.py,sha256=G8Qrv1aCyn7YgrSFw-gugDQj_N7K1mQ3b0mt_lvkQiY,2189
|
|
7
|
+
ne_agent-0.1.1.dist-info/METADATA,sha256=LlXXNpTio7hSiRvBIwkTC6WMr12kGW4Slt2orB7FdAA,1800
|
|
8
|
+
ne_agent-0.1.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
9
|
+
ne_agent-0.1.1.dist-info/top_level.txt,sha256=tMA-RwskDWoaeDi6UZ2lQwspa1G47GVH2BrjUSdntn8,9
|
|
10
|
+
ne_agent-0.1.1.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ne_agent
|