nlp-menu 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nlp_menu-1.0.0/PKG-INFO +87 -0
- nlp_menu-1.0.0/README.md +60 -0
- nlp_menu-1.0.0/nlp_menu/__init__.py +3 -0
- nlp_menu-1.0.0/nlp_menu/cli.py +65 -0
- nlp_menu-1.0.0/nlp_menu/program1.py +31 -0
- nlp_menu-1.0.0/nlp_menu/program10.py +53 -0
- nlp_menu-1.0.0/nlp_menu/program2.py +24 -0
- nlp_menu-1.0.0/nlp_menu/program3.py +63 -0
- nlp_menu-1.0.0/nlp_menu/program4.py +76 -0
- nlp_menu-1.0.0/nlp_menu/program5.py +36 -0
- nlp_menu-1.0.0/nlp_menu/program6.py +17 -0
- nlp_menu-1.0.0/nlp_menu/program7.py +15 -0
- nlp_menu-1.0.0/nlp_menu/program8.py +22 -0
- nlp_menu-1.0.0/nlp_menu/program9.py +43 -0
- nlp_menu-1.0.0/nlp_menu.egg-info/PKG-INFO +87 -0
- nlp_menu-1.0.0/nlp_menu.egg-info/SOURCES.txt +20 -0
- nlp_menu-1.0.0/nlp_menu.egg-info/dependency_links.txt +1 -0
- nlp_menu-1.0.0/nlp_menu.egg-info/entry_points.txt +2 -0
- nlp_menu-1.0.0/nlp_menu.egg-info/requires.txt +14 -0
- nlp_menu-1.0.0/nlp_menu.egg-info/top_level.txt +1 -0
- nlp_menu-1.0.0/pyproject.toml +41 -0
- nlp_menu-1.0.0/setup.cfg +4 -0
nlp_menu-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nlp-menu
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A collection of 10 NLP programs accessible via an interactive CLI menu
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: nlp,word2vec,glove,transformers,cohere,spacy,menu
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: gensim
|
|
14
|
+
Requires-Dist: matplotlib
|
|
15
|
+
Requires-Dist: scikit-learn
|
|
16
|
+
Requires-Dist: nltk
|
|
17
|
+
Requires-Dist: cohere
|
|
18
|
+
Requires-Dist: transformers
|
|
19
|
+
Requires-Dist: torch
|
|
20
|
+
Requires-Dist: langchain
|
|
21
|
+
Requires-Dist: langchain-community
|
|
22
|
+
Requires-Dist: langchain-cohere
|
|
23
|
+
Requires-Dist: langchain-core
|
|
24
|
+
Requires-Dist: pydantic
|
|
25
|
+
Requires-Dist: wikipedia-api
|
|
26
|
+
Requires-Dist: spacy
|
|
27
|
+
|
|
28
|
+
# nlp-menu
|
|
29
|
+
|
|
30
|
+
An interactive CLI menu for 10 NLP programs covering word embeddings, sentiment analysis, summarization, and more.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install nlp-menu
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
After installing, download the spaCy model (required for Program 10):
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
python -m spacy download en_core_web_sm
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Usage
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
nlp-menu
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
This launches an interactive menu:
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
╔══════════════════════════════════════════════════════╗
|
|
54
|
+
║ 🧠 NLP PROGRAMS MENU 🧠 ║
|
|
55
|
+
╠══════════════════════════════════════════════════════╣
|
|
56
|
+
║ 1. GloVe Word Analogies (King-Man+Woman...) ║
|
|
57
|
+
║ 2. Word Embedding PCA Visualization (Sports) ║
|
|
58
|
+
║ 3. Word2Vec on Medical Corpus + PCA Plot ║
|
|
59
|
+
║ 4. Prompt Enrichment with GloVe + Cohere ║
|
|
60
|
+
║ 5. Paragraph Generator with GloVe + Cohere ║
|
|
61
|
+
║ 6. Sentiment Analysis (Transformers) ║
|
|
62
|
+
║ 7. Text Summarization (Pegasus) ║
|
|
63
|
+
║ 8. LangChain + Cohere File Summarizer ║
|
|
64
|
+
║ 9. Institution Details via Wikipedia + Pydantic ║
|
|
65
|
+
║ 10. IPC Chatbot (spaCy NER) ║
|
|
66
|
+
║ 0. Exit ║
|
|
67
|
+
╚══════════════════════════════════════════════════════╝
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Programs Overview
|
|
71
|
+
|
|
72
|
+
| # | Program | Libraries Used |
|
|
73
|
+
|---|---------|---------------|
|
|
74
|
+
| 1 | GloVe Word Analogies | gensim |
|
|
75
|
+
| 2 | Word Embedding PCA Visualization | gensim, matplotlib, scikit-learn |
|
|
76
|
+
| 3 | Word2Vec Medical Corpus | gensim, nltk, matplotlib |
|
|
77
|
+
| 4 | Prompt Enrichment | gensim, nltk, cohere |
|
|
78
|
+
| 5 | Paragraph Generator | gensim, cohere |
|
|
79
|
+
| 6 | Sentiment Analysis | transformers |
|
|
80
|
+
| 7 | Text Summarization | transformers |
|
|
81
|
+
| 8 | LangChain Summarizer | langchain, cohere |
|
|
82
|
+
| 9 | Institution Scraper | wikipedia-api, pydantic |
|
|
83
|
+
| 10 | IPC Chatbot | spacy |
|
|
84
|
+
|
|
85
|
+
## License
|
|
86
|
+
|
|
87
|
+
MIT
|
nlp_menu-1.0.0/README.md
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# nlp-menu
|
|
2
|
+
|
|
3
|
+
An interactive CLI menu for 10 NLP programs covering word embeddings, sentiment analysis, summarization, and more.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install nlp-menu
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
After installing, download the spaCy model (required for Program 10):
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
python -m spacy download en_core_web_sm
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Usage
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
nlp-menu
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
This launches an interactive menu:
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
╔══════════════════════════════════════════════════════╗
|
|
27
|
+
║ 🧠 NLP PROGRAMS MENU 🧠 ║
|
|
28
|
+
╠══════════════════════════════════════════════════════╣
|
|
29
|
+
║ 1. GloVe Word Analogies (King-Man+Woman...) ║
|
|
30
|
+
║ 2. Word Embedding PCA Visualization (Sports) ║
|
|
31
|
+
║ 3. Word2Vec on Medical Corpus + PCA Plot ║
|
|
32
|
+
║ 4. Prompt Enrichment with GloVe + Cohere ║
|
|
33
|
+
║ 5. Paragraph Generator with GloVe + Cohere ║
|
|
34
|
+
║ 6. Sentiment Analysis (Transformers) ║
|
|
35
|
+
║ 7. Text Summarization (Pegasus) ║
|
|
36
|
+
║ 8. LangChain + Cohere File Summarizer ║
|
|
37
|
+
║ 9. Institution Details via Wikipedia + Pydantic ║
|
|
38
|
+
║ 10. IPC Chatbot (spaCy NER) ║
|
|
39
|
+
║ 0. Exit ║
|
|
40
|
+
╚══════════════════════════════════════════════════════╝
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Programs Overview
|
|
44
|
+
|
|
45
|
+
| # | Program | Libraries Used |
|
|
46
|
+
|---|---------|---------------|
|
|
47
|
+
| 1 | GloVe Word Analogies | gensim |
|
|
48
|
+
| 2 | Word Embedding PCA Visualization | gensim, matplotlib, scikit-learn |
|
|
49
|
+
| 3 | Word2Vec Medical Corpus | gensim, nltk, matplotlib |
|
|
50
|
+
| 4 | Prompt Enrichment | gensim, nltk, cohere |
|
|
51
|
+
| 5 | Paragraph Generator | gensim, cohere |
|
|
52
|
+
| 6 | Sentiment Analysis | transformers |
|
|
53
|
+
| 7 | Text Summarization | transformers |
|
|
54
|
+
| 8 | LangChain Summarizer | langchain, cohere |
|
|
55
|
+
| 9 | Institution Scraper | wikipedia-api, pydantic |
|
|
56
|
+
| 10 | IPC Chatbot | spacy |
|
|
57
|
+
|
|
58
|
+
## License
|
|
59
|
+
|
|
60
|
+
MIT
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""NLP Menu - Interactive CLI menu for 10 NLP programs."""
|
|
2
|
+
|
|
3
|
+
MENU = """
|
|
4
|
+
╔══════════════════════════════════════════════════════╗
|
|
5
|
+
║ 🧠 NLP PROGRAMS MENU 🧠 ║
|
|
6
|
+
╠══════════════════════════════════════════════════════╣
|
|
7
|
+
║ 1. GloVe Word Analogies (King-Man+Woman...) ║
|
|
8
|
+
║ 2. Word Embedding PCA Visualization (Sports) ║
|
|
9
|
+
║ 3. Word2Vec on Medical Corpus + PCA Plot ║
|
|
10
|
+
║ 4. Prompt Enrichment with GloVe + Cohere ║
|
|
11
|
+
║ 5. Paragraph Generator with GloVe + Cohere ║
|
|
12
|
+
║ 6. Sentiment Analysis (Transformers) ║
|
|
13
|
+
║ 7. Text Summarization (Pegasus) ║
|
|
14
|
+
║ 8. LangChain + Cohere File Summarizer ║
|
|
15
|
+
║ 9. Institution Details via Wikipedia + Pydantic ║
|
|
16
|
+
║ 10. IPC Chatbot (spaCy NER) ║
|
|
17
|
+
║ 0. Exit ║
|
|
18
|
+
╚══════════════════════════════════════════════════════╝
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
def main():
    """Run the interactive menu loop until the user chooses to exit.

    Each iteration prints ``MENU``, reads a choice and, for choices 1-10,
    imports ``nlp_menu.program<N>`` on demand and calls its ``run()``.
    Program modules are imported lazily so their heavy dependencies are only
    loaded when the corresponding entry is actually selected.

    A failure inside a program is reported and the menu keeps running.
    End-of-input or Ctrl-C at a menu prompt exits the loop cleanly.
    """
    import importlib  # local import keeps this block self-contained

    valid_choices = {str(number) for number in range(1, 11)}

    while True:
        print(MENU)
        try:
            choice = input("Enter program number (0 to exit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt: leave instead of a traceback.
            print("\n👋 Goodbye!")
            break

        if choice == "0":
            print("👋 Goodbye!")
            break

        if choice in valid_choices:
            try:
                program = importlib.import_module(f"nlp_menu.program{choice}")
                program.run()
            except KeyboardInterrupt:
                print("\n⚠️ Program interrupted.")
            except Exception as exc:
                # Top-level boundary: one broken program (missing model, bad
                # API key, network error, ...) must not take the menu down.
                print(f"❌ Program {choice} failed: {exc}")
        else:
            print("❌ Invalid choice. Please enter a number between 0 and 10.")

        try:
            input("\n[Press Enter to return to menu...]")
        except (EOFError, KeyboardInterrupt):
            print("\n👋 Goodbye!")
            break
|
|
63
|
+
|
|
64
|
+
if __name__ == "__main__":
|
|
65
|
+
main()
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import gensim.downloader as api
|
|
2
|
+
|
|
3
|
+
def run():
    """Print two word analogies and the nearest neighbours of 'programming'.

    Loads the ``glove-wiki-gigaword-50`` vectors through gensim's downloader
    and queries them with ``most_similar``.
    """
    glove = api.load("glove-wiki-gigaword-50")

    # (label to print, words added, words subtracted)
    analogies = (
        ("King - Man + Woman =", ['king', 'woman'], ['man']),
        ("Paris - France + Italy =", ['paris', 'italy'], ['france']),
    )
    for label, added, subtracted in analogies:
        matches = glove.most_similar(positive=added, negative=subtracted, topn=1)
        best_word, best_score = matches[0]
        print(label, best_word)
        print("Similarity:", best_score)

    # Five closest words to "programming", each with its similarity score.
    for neighbour in glove.most_similar(positive=['programming'], topn=5):
        print(neighbour[0], neighbour[1])
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import spacy
|
|
2
|
+
|
|
3
|
+
IPC_SECTIONS = {
|
|
4
|
+
"302": "Section 302 IPC: Punishment for murder. Whoever commits murder shall be punished with death, or imprisonment for life, and shall also be liable to fine.",
|
|
5
|
+
"375": "Section 375 IPC: Rape. This section defines what constitutes rape under Indian law.",
|
|
6
|
+
"420": "Section 420 IPC: Cheating and dishonestly inducing delivery of property.",
|
|
7
|
+
"376": "Section 376 IPC: Punishment for rape. Rigorous imprisonment of not less than 10 years, which may extend to life imprisonment, and a fine.",
|
|
8
|
+
"124a": "Section 124A IPC: Sedition – whoever, by words or signs, brings or attempts to bring hatred or contempt against the Government shall be punished."
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
def run():
    """Run the interactive IPC (Indian Penal Code) chatbot.

    Loads the spaCy ``en_core_web_sm`` pipeline, then reads lines from
    standard input and answers from ``IPC_SECTIONS`` until the user types
    ``exit``, ``quit`` or ``bye``.
    """
    import re  # local import: only the greeting check needs it

    nlp = spacy.load("en_core_web_sm")

    def extract_ipc_section(user_input):
        """Return the ``IPC_SECTIONS`` key mentioned in *user_input*, or None.

        Tokens are compared case-insensitively because the keys are stored
        lower-case ("124a") while users typically write "124A".
        """
        for token in nlp(user_input):
            key = token.text.lower()
            if key in IPC_SECTIONS:
                return key
        return None

    def detect_intent(user_input):
        """Classify *user_input* as 'ipc_query', 'greeting' or 'general'."""
        lowered = user_input.lower()
        if "section" in lowered or any(key in lowered for key in IPC_SECTIONS):
            return "ipc_query"
        # "hi" must be a whole word; a plain substring test would also fire
        # on words such as "this" or "which".
        if "hello" in lowered or re.search(r"\bhi\b", lowered):
            return "greeting"
        return "general"

    print("IPC Chatbot: Hello! Ask me about any section of the Indian Penal Code (e.g., 'What is Section 302?').")

    while True:
        user_input = input("You: ")
        # Strip so that " exit " (stray spaces) still ends the session.
        if user_input.strip().lower() in ('exit', 'quit', 'bye'):
            print("IPC Chatbot: Goodbye!")
            break

        intent = detect_intent(user_input)

        if intent == "ipc_query":
            section = extract_ipc_section(user_input)
            if section:
                print(f"IPC Chatbot: {IPC_SECTIONS[section]}")
            else:
                print("IPC Chatbot: Sorry, I couldn't identify the IPC section. Please specify like 'Section 302'.")
        elif intent == "greeting":
            print("IPC Chatbot: Hello! Ask me about any IPC section.")
        else:
            print("IPC Chatbot: I can help you with information about the Indian Penal Code. Try asking about a section like 'What is Section 420?'")
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import matplotlib.pyplot as plt
|
|
2
|
+
import gensim.downloader as api
|
|
3
|
+
from sklearn.decomposition import PCA
|
|
4
|
+
|
|
5
|
+
def run():
    """Plot five sports words in 2-D after reducing their GloVe vectors with PCA.

    Loads ``glove-wiki-gigaword-50``, projects the word vectors onto two
    principal components and shows a labelled matplotlib scatter plot.
    """
    glove = api.load("glove-wiki-gigaword-50")

    sports = ["football", "cricket", "volleyball", "kabaddi", "basketball"]
    vectors = [glove[name] for name in sports]

    projector = PCA(n_components=2)
    points = projector.fit_transform(vectors)

    plt.figure(figsize=(12, 8))
    # One scatter call per word, each followed by a slightly offset label.
    for name, point in zip(sports, points):
        px, py = point
        plt.scatter(px, py)
        plt.text(px + 0.02, py + 0.02, name)

    plt.title("2D Visualization of Word Embeddings")
    plt.xlabel("PCA Component 1")
    plt.ylabel("PCA Component 2")
    plt.grid(True)
    plt.show()
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import nltk
|
|
2
|
+
import string
|
|
3
|
+
from nltk.tokenize import word_tokenize
|
|
4
|
+
from nltk.corpus import stopwords
|
|
5
|
+
from gensim.models import Word2Vec
|
|
6
|
+
import matplotlib.pyplot as plt
|
|
7
|
+
from sklearn.decomposition import PCA
|
|
8
|
+
|
|
9
|
+
def run():
    """Train a small Word2Vec model on a built-in medical corpus and plot it.

    Steps: tokenize and filter five sentences, train Word2Vec, save the model
    to ``medical_word2vec.model`` in the current directory and reload it,
    print the vocabulary and two similarity queries, then show a 2-D PCA
    scatter plot of every word vector.
    """
    medical_corpus = [
        "Diabetes is a chronic disease that affects the way the body process blood sugar.",
        "Hypertension, or high blood pressure, can lead to heart disease and stroke.",
        "The patient was diagnosed with pneumonia and prescribed antibiotics.",
        "Insulin therapy is commonly used for type 1 diabetes patient.",
        "Cardiovascular diseases are leading cause of death worldwide.",
    ]

    # Recent NLTK releases load tokenizer data from 'punkt_tab' while older
    # ones use 'punkt'; requesting both keeps word_tokenize working on either.
    for resource in ('punkt', 'punkt_tab', 'stopwords'):
        nltk.download(resource, quiet=True)

    stop_words = set(stopwords.words("english"))
    processed_corpus = []

    for sentence in medical_corpus:
        tokens = word_tokenize(sentence.lower())
        # Drop stop words and single punctuation characters.
        tokens = [word for word in tokens if word not in stop_words and word not in string.punctuation]
        processed_corpus.append(tokens)

    print("Processed corpus:", processed_corpus)

    word2vec_model = Word2Vec(
        sentences=processed_corpus,
        vector_size=100,
        window=5,
        min_count=1,  # keep every word: the corpus is tiny
        workers=4,
    )

    # Round-trip through disk to demonstrate saving and loading a model.
    word2vec_model.save("medical_word2vec.model")
    word2vec_model = Word2Vec.load("medical_word2vec.model")

    print("Vocabulary:", list(word2vec_model.wv.index_to_key))
    print("Similar words to 'diabetes':", word2vec_model.wv.most_similar("diabetes"))

    result = word2vec_model.wv.most_similar(
        positive=['hypertension', 'heart'],
        negative=['stroke'],
    )
    print("Word vector arithmetic result:", result)

    words = list(word2vec_model.wv.index_to_key)
    word_vectors = [word2vec_model.wv[word] for word in words]

    pca = PCA(n_components=2)
    reduced_vectors = pca.fit_transform(word_vectors)

    plt.figure(figsize=(8, 6))
    for i, word in enumerate(words):
        plt.scatter(reduced_vectors[i, 0], reduced_vectors[i, 1])
        plt.annotate(word, (reduced_vectors[i, 0], reduced_vectors[i, 1]))

    plt.title("Word Embedding Visualization (PCA)")
    plt.show()
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import ssl
|
|
2
|
+
import nltk
|
|
3
|
+
import string
|
|
4
|
+
import cohere
|
|
5
|
+
from gensim.downloader import load
|
|
6
|
+
from nltk.tokenize import word_tokenize
|
|
7
|
+
from nltk.corpus import stopwords
|
|
8
|
+
|
|
9
|
+
def run():
    """Enrich a user prompt with GloVe neighbours and compare Cohere responses.

    Downloads the required NLTK data and the ``glove-wiki-gigaword-50``
    vectors, asks for a Cohere API key and a prompt, appends similar words
    for each extracted keyword, then prints the model's response to the
    original prompt and to the enriched prompt.
    """
    # SECURITY NOTE(review): certificate verification is switched off for the
    # data downloads below, as in the original code (presumably to work around
    # broken local certificate stores - confirm it is still needed). The
    # override is restored afterwards so it does not stay in effect for the
    # rest of the process.
    original_https_context = ssl._create_default_https_context
    ssl._create_default_https_context = ssl._create_unverified_context
    try:
        # Recent NLTK releases use the '*_tab' / '*_eng' resource names while
        # older ones use the plain names; request both.
        for resource in (
            'punkt',
            'punkt_tab',
            'averaged_perceptron_tagger',
            'averaged_perceptron_tagger_eng',
            'stopwords',
        ):
            nltk.download(resource, quiet=True)

        print("Loading GloVe model...")
        embedding_model = load("glove-wiki-gigaword-50")
        print("GloVe loaded!")
    finally:
        ssl._create_default_https_context = original_https_context

    # Built once: rebuilding the stop-word list for every token is wasteful.
    english_stopwords = set(stopwords.words('english'))
    strip_punctuation = str.maketrans('', '', string.punctuation)

    def clean_text(text):
        """Lower-case *text* and remove ASCII punctuation."""
        return text.lower().translate(strip_punctuation)

    def extract_keywords(text, top_pos_tags=('NN', 'NNS', 'JJ', 'VB')):
        """Return the unique non-stop-words of *text* tagged with one of *top_pos_tags*."""
        tokens = [t for t in word_tokenize(text) if t not in english_stopwords]
        pos_tags = nltk.pos_tag(tokens)
        keywords = [word for word, tag in pos_tags if tag in top_pos_tags]
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # enriched prompt is the same on every run.
        return list(dict.fromkeys(keywords))

    def get_similar_words(word, top_n=3):
        """Return up to *top_n* GloVe neighbours of *word*, or [] if unknown."""
        try:
            return [w for w, _ in embedding_model.most_similar(word, topn=top_n)]
        except KeyError:
            return []

    def enrich_prompt(prompt, top_n=3):
        """Append the neighbours of each keyword in *prompt* to the prompt."""
        keywords = extract_keywords(clean_text(prompt))
        enriched_prompt = prompt
        for word in keywords:
            similar_words = get_similar_words(word, top_n)
            if similar_words:
                enriched_prompt += " " + " ".join(similar_words)
        return enriched_prompt

    api_key = input("Enter your Cohere API key: ")
    co = cohere.Client(api_key)

    original_prompt = input("Enter your prompt: ")
    enriched_prompt = enrich_prompt(original_prompt)

    print("\n Original Prompt:\n", original_prompt)
    print("\n Enriched Prompt:\n", enriched_prompt)

    def generate_response(prompt_text, creative=True):
        """Send *prompt_text* to Cohere chat and return the response text."""
        if creative:
            prompt_text = "write a detailed and imaginative story based on following prompt:\n" + prompt_text
        response = co.chat(model="command-a-03-2025", message=prompt_text)
        return response.text

    response_original = generate_response(original_prompt, creative=True)
    response_enriched = generate_response(enriched_prompt, creative=True)

    print("\n\n ===Response to original prompt\n")
    print(response_original)
    print("\n\n ===Response to enriched prompt\n")
    print(response_enriched)

    print("\n\n===Comparison===\n")
    print("""
1. Detail: Enriched prompt should produce longer, more descriptive output.
2. Vocabulary: Enriched prompt uses richer wording from similar words.
3. Context relevance: Enriched prompt guides the AI to cover more specific concepts.
4. Creativity: Enriched prompt usually results in more imaginative storytelling.
""")
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import cohere
|
|
2
|
+
from gensim.downloader import load
|
|
3
|
+
|
|
4
|
+
def run():
    """Generate a paragraph about a seed word using its GloVe neighbours.

    Loads ``glove-wiki-gigaword-50``, asks for a Cohere API key and a seed
    word, looks up the ten most similar words and asks Cohere chat to write
    a paragraph that uses them.
    """
    print("Loading pre-trained GloVe model...")
    glove = load("glove-wiki-gigaword-50")
    print("Model loaded successfully!")

    client = cohere.Client(input("Enter your Cohere API key: "))

    def generate_para(seed_word, words):
        """Return Cohere's paragraph for *seed_word*, or an error message string."""
        related = ", ".join(words)
        request = (
            f"Write a meaningful and well structured paragraph about '{seed_word}'."
            f" Use these related words naturally: {related}."
        )
        try:
            reply = client.chat(model='command-a-03-2025', message=request)
            return reply.text.strip()
        except Exception as err:
            # API failures are returned as text, not raised.
            return f"Cohere API error: {err}"

    seed = input("Enter a seed word: ").lower()

    # Guard clause: nothing to do for out-of-vocabulary words.
    if seed not in glove.key_to_index:
        print("Word not in vocabulary.")
        return

    neighbours = [entry[0] for entry in glove.most_similar(seed, topn=10)]
    print("10 similar words found:")
    print(", ".join(neighbours), "\n")

    text = generate_para(seed, neighbours)
    print("Generated paragraph:\n")
    print(text)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from transformers import pipeline
|
|
2
|
+
|
|
3
|
+
def run():
    """Classify the sentiment of sentences typed by the user.

    Builds a Hugging Face ``sentiment-analysis`` pipeline once, then reads
    sentences until the user types ``exit`` (case-insensitive).
    """
    classifier = pipeline("sentiment-analysis")

    def analyze_sentiment(text):
        """Return a one-line label/confidence summary for *text*."""
        top = classifier(text)[0]
        label, score = top["label"], top["score"]
        return f"Sentiment: {label} (Confidence: {score:.2f})"

    # Read-until-sentinel loop: the prompt is evaluated in the loop condition.
    while (sentence := input("Enter sentence (or 'exit'): ").strip()).lower() != "exit":
        print(analyze_sentiment(sentence))
    print("Exiting...")
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from transformers import pipeline
|
|
2
|
+
|
|
3
|
+
def run():
    """Summarize user-supplied text with the ``google/pegasus-arxiv`` model.

    Inputs shorter than 30 whitespace-separated words are rejected with a
    message; otherwise the summary is printed.
    """
    print("Loading summarization model...")
    summarizer = pipeline("summarization", model="google/pegasus-arxiv")

    text = input("\nEnter the text you want to summarize:\n").strip()

    if len(text.split()) < 30:
        print("Text too short to summarize.")
        return

    # The text is passed as-is: Pegasus is a plain summarization model, so an
    # instruction such as "Summarize this text..." would be treated as part
    # of the document. truncation=True keeps over-long input within the
    # model's maximum input length.
    summary = summarizer(
        text,
        max_length=60,
        min_length=25,
        do_sample=False,
        truncation=True,
    )
    print("\nSummarized Text:")
    print(summary[0]['summary_text'])
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from langchain_core.prompts import PromptTemplate
|
|
2
|
+
from langchain_cohere import ChatCohere
|
|
3
|
+
|
|
4
|
+
def run():
    """Summarize a UTF-8 text file with Cohere through LangChain.

    Asks for a Cohere API key and a file path, reads the file, and prints
    the model's summary. An unreadable or empty file is reported and the
    function returns without calling the API.
    """
    api_key = input("Enter your Cohere API key: ").strip()
    file_path = input("Enter the path to your text file: ").strip()

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read()
    except (OSError, UnicodeDecodeError) as exc:
        # Missing file, permission problem, directory path or non-UTF-8 data.
        print(f"Could not read file: {exc}")
        return

    if not text.strip():
        print("File is empty; nothing to summarize.")
        return

    print("File loaded successfully!")

    prompt = PromptTemplate(
        input_variables=["text"],
        template="Summarize the following text in simple words:\n\n{text}",
    )

    llm = ChatCohere(cohere_api_key=api_key)
    result = llm.invoke(prompt.format(text=text))

    print("\nSummary:\n")
    print(result.content)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from pydantic import BaseModel
|
|
2
|
+
import wikipediaapi
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
class InstitutionDetails(BaseModel):
    """Structured record of the details extracted for one institution.

    All fields are plain strings; the code that fills this model uses the
    text "Not Available" when a value could not be extracted.
    """

    # Institution name as supplied by the caller.
    name: str
    # Text captured after "founded by" in the article.
    founder: str
    # Four-digit year captured after "founded in" / "established in".
    founded_year: str
    # Free-text description of the branches.
    branches: str
    # Employee count as written in the article (may contain commas).
    employees: str
    # Short summary built from the article's opening sentences.
    summary: str
|
|
12
|
+
|
|
13
|
+
def run():
    """Look up an institution on Wikipedia and print its details as JSON.

    Prompts for a name, fetches the English Wikipedia page, extracts a few
    facts with regular expressions and prints an ``InstitutionDetails``
    record. A missing page is reported as "Institution not found".
    """

    def fetch_details(name):
        """Return an ``InstitutionDetails`` for *name*.

        Raises:
            ValueError: if no Wikipedia page exists for *name*.
        """
        wiki = wikipediaapi.Wikipedia(
            user_agent="InstitutionScraper/1.0 (student project; contact: your_email@example.com)",
            language="en",
        )
        page = wiki.page(name)

        if not page.exists():
            raise ValueError("Institution not found")

        # Match case-insensitively on the original text, not a lower-cased
        # copy, so captured names keep their capitalisation.
        text = page.text
        founder = re.search(r"founded by ([^.]+)", text, re.IGNORECASE)
        year = re.search(r"(?:founded|established) in (\d{4})", text, re.IGNORECASE)
        employees = re.search(r"(\d+[,\d]*) employees", text, re.IGNORECASE)

        # First four sentences of the summary. Splitting on "." removes the
        # periods, so they are put back when joining; empty fragments (e.g.
        # after the final period) are dropped.
        sentences = [part.strip() for part in page.summary.split(".") if part.strip()]
        summary = ". ".join(sentences[:4]) + "." if sentences else ""

        return InstitutionDetails(
            name=name,
            founder=founder.group(1).strip() if founder else "Not Available",
            founded_year=year.group(1) if year else "Not Available",
            branches="Multiple branches",
            employees=employees.group(1) if employees else "Not Available",
            summary=summary,
        )

    name = input("Enter Institution Name: ").strip()
    try:
        result = fetch_details(name)
        print(result.model_dump_json(indent=4))
    except ValueError as e:
        print(e)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nlp-menu
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A collection of 10 NLP programs accessible via an interactive CLI menu
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: nlp,word2vec,glove,transformers,cohere,spacy,menu
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: gensim
|
|
14
|
+
Requires-Dist: matplotlib
|
|
15
|
+
Requires-Dist: scikit-learn
|
|
16
|
+
Requires-Dist: nltk
|
|
17
|
+
Requires-Dist: cohere
|
|
18
|
+
Requires-Dist: transformers
|
|
19
|
+
Requires-Dist: torch
|
|
20
|
+
Requires-Dist: langchain
|
|
21
|
+
Requires-Dist: langchain-community
|
|
22
|
+
Requires-Dist: langchain-cohere
|
|
23
|
+
Requires-Dist: langchain-core
|
|
24
|
+
Requires-Dist: pydantic
|
|
25
|
+
Requires-Dist: wikipedia-api
|
|
26
|
+
Requires-Dist: spacy
|
|
27
|
+
|
|
28
|
+
# nlp-menu
|
|
29
|
+
|
|
30
|
+
An interactive CLI menu for 10 NLP programs covering word embeddings, sentiment analysis, summarization, and more.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install nlp-menu
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
After installing, download the spaCy model (required for Program 10):
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
python -m spacy download en_core_web_sm
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Usage
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
nlp-menu
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
This launches an interactive menu:
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
╔══════════════════════════════════════════════════════╗
|
|
54
|
+
║ 🧠 NLP PROGRAMS MENU 🧠 ║
|
|
55
|
+
╠══════════════════════════════════════════════════════╣
|
|
56
|
+
║ 1. GloVe Word Analogies (King-Man+Woman...) ║
|
|
57
|
+
║ 2. Word Embedding PCA Visualization (Sports) ║
|
|
58
|
+
║ 3. Word2Vec on Medical Corpus + PCA Plot ║
|
|
59
|
+
║ 4. Prompt Enrichment with GloVe + Cohere ║
|
|
60
|
+
║ 5. Paragraph Generator with GloVe + Cohere ║
|
|
61
|
+
║ 6. Sentiment Analysis (Transformers) ║
|
|
62
|
+
║ 7. Text Summarization (Pegasus) ║
|
|
63
|
+
║ 8. LangChain + Cohere File Summarizer ║
|
|
64
|
+
║ 9. Institution Details via Wikipedia + Pydantic ║
|
|
65
|
+
║ 10. IPC Chatbot (spaCy NER) ║
|
|
66
|
+
║ 0. Exit ║
|
|
67
|
+
╚══════════════════════════════════════════════════════╝
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Programs Overview
|
|
71
|
+
|
|
72
|
+
| # | Program | Libraries Used |
|
|
73
|
+
|---|---------|---------------|
|
|
74
|
+
| 1 | GloVe Word Analogies | gensim |
|
|
75
|
+
| 2 | Word Embedding PCA Visualization | gensim, matplotlib, scikit-learn |
|
|
76
|
+
| 3 | Word2Vec Medical Corpus | gensim, nltk, matplotlib |
|
|
77
|
+
| 4 | Prompt Enrichment | gensim, nltk, cohere |
|
|
78
|
+
| 5 | Paragraph Generator | gensim, cohere |
|
|
79
|
+
| 6 | Sentiment Analysis | transformers |
|
|
80
|
+
| 7 | Text Summarization | transformers |
|
|
81
|
+
| 8 | LangChain Summarizer | langchain, cohere |
|
|
82
|
+
| 9 | Institution Scraper | wikipedia-api, pydantic |
|
|
83
|
+
| 10 | IPC Chatbot | spacy |
|
|
84
|
+
|
|
85
|
+
## License
|
|
86
|
+
|
|
87
|
+
MIT
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
nlp_menu/__init__.py
|
|
4
|
+
nlp_menu/cli.py
|
|
5
|
+
nlp_menu/program1.py
|
|
6
|
+
nlp_menu/program10.py
|
|
7
|
+
nlp_menu/program2.py
|
|
8
|
+
nlp_menu/program3.py
|
|
9
|
+
nlp_menu/program4.py
|
|
10
|
+
nlp_menu/program5.py
|
|
11
|
+
nlp_menu/program6.py
|
|
12
|
+
nlp_menu/program7.py
|
|
13
|
+
nlp_menu/program8.py
|
|
14
|
+
nlp_menu/program9.py
|
|
15
|
+
nlp_menu.egg-info/PKG-INFO
|
|
16
|
+
nlp_menu.egg-info/SOURCES.txt
|
|
17
|
+
nlp_menu.egg-info/dependency_links.txt
|
|
18
|
+
nlp_menu.egg-info/entry_points.txt
|
|
19
|
+
nlp_menu.egg-info/requires.txt
|
|
20
|
+
nlp_menu.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
nlp_menu
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "nlp-menu"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "A collection of 10 NLP programs accessible via an interactive CLI menu"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
keywords = ["nlp", "word2vec", "glove", "transformers", "cohere", "spacy", "menu"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Operating System :: OS Independent",
|
|
17
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"gensim",
|
|
21
|
+
"matplotlib",
|
|
22
|
+
"scikit-learn",
|
|
23
|
+
"nltk",
|
|
24
|
+
"cohere",
|
|
25
|
+
"transformers",
|
|
26
|
+
"torch",
|
|
27
|
+
"langchain",
|
|
28
|
+
"langchain-community",
|
|
29
|
+
"langchain-cohere",
|
|
30
|
+
"langchain-core",
|
|
31
|
+
"pydantic",
|
|
32
|
+
"wikipedia-api",
|
|
33
|
+
"spacy",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.scripts]
|
|
37
|
+
nlp-menu = "nlp_menu.cli:main"
|
|
38
|
+
|
|
39
|
+
[tool.setuptools.packages.find]
|
|
40
|
+
where = ["."]
|
|
41
|
+
include = ["nlp_menu*"]
|
nlp_menu-1.0.0/setup.cfg
ADDED