pipprograms-tanuj 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pipprograms_tanuj-1.0.0/MANIFEST.in +3 -0
- pipprograms_tanuj-1.0.0/PKG-INFO +143 -0
- pipprograms_tanuj-1.0.0/README.md +125 -0
- pipprograms_tanuj-1.0.0/pipprograms/__init__.py +59 -0
- pipprograms_tanuj-1.0.0/pipprograms/cli.py +75 -0
- pipprograms_tanuj-1.0.0/pipprograms/programs/__init__.py +1 -0
- pipprograms_tanuj-1.0.0/pipprograms/programs/program1.py +74 -0
- pipprograms_tanuj-1.0.0/pipprograms/programs/program2.py +91 -0
- pipprograms_tanuj-1.0.0/pipprograms/programs/program3.py +84 -0
- pipprograms_tanuj-1.0.0/pipprograms/programs/program4.py +213 -0
- pipprograms_tanuj-1.0.0/pipprograms/programs/program5.py +231 -0
- pipprograms_tanuj-1.0.0/pipprograms/programs/program6.py +199 -0
- pipprograms_tanuj-1.0.0/pipprograms/programs/program7.py +186 -0
- pipprograms_tanuj-1.0.0/pipprograms/programs/program8.py +14 -0
- pipprograms_tanuj-1.0.0/pipprograms/programs/program9.py +14 -0
- pipprograms_tanuj-1.0.0/pipprograms_tanuj.egg-info/PKG-INFO +143 -0
- pipprograms_tanuj-1.0.0/pipprograms_tanuj.egg-info/SOURCES.txt +23 -0
- pipprograms_tanuj-1.0.0/pipprograms_tanuj.egg-info/dependency_links.txt +1 -0
- pipprograms_tanuj-1.0.0/pipprograms_tanuj.egg-info/entry_points.txt +2 -0
- pipprograms_tanuj-1.0.0/pipprograms_tanuj.egg-info/requires.txt +6 -0
- pipprograms_tanuj-1.0.0/pipprograms_tanuj.egg-info/top_level.txt +1 -0
- pipprograms_tanuj-1.0.0/pyproject.toml +32 -0
- pipprograms_tanuj-1.0.0/setup.cfg +4 -0
- pipprograms_tanuj-1.0.0/setup.py +21 -0
- pipprograms_tanuj-1.0.0/test/test.py +5 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pipprograms-tanuj
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A hosted Python library containing 9 text processing and NLP programs, accessible from anywhere.
|
|
5
|
+
Author-email: Tanuj <tanuj@example.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/tanujs/pipprograms
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.7
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: numpy
|
|
13
|
+
Requires-Dist: pandas
|
|
14
|
+
Requires-Dist: nltk
|
|
15
|
+
Requires-Dist: matplotlib
|
|
16
|
+
Requires-Dist: scikit-learn
|
|
17
|
+
Requires-Dist: PyPDF2
|
|
18
|
+
|
|
19
|
+
# 🚀 Pipprograms: 9 NLP & Text Processing Programs
|
|
20
|
+
|
|
21
|
+
A hosted Python library containing 9 text processing and NLP programs. You can package this library and host it (e.g., on PyPI or GitHub) to import it and run it from anywhere in the world.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## ✨ Features
|
|
26
|
+
|
|
27
|
+
- **CLI Interface**: Show or run any program with a single terminal command.
|
|
28
|
+
- **Python API**: Import and run/inspect the programs directly inside your own Python scripts.
|
|
29
|
+
- **Automated Fallbacks**: Programs automatically generate sample datasets if the default paths (like Colab `/content/` paths) are missing, ensuring they work out-of-the-box on any machine.
|
|
30
|
+
- **Backwards Compatible**: Works with modern PEP 517 build backends and older setup.py-based systems.
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## 📂 Included Programs
|
|
35
|
+
|
|
36
|
+
| Program # | Name | Description | Key Libraries |
|
|
37
|
+
|---|---|---|---|
|
|
38
|
+
| **1** | TF-IDF Summarizer | Extracts key sentences to summarize text | `nltk`, `numpy` |
|
|
39
|
+
| **2** | Custom Word Cloud | Generates overlap-free word clouds sized by word frequency | `nltk`, `matplotlib` |
|
|
40
|
+
| **3** | Sentiment Classifier (LR) | Sentiment Classifier using balanced Logistic Regression | `pandas`, `scikit-learn` |
|
|
41
|
+
| **4** | Sentiment Classifier (N-gram) | Classifies sentiment using custom-sized n-grams & Logistic Regression | `pandas`, `scikit-learn` |
|
|
42
|
+
| **5** | Sentiment Classifier (NB) | Classifies sentiment using balanced Naive Bayes | `pandas`, `scikit-learn` |
|
|
43
|
+
| **6** | Spam Detector | Classifies Ham vs. Spam using Random Forest | `pandas`, `scikit-learn` |
|
|
44
|
+
| **7** | Document Topic Modeling | Latent Dirichlet Allocation (LDA) document clustering | `PyPDF2`, `scikit-learn` |
|
|
45
|
+
| **8** | Placeholder 8 | Custom script placeholder (editable) | - |
|
|
46
|
+
| **9** | Placeholder 9 | Custom script placeholder (editable) | - |
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## 🛠️ Installation
|
|
51
|
+
|
|
52
|
+
### 1. From PyPI (Standard & Recommended)
|
|
53
|
+
Once the package is published to PyPI (see details below), anyone in the world can install it without Git:
|
|
54
|
+
```bash
|
|
55
|
+
pip install pipprograms-tanuj
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### 2. From a Direct URL (No Git Required)
|
|
59
|
+
If you host the built `.whl` file on any web server, public storage (S3, Dropbox, website, etc.), anyone can install it directly via the URL:
|
|
60
|
+
```bash
|
|
61
|
+
pip install https://your-domain.com/path/pipprograms_tanuj-1.0.0-py3-none-any.whl
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### 3. Local Installation (for development)
|
|
65
|
+
If you want to run or test it locally from the source folder:
|
|
66
|
+
```bash
|
|
67
|
+
pip install -e .
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## 🖥️ Command Line Usage
|
|
73
|
+
|
|
74
|
+
Once installed, the `pipprograms` command will be globally available on your system.
|
|
75
|
+
|
|
76
|
+
### 1. List all programs
|
|
77
|
+
```bash
|
|
78
|
+
pipprograms list
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### 2. View the source code of a program (e.g. Program 1)
|
|
82
|
+
```bash
|
|
83
|
+
pipprograms show 1
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### 3. Run a program (e.g. Program 3)
|
|
87
|
+
```bash
|
|
88
|
+
pipprograms run 3
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## 🐍 Python Usage
|
|
94
|
+
|
|
95
|
+
You can import `pipprograms` in any python script:
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
import pipprograms
|
|
99
|
+
|
|
100
|
+
# 1. Print the source code of Program 1
|
|
101
|
+
pipprograms.show(1)
|
|
102
|
+
|
|
103
|
+
# 2. Get the source code as a string
|
|
104
|
+
code_str = pipprograms.get_code(1)
|
|
105
|
+
|
|
106
|
+
# 3. Run Program 1
|
|
107
|
+
pipprograms.run(1)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## 📤 How to Host and Publish this Package
|
|
113
|
+
|
|
114
|
+
### Option A: Hosting on PyPI (Free and Public)
|
|
115
|
+
To make your package available via `pip install pipprograms-tanuj` (or your chosen distribution name):
|
|
116
|
+
|
|
117
|
+
1. **Create an account** on [PyPI](https://pypi.org/).
|
|
118
|
+
2. **Install Build Tools**:
|
|
119
|
+
```bash
|
|
120
|
+
pip install --upgrade build twine
|
|
121
|
+
```
|
|
122
|
+
3. **Build the Distribution Archives**:
|
|
123
|
+
```bash
|
|
124
|
+
python -m build
|
|
125
|
+
```
|
|
126
|
+
This will create a `dist/` directory containing `.tar.gz` and `.whl` files.
|
|
127
|
+
4. **Upload to PyPI**:
|
|
128
|
+
```bash
|
|
129
|
+
python -m twine upload dist/*
|
|
130
|
+
```
|
|
131
|
+
*Note: You will be prompted to enter your PyPI token/credentials.*
|
|
132
|
+
|
|
133
|
+
### Option B: Hosting Wheel (.whl) files on Cloud/HTTP Servers (No Git required)
|
|
134
|
+
If you don't want to publish to PyPI and want to keep it private or host it yourself:
|
|
135
|
+
1. **Build the wheel file**:
|
|
136
|
+
```bash
|
|
137
|
+
python -m build
|
|
138
|
+
```
|
|
139
|
+
2. **Upload the wheel file** (located in `dist/pipprograms_tanuj-1.0.0-py3-none-any.whl`) to any online storage that supports direct downloads (e.g. your personal web server, AWS S3, Google Cloud Storage, or a direct link from Dropbox/OneDrive).
|
|
140
|
+
3. **Install it on any machine** using the direct URL:
|
|
141
|
+
```bash
|
|
142
|
+
pip install https://your-server.com/pipprograms_tanuj-1.0.0-py3-none-any.whl
|
|
143
|
+
```
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# 🚀 Pipprograms: 9 NLP & Text Processing Programs
|
|
2
|
+
|
|
3
|
+
A hosted Python library containing 9 text processing and NLP programs. You can package this library and host it (e.g., on PyPI or GitHub) to import it and run it from anywhere in the world.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## ✨ Features
|
|
8
|
+
|
|
9
|
+
- **CLI Interface**: Show or run any program with a single terminal command.
|
|
10
|
+
- **Python API**: Import and run/inspect the programs directly inside your own Python scripts.
|
|
11
|
+
- **Automated Fallbacks**: Programs automatically generate sample datasets if the default paths (like Colab `/content/` paths) are missing, ensuring they work out-of-the-box on any machine.
|
|
12
|
+
- **Backwards Compatible**: Works with modern PEP 517 build backends and older setup.py-based systems.
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## 📂 Included Programs
|
|
17
|
+
|
|
18
|
+
| Program # | Name | Description | Key Libraries |
|
|
19
|
+
|---|---|---|---|
|
|
20
|
+
| **1** | TF-IDF Summarizer | Extracts key sentences to summarize text | `nltk`, `numpy` |
|
|
21
|
+
| **2** | Custom Word Cloud | Generates overlap-free word clouds sized by word frequency | `nltk`, `matplotlib` |
|
|
22
|
+
| **3** | Sentiment Classifier (LR) | Sentiment Classifier using balanced Logistic Regression | `pandas`, `scikit-learn` |
|
|
23
|
+
| **4** | Sentiment Classifier (N-gram) | Classifies sentiment using custom-sized n-grams & Logistic Regression | `pandas`, `scikit-learn` |
|
|
24
|
+
| **5** | Sentiment Classifier (NB) | Classifies sentiment using balanced Naive Bayes | `pandas`, `scikit-learn` |
|
|
25
|
+
| **6** | Spam Detector | Classifies Ham vs. Spam using Random Forest | `pandas`, `scikit-learn` |
|
|
26
|
+
| **7** | Document Topic Modeling | Latent Dirichlet Allocation (LDA) document clustering | `PyPDF2`, `scikit-learn` |
|
|
27
|
+
| **8** | Placeholder 8 | Custom script placeholder (editable) | - |
|
|
28
|
+
| **9** | Placeholder 9 | Custom script placeholder (editable) | - |
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## 🛠️ Installation
|
|
33
|
+
|
|
34
|
+
### 1. From PyPI (Standard & Recommended)
|
|
35
|
+
Once the package is published to PyPI (see details below), anyone in the world can install it without Git:
|
|
36
|
+
```bash
|
|
37
|
+
pip install pipprograms-tanuj
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### 2. From a Direct URL (No Git Required)
|
|
41
|
+
If you host the built `.whl` file on any web server, public storage (S3, Dropbox, website, etc.), anyone can install it directly via the URL:
|
|
42
|
+
```bash
|
|
43
|
+
pip install https://your-domain.com/path/pipprograms_tanuj-1.0.0-py3-none-any.whl
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### 3. Local Installation (for development)
|
|
47
|
+
If you want to run or test it locally from the source folder:
|
|
48
|
+
```bash
|
|
49
|
+
pip install -e .
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## 🖥️ Command Line Usage
|
|
55
|
+
|
|
56
|
+
Once installed, the `pipprograms` command will be globally available on your system.
|
|
57
|
+
|
|
58
|
+
### 1. List all programs
|
|
59
|
+
```bash
|
|
60
|
+
pipprograms list
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### 2. View the source code of a program (e.g. Program 1)
|
|
64
|
+
```bash
|
|
65
|
+
pipprograms show 1
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### 3. Run a program (e.g. Program 3)
|
|
69
|
+
```bash
|
|
70
|
+
pipprograms run 3
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## 🐍 Python Usage
|
|
76
|
+
|
|
77
|
+
You can import `pipprograms` in any python script:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
import pipprograms
|
|
81
|
+
|
|
82
|
+
# 1. Print the source code of Program 1
|
|
83
|
+
pipprograms.show(1)
|
|
84
|
+
|
|
85
|
+
# 2. Get the source code as a string
|
|
86
|
+
code_str = pipprograms.get_code(1)
|
|
87
|
+
|
|
88
|
+
# 3. Run Program 1
|
|
89
|
+
pipprograms.run(1)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## 📤 How to Host and Publish this Package
|
|
95
|
+
|
|
96
|
+
### Option A: Hosting on PyPI (Free and Public)
|
|
97
|
+
To make your package available via `pip install pipprograms-tanuj` (or your chosen distribution name):
|
|
98
|
+
|
|
99
|
+
1. **Create an account** on [PyPI](https://pypi.org/).
|
|
100
|
+
2. **Install Build Tools**:
|
|
101
|
+
```bash
|
|
102
|
+
pip install --upgrade build twine
|
|
103
|
+
```
|
|
104
|
+
3. **Build the Distribution Archives**:
|
|
105
|
+
```bash
|
|
106
|
+
python -m build
|
|
107
|
+
```
|
|
108
|
+
This will create a `dist/` directory containing `.tar.gz` and `.whl` files.
|
|
109
|
+
4. **Upload to PyPI**:
|
|
110
|
+
```bash
|
|
111
|
+
python -m twine upload dist/*
|
|
112
|
+
```
|
|
113
|
+
*Note: You will be prompted to enter your PyPI token/credentials.*
|
|
114
|
+
|
|
115
|
+
### Option B: Hosting Wheel (.whl) files on Cloud/HTTP Servers (No Git required)
|
|
116
|
+
If you don't want to publish to PyPI and want to keep it private or host it yourself:
|
|
117
|
+
1. **Build the wheel file**:
|
|
118
|
+
```bash
|
|
119
|
+
python -m build
|
|
120
|
+
```
|
|
121
|
+
2. **Upload the wheel file** (located in `dist/pipprograms_tanuj-1.0.0-py3-none-any.whl`) to any online storage that supports direct downloads (e.g. your personal web server, AWS S3, Google Cloud Storage, or a direct link from Dropbox/OneDrive).
|
|
122
|
+
3. **Install it on any machine** using the direct URL:
|
|
123
|
+
```bash
|
|
124
|
+
pip install https://your-server.com/pipprograms_tanuj-1.0.0-py3-none-any.whl
|
|
125
|
+
```
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import subprocess
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# Package metadata
|
|
7
|
+
__version__ = "1.0.0"
|
|
8
|
+
__author__ = "Tanuj"
|
|
9
|
+
|
|
10
|
+
PROGRAMS_DIR = Path(__file__).parent / "programs"
|
|
11
|
+
|
|
12
|
+
def get_program_path(num: int) -> Path:
    """Return the path of the bundled source file for program ``num``.

    Args:
        num: Program number, an integer from 1 to 9 inclusive.

    Returns:
        ``PROGRAMS_DIR / "program<num>.py"``. The file's existence is not
        checked here.

    Raises:
        ValueError: If ``num`` is not an ``int`` in the range 1-9. ``bool``
            values are rejected as well: ``True`` is an ``int`` equal to 1
            and would otherwise produce the bogus name ``programTrue.py``.
    """
    is_plain_int = isinstance(num, int) and not isinstance(num, bool)
    if not is_plain_int or not (1 <= num <= 9):
        raise ValueError("Program number must be an integer between 1 and 9 inclusive.")
    return PROGRAMS_DIR / f"program{num}.py"
|
|
17
|
+
|
|
18
|
+
def get_code(num: int) -> str:
    """Read and return the source text of program ``num`` (1-9).

    Raises:
        ValueError: Propagated from ``get_program_path`` for an invalid number.
        FileNotFoundError: If the program's source file does not exist.
    """
    source_file = get_program_path(num)
    if source_file.exists():
        return source_file.read_text(encoding="utf-8")
    raise FileNotFoundError(f"Program {num} source file not found at {source_file}")
|
|
24
|
+
|
|
25
|
+
def show(num: int):
    """Write the source code of program ``num`` (1-9) to standard output."""
    source = get_code(num)
    print(source)
|
|
28
|
+
|
|
29
|
+
def run(num: int):
    """Execute program ``num`` (1-9) as a child Python process.

    The child inherits this process's standard streams, so interactive
    prompts in the program keep working.

    Raises:
        ValueError: Propagated from ``get_program_path`` for an invalid number.
        FileNotFoundError: If the program's source file does not exist.
    """
    script = get_program_path(num)
    if not script.exists():
        raise FileNotFoundError(f"Program {num} source file not found at {script}")

    print(f"--- Running Program {num}: {script.name} ---")
    # Launch with the interpreter running this code so the child sees the
    # same environment and installed dependencies.
    command = [sys.executable, str(script)]
    try:
        completed = subprocess.run(command)
    except KeyboardInterrupt:
        print(f"\nProgram {num} execution interrupted by user.")
    else:
        if completed.returncode != 0:
            print(f"\nProgram {num} exited with an error code: {completed.returncode}")
    print(f"--- Program {num} Finished ---")
|
|
45
|
+
|
|
46
|
+
def list_programs():
    """Return a dict mapping each program number (1-9) to its description."""
    # Position in this tuple determines the program number (starting at 1).
    titles = (
        "TF-IDF Sentence Summarizer",
        "Custom Word Cloud Generator",
        "Sentiment Classifier (Logistic Regression - Balanced)",
        "Sentiment Classifier with N-grams (Logistic Regression - Interactive)",
        "Sentiment Classifier (Naive Bayes - Balanced)",
        "Spam Detection (Random Forest)",
        "Document Clustering and Topic Modeling (LDA)",
        "Placeholder Program 8",
        "Placeholder Program 9",
    )
    return dict(enumerate(titles, start=1))
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import argparse
|
|
4
|
+
from pipprograms import get_code, run, list_programs, __version__
|
|
5
|
+
|
|
6
|
+
# Simple ANSI colors for premium terminal look
|
|
7
|
+
BOLD = "\033[1m"
|
|
8
|
+
GREEN = "\033[32m"
|
|
9
|
+
BLUE = "\033[34m"
|
|
10
|
+
CYAN = "\033[36m"
|
|
11
|
+
RED = "\033[31m"
|
|
12
|
+
RESET = "\033[0m"
|
|
13
|
+
|
|
14
|
+
def supports_color():
    """Tell whether ANSI-coloured output should be used on standard output.

    Colour is declined on Windows unless ``ANSICON`` is set in the
    environment, and whenever standard output is not a terminal.
    """
    if sys.platform == 'win32' and 'ANSICON' not in os.environ:
        return False
    stream = sys.stdout
    return hasattr(stream, 'isatty') and stream.isatty()
|
|
20
|
+
|
|
21
|
+
def print_styled(text, style):
    """Print ``text`` wrapped in the ANSI ``style`` prefix and a reset code.

    When ``supports_color()`` is false the text is printed unchanged.
    """
    output = f"{style}{text}{RESET}" if supports_color() else text
    print(output)
|
|
26
|
+
|
|
27
|
+
def main():
    """Command-line entry point: parse arguments and dispatch the command.

    Supported commands are ``list``, ``show <number>`` and ``run <number>``.
    With no command the help text is printed. Any exception raised by
    ``show`` or ``run`` is reported in red and the process exits with 1.
    """
    parser = argparse.ArgumentParser(
        description="Access and run any of your 9 text processing and NLP programs."
    )
    parser.add_argument(
        "--version", action="version", version=f"pipprograms {__version__}"
    )

    commands = parser.add_subparsers(dest="command", help="Available commands")
    commands.add_parser("list", help="List all available programs")
    # "show" and "run" take the same positional program number.
    numbered_commands = (
        ("show", "Show the code of a program"),
        ("run", "Run a program"),
    )
    for name, help_text in numbered_commands:
        sub = commands.add_parser(name, help=help_text)
        sub.add_argument("number", type=int, choices=range(1, 10), help="Program number (1-9)")

    args = parser.parse_args()
    command = args.command

    if command == "list":
        print_styled("\nAvailable Programs:", BOLD + CYAN)
        print_styled("===================", CYAN)
        for num, desc in list_programs().items():
            print_styled(f" Program {num}:", BOLD + GREEN)
            print(f" {desc}")
        print()
        return

    if command in ("show", "run"):
        try:
            if command == "show":
                print_styled(f"\n--- Code for Program {args.number} ---", BOLD + BLUE)
                print(get_code(args.number))
                print_styled("-------------------------------\n", BOLD + BLUE)
            else:
                run(args.number)
        except Exception as e:
            print_styled(f"Error: {e}", RED)
            sys.exit(1)
        return

    parser.print_help()
|
|
73
|
+
|
|
74
|
+
if __name__ == "__main__":
|
|
75
|
+
main()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Mark programs directory as a Python package
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import nltk
|
|
2
|
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
|
3
|
+
from nltk.corpus import stopwords
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
# Download required NLTK resources
|
|
7
|
+
nltk.download('punkt')
|
|
8
|
+
nltk.download('punkt_tab')
|
|
9
|
+
nltk.download('stopwords')
|
|
10
|
+
print(nltk.data.find('tokenizers/punkt'))
|
|
11
|
+
|
|
12
|
+
def tf_idf(sentence, sentences):
    """Score ``sentence`` by summing TF-IDF over its non-stop-word tokens.

    Each sentence in ``sentences`` is treated as one document. Matching is
    done on lowercased tokens throughout. Previously the keywords were
    lowercased but compared against tokens of the original-case sentence, so
    capitalised words never contributed, and the document-frequency test was
    a case-sensitive substring match on the raw sentence text.

    Args:
        sentence: The sentence to score.
        sentences: All sentences of the text, used for document frequency.

    Returns:
        The summed score; ``0`` when the sentence has no keywords.
    """
    stop_words = set(stopwords.words('english'))

    def tokens_of(text):
        # One shared tokenisation so keywords, tf and idf use the same units.
        return word_tokenize(text.lower())

    sentence_tokens = tokens_of(sentence)
    # Keywords keep duplicates: a repeated word contributes once per occurrence.
    keywords = [
        token for token in sentence_tokens
        if token not in stop_words and token.isalnum()
    ]

    # Tokenise every sentence once instead of once per keyword.
    sentence_token_sets = [set(tokens_of(sent)) for sent in sentences]
    total_sentences = len(sentences)

    tf_idf_score = 0
    for word in keywords:
        # len(sentence_tokens) > 0 here because keywords is drawn from it.
        tf_score = sentence_tokens.count(word) / len(sentence_tokens)
        doc_freq = sum(1 for tokens in sentence_token_sets if word in tokens)
        # +1 in the denominator keeps the ratio finite for unseen words.
        idf_score = np.log(total_sentences / (doc_freq + 1))
        tf_idf_score += tf_score * idf_score

    return tf_idf_score
|
|
51
|
+
|
|
52
|
+
def summarizer(text, length):
    """Build an extractive summary from the highest-scoring sentences.

    The text is split into sentences, each is scored with ``tf_idf``, and
    the top ``length`` sentences are joined with spaces in descending score
    order.
    """
    sentences = sent_tokenize(text)
    # Keyed by sentence text, so a repeated sentence appears only once.
    scores = {sentence: tf_idf(sentence, sentences) for sentence in sentences}
    ranked = sorted(scores, key=scores.get, reverse=True)
    return ' '.join(ranked[:length])
|
|
66
|
+
|
|
67
|
+
if __name__ == "__main__":
|
|
68
|
+
# tf-idf sentence summary
|
|
69
|
+
text = """
|
|
70
|
+
Born in Ranchi, Dhoni made his first class debut for Bihar in 1999. He made his debut for the Indian cricket team on 23 December 2004 in an ODI against Bangladesh and played his first test a year later against Sri Lanka. In 2007, he became the captain of the ODI side before taking over in all formats by 2008. Dhoni retired from test cricket in 2014, but continued playing in limited overs cricket till 2019. He has scored 17,266 runs in international cricket including 10,000 plus runs at an average of more than 50 in ODIs.
|
|
71
|
+
"""
|
|
72
|
+
summary = summarizer(text, 3)
|
|
73
|
+
print("\n--- Summary ---")
|
|
74
|
+
print(summary)
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import matplotlib.pyplot as plt
|
|
2
|
+
import random
|
|
3
|
+
from nltk.corpus import stopwords
|
|
4
|
+
from nltk.tokenize import word_tokenize
|
|
5
|
+
import nltk
|
|
6
|
+
|
|
7
|
+
# Download required NLTK resources
|
|
8
|
+
nltk.download('punkt_tab')
|
|
9
|
+
nltk.download('punkt')
|
|
10
|
+
nltk.download('stopwords')
|
|
11
|
+
|
|
12
|
+
def preprocess(text):
    """Count alphanumeric, non-stop-word tokens in ``text``.

    Words keep their original casing as dictionary keys. Only the stop-word
    lookup is case-folded: the stop-word list is lowercase, so a
    case-sensitive test let capitalised stop words such as "The" or "He"
    through.

    Args:
        text: Raw text to tokenise.

    Returns:
        A dict mapping each kept word to its count. The dict is also
        printed to standard output.
    """
    stop_words = set(stopwords.words('english'))

    dictionary = {}
    for word in word_tokenize(text):
        if word.isalnum() and word.lower() not in stop_words:
            dictionary[word] = dictionary.get(word, 0) + 1

    print("Word Frequencies:")
    print(dictionary)
    return dictionary
|
|
27
|
+
|
|
28
|
+
def wordcloud(freq):
    """Draw a word cloud from a word -> count mapping and show it.

    Words are placed most-frequent first at random positions. Font size
    scales from 10 to 50 points with relative frequency. A position is kept
    only if the word's bounding box overlaps no box already placed; after
    5000 failed attempts the word is left out.

    Args:
        freq: Mapping of word to frequency. An empty mapping shows an empty
            figure.
    """
    fig, ax = plt.subplots(figsize=(12, 10))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.axis('off')

    freq_sorted = sorted(freq.items(), key=lambda x: x[1], reverse=True)

    if not freq_sorted:
        plt.show()
        return

    max_freq = freq_sorted[0][1]
    placed_boxes = []

    for word, count in freq_sorted:
        font_size = 10 + (count / max_freq) * 40

        for _ in range(5000):
            x = random.uniform(0.05, 0.95)
            y = random.uniform(0.05, 0.95)

            txt = ax.text(
                x,
                y,
                word,
                fontsize=font_size,
                ha='center',
                va='center',
                color=(random.random(),
                       random.random(),
                       random.random())
            )

            # The figure must be drawn before the text extent is measurable.
            fig.canvas.draw()
            bbox = txt.get_window_extent(
                renderer=fig.canvas.get_renderer()
            )

            if not any(bbox.overlaps(existing) for existing in placed_boxes):
                placed_boxes.append(bbox)
                break

            # Rejected candidate: take it off the axes before retrying.
            txt.remove()
        # No removal after the loop: a word that never fit was already
        # removed by its last attempt, and removing the same artist twice
        # raises.

    plt.tight_layout()
    plt.show()
|
|
86
|
+
|
|
87
|
+
if __name__ == "__main__":
|
|
88
|
+
text = """On 14 November 1987, at age 14, Tendulkar was selected to represent Bombay in the Ranji Trophy for the 1987–88 season, but he was not selected for the final eleven in any of the matches, though he was often used as a substitute fielder.[43] A year later, on 11 December 1988, aged 15 years and 232 days, Tendulkar made his debut for Bombay against Gujarat at Wankhede Stadium and scored 100 (not out) in that match, making him the youngest Indian to score a century on debut in first-class cricket.[56] He was selected to play for the team by Bombay captain Dilip Vengsarkar, who watched him play Kapil Dev in Wankhede Stadium's cricket practice nets,[29] where the Indian team had come to play against the touring New Zealand team. Tendulkar followed this by scoring a century each in his Deodhar and Duleep Trophy debuts, which are also India's domestic cricket tournaments.[57]
|
|
89
|
+
"""
|
|
90
|
+
freq = preprocess(text)
|
|
91
|
+
wordcloud(freq)
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from sklearn.model_selection import train_test_split
|
|
4
|
+
from sklearn.linear_model import LogisticRegression
|
|
5
|
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
6
|
+
from nltk.corpus import stopwords
|
|
7
|
+
from nltk.tokenize import word_tokenize
|
|
8
|
+
import math
|
|
9
|
+
import os
|
|
10
|
+
import nltk
|
|
11
|
+
|
|
12
|
+
nltk.download('stopwords')
|
|
13
|
+
nltk.download('punkt')
|
|
14
|
+
nltk.download('punkt_tab')
|
|
15
|
+
|
|
16
|
+
# Locate the reviews CSV: try the Colab path first, then the working
# directory, and finally write a small synthetic dataset so the script can
# still run.
csv_path = "/content/Musical_instruments_reviews 4.csv"

if not os.path.exists(csv_path):
    csv_path = "Musical_instruments_reviews 4.csv"
    if not os.path.exists(csv_path):
        print(f"Warning: '{csv_path}' not found. Creating a sample dataset for demonstration...")
        dummy_data = {
            "summary": [
                "Great strings, clear sound", "Okay but broke quickly", "Absolutely horrible quality",
                "Excellent instrument, sounds beautiful", "Average product, does the job", "Not worth the price",
                "Superb build and sound", "Very cheap feel", "Satisfied with this purchase",
                "Disappointed, did not work", "Amazing product!", "Decent for beginners",
                "Fantastic guitar, plays like a dream", "Terrible, arrived broken", "It is okay, nothing special"
            ] * 5,
            "overall": [5.0, 2.0, 1.0, 5.0, 3.0, 2.0, 5.0, 2.0, 4.0, 1.0, 5.0, 3.0, 5.0, 1.0, 3.0] * 5
        }
        pd.DataFrame(dummy_data).to_csv(csv_path, index=False)

df = pd.read_csv(csv_path, on_bad_lines='skip')
# Chain dropna instead of using inplace=True on a column selection, which
# can emit SettingWithCopyWarning.
df = df[["summary", "overall"]].dropna()

print(df.head())
# DataFrame.info() prints its report itself and returns None, so it is
# called directly, and only once, rather than wrapped in print().
df.info()
print(df.describe())

print(df.isnull().sum())
|
|
45
|
+
|
|
46
|
+
def label_sentiment(rating):
    """Map a numeric rating to a sentiment label.

    Args:
        rating: Numeric review rating.

    Returns:
        ``"Positive"`` for ratings of 4 or more, ``"Neutral"`` for ratings
        from 3 up to (but not including) 4, and ``"Negative"`` otherwise.
        Fractional ratings such as 3.5 used to fall through to
        ``"Negative"`` because only an exact 3 was treated as neutral.
    """
    if rating >= 4:
        return "Positive"
    if rating >= 3:
        return "Neutral"
    return "Negative"
|
|
53
|
+
|
|
54
|
+
# Derive a sentiment label for every review from its numeric rating.
df["label"] = df["overall"].apply(label_sentiment)

# Balance the classes by down-sampling each one to the size of the smallest.
min_count = df["label"].value_counts().min()
print(f"Min count per class for balancing: {min_count}")
class_samples = [
    df[df["label"] == sentiment].sample(min_count, random_state=42)
    for sentiment in ("Positive", "Neutral", "Negative")
]
# Shuffle the concatenated samples so the classes are interleaved.
df_balanced = pd.concat(class_samples).sample(frac=1, random_state=42)

# Turn the review summaries into TF-IDF feature vectors.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X = vectorizer.fit_transform(df_balanced["summary"])
y = df_balanced["label"]

feature_names = vectorizer.get_feature_names_out()
print(feature_names)
tfidf_df = pd.DataFrame(X.toarray(), columns=feature_names)

print("\n\n", tfidf_df.head())
print("\n\n", tfidf_df.iloc[0] > 0)

# First 80% of the shuffled rows train the model; the rest is held out.
split = int(0.8 * len(df_balanced))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Classify one review typed by the user.
text = input("Enter a review to classify:\n")
vec = vectorizer.transform([text])
pred = model.predict(vec)[0]
print(f"\nSentiment: {pred}")
|