itertoolkit 1.5.4__tar.gz → 1.5.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. itertoolkit-1.5.9/EVAL_README.md +214 -0
  2. itertoolkit-1.5.9/IMPORTS.md +48 -0
  3. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/PKG-INFO +1 -1
  4. itertoolkit-1.5.9/README.md +214 -0
  5. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm-eval-metrics.pyproject.toml +3 -2
  6. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/IR/__init__.py +2 -1
  7. itertoolkit-1.5.9/bm_preprocessing/importer/IR/pagerank_mat.py +6 -0
  8. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/KALKI/__init__.py +2 -0
  9. itertoolkit-1.5.9/bm_preprocessing/importer/KALKI/pagerank_mat.py +6 -0
  10. itertoolkit-1.5.9/bm_preprocessing/importer/PY/__init__.py +5 -0
  11. itertoolkit-1.5.9/bm_preprocessing/importer/PY/vis_doc.py +6 -0
  12. itertoolkit-1.5.9/bm_preprocessing/src/IR/pagerank_mat.py +114 -0
  13. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/KALKI/collaborative_filtering.py +1 -1
  14. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/KALKI/content_based_filtering.py +2 -2
  15. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/KALKI/pagerank.py +3 -3
  16. itertoolkit-1.5.9/bm_preprocessing/src/KALKI/pagerank_mat.py +164 -0
  17. itertoolkit-1.5.9/bm_preprocessing/src/PY/vis_doc.py +247 -0
  18. itertoolkit-1.5.9/bm_preprocessing/vsk.py +66 -0
  19. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/itertoolkit.pyproject.toml +3 -3
  20. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/pyproject.toml +3 -3
  21. itertoolkit-1.5.4/bm_preprocessing/importer/PY/__init__.py +0 -4
  22. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/.gitignore +0 -0
  23. /itertoolkit-1.5.4/README.md → /itertoolkit-1.5.9/ITER_README.md +0 -0
  24. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/__init__.py +0 -0
  25. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/__init__.py +0 -0
  26. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/DM/__init__.py +0 -0
  27. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/DM/agg.py +0 -0
  28. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/DM/dbscan.py +0 -0
  29. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/DM/finals.py +0 -0
  30. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/DM/gsp.py +0 -0
  31. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/DM/test.py +0 -0
  32. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/Finals/__init__.py +0 -0
  33. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/Finals/kaadhal.py +0 -0
  34. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/Finals/raaka.py +0 -0
  35. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/Finals/seedan.py +0 -0
  36. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/Finals/vikram.py +0 -0
  37. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/IR/finals.py +0 -0
  38. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/IR/pagerank.py +0 -0
  39. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/IR/recommenders_pca.py +0 -0
  40. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/IR/test.py +0 -0
  41. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/KALKI/collaborative_filtering.py +0 -0
  42. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/KALKI/content_based_filtering.py +0 -0
  43. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/KALKI/pagerank.py +0 -0
  44. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/KALKI/pca.py +0 -0
  45. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/KALKI/pca_svd.py +0 -0
  46. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/KALKI/svd.py +0 -0
  47. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/PY/lib_doc.py +0 -0
  48. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/PY/python_doc.py +0 -0
  49. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/__init__.py +0 -0
  50. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/importer/_module_printer.py +0 -0
  51. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/DM/__init__.py +0 -0
  52. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/DM/agg.py +0 -0
  53. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/DM/dbscan.py +0 -0
  54. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/DM/finals.py +0 -0
  55. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/DM/gsp.py +0 -0
  56. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/DM/test.py +0 -0
  57. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/Finals/__init__.py +0 -0
  58. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/Finals/kaadhal.py +0 -0
  59. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/Finals/raaka.py +0 -0
  60. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/Finals/seedan.py +0 -0
  61. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/Finals/vikram.py +0 -0
  62. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/IR/__init__.py +0 -0
  63. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/IR/finals.py +0 -0
  64. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/IR/pagerank.py +0 -0
  65. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/IR/recommenders_pca.py +0 -0
  66. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/IR/test.py +0 -0
  67. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/KALKI/__init__.py +0 -0
  68. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/KALKI/item_features.csv +0 -0
  69. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/KALKI/pca.py +0 -0
  70. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/KALKI/pca_svd.py +0 -0
  71. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/KALKI/svd.py +0 -0
  72. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/KALKI/user_items.csv +0 -0
  73. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/PY/__init__.py +0 -0
  74. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/PY/lib_doc.py +0 -0
  75. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/PY/python_doc.py +0 -0
  76. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/bm_preprocessing/src/__init__.py +0 -0
  77. {itertoolkit-1.5.4 → itertoolkit-1.5.9}/main.py +0 -0
@@ -0,0 +1,214 @@
1
+ # bm-eval-metrics
2
+
3
+ bm-eval-metrics is a Python package providing easy-to-use evaluation metrics and utilities for machine learning workflows.
4
+
5
+ ## Features
6
+
7
+ - Text cleaning and normalization
8
+ - Tokenization and stopword removal
9
+ - Lemmatization
10
+ - TF-IDF and Bag-of-Words vectorization
11
+ - Pipeline-based preprocessing
12
+ - Built on NLTK and pandas
13
+ - Scikit-learn style API
14
+
15
+ ## Installation
16
+
17
+ Install from PyPI:
18
+
19
+ ```bash
20
+ pip install bm-eval-metrics
21
+ ```
22
+
23
+ ## Quick Start
24
+
25
+ ### Basic Usage With Pipeline
26
+
27
+ ```python
28
+ from bm_eval_metrics import (
29
+ TextCleaner,
30
+ Tokenizer,
31
+ Normalizer,
32
+ StopwordFilter,
33
+ Lemmatizer,
34
+ Vectorizer,
35
+ Pipeline,
36
+ )
37
+
38
+ # Sample documents
39
+ documents = [
40
+ "This is an example document! It has punctuation and numbers: 123.",
41
+ "Natural Language Processing is AMAZING!!!",
42
+ "Preprocessing text is very important for NLP tasks.",
43
+ ]
44
+
45
+ # Create preprocessing components
46
+ cleaner = TextCleaner(
47
+ lowercase=True,
48
+ remove_punctuation=True,
49
+ remove_numbers=True,
50
+ strip_whitespace=True,
51
+ )
52
+
53
+ tokenizer = Tokenizer(method="word")
54
+
55
+ normalizer = Normalizer(
56
+ expand_contractions=True,
57
+ fix_unicode=True,
58
+ )
59
+
60
+ stopword_filter = StopwordFilter(language="english")
61
+ lemmatizer = Lemmatizer(method="wordnet")
62
+
63
+ vectorizer = Vectorizer(
64
+ method="tfidf",
65
+ max_features=5000,
66
+ ngram_range=(1, 2),
67
+ )
68
+
69
+ # Build pipeline
70
+ preprocessing_pipeline = Pipeline(
71
+ [
72
+ cleaner,
73
+ normalizer,
74
+ tokenizer,
75
+ stopword_filter,
76
+ lemmatizer,
77
+ vectorizer,
78
+ ]
79
+ )
80
+
81
+ # Run preprocessing
82
+ processed_data = preprocessing_pipeline.fit_transform(documents)
83
+
84
+ # Inspect output
85
+ print("Processed features shape:", processed_data.shape)
86
+ print("Sample vector:", processed_data[0])
87
+ ```
88
+
89
+ ### Step-by-Step Processing Without Pipeline
90
+
91
+ ```python
92
+ from bm_eval_metrics import (
93
+ TextCleaner,
94
+ Tokenizer,
95
+ StopwordFilter,
96
+ Lemmatizer,
97
+ Vectorizer,
98
+ )
99
+
100
+ docs = [
101
+ "Machine learning is fun!",
102
+ "Text preprocessing improves results.",
103
+ ]
104
+
105
+ # Initialize tools
106
+ cleaner = TextCleaner(lowercase=True)
107
+ tokenizer = Tokenizer()
108
+ stopwords = StopwordFilter("english")
109
+ lemmatizer = Lemmatizer()
110
+ vectorizer = Vectorizer(method="bow")
111
+
112
+ # Process
113
+ cleaned = [cleaner.clean(d) for d in docs]
114
+ tokens = [tokenizer.tokenize(d) for d in cleaned]
115
+ filtered = [stopwords.remove(t) for t in tokens]
116
+ lemmatized = [lemmatizer.lemmatize(t) for t in filtered]
117
+
118
+ vectors = vectorizer.fit_transform(lemmatized)
119
+ print(vectors)
120
+ ```
121
+
122
+ ## Components Overview
123
+
124
+ | Component | Description |
125
+ | --- | --- |
126
+ | TextCleaner | Removes noise and formats text |
127
+ | Tokenizer | Splits text into tokens |
128
+ | Normalizer | Standardizes text |
129
+ | StopwordFilter | Removes common filler words |
130
+ | Lemmatizer | Converts words to base form |
131
+ | Vectorizer | Converts text to numeric features |
132
+ | Pipeline | Chains components into a workflow |
133
+
134
+ ## Deep Learning Preparation Example
135
+
136
+ ```python
137
+ from bm_eval_metrics import (
138
+ TextCleaner,
139
+ Tokenizer,
140
+ SequencePadder,
141
+ VocabularyBuilder,
142
+ )
143
+
144
+ texts = [
145
+ "Deep learning for NLP",
146
+ "Transformers are powerful",
147
+ ]
148
+
149
+ cleaner = TextCleaner(lowercase=True)
150
+ tokenizer = Tokenizer()
151
+ vocab = VocabularyBuilder(max_size=10000)
152
+ padder = SequencePadder(max_length=50)
153
+
154
+ # Clean
155
+ cleaned = [cleaner.clean(t) for t in texts]
156
+
157
+ # Tokenize
158
+ tokens = [tokenizer.tokenize(t) for t in cleaned]
159
+
160
+ # Build vocabulary
161
+ vocab.fit(tokens)
162
+
163
+ # Encode
164
+ encoded = [vocab.encode(t) for t in tokens]
165
+
166
+ # Pad
167
+ padded = padder.pad(encoded)
168
+
169
+ print(padded)
170
+ ```
171
+
172
+ ## Requirements
173
+
174
+ - Python 3.11+
175
+ - nltk
176
+ - pandas
177
+ - scikit-learn
178
+
179
+ Dependencies are installed automatically when you install the package:
180
+
181
+ ```bash
182
+ pip install bm-eval-metrics
183
+ ```
184
+
185
+ ## Project Structure
186
+
187
+ ```text
188
+ bm-eval-metrics/
189
+ ├── cleaning.py
190
+ ├── tokenization.py
191
+ ├── normalization.py
192
+ ├── filtering.py
193
+ ├── lemmatization.py
194
+ ├── vectorization.py
195
+ ├── pipeline.py
196
+ └── __init__.py
197
+ ```
198
+
199
+ ## Contributing
200
+
201
+ Contributions are welcome.
202
+
203
+ 1. Fork the repository.
204
+ 2. Create a new branch.
205
+ 3. Commit your changes.
206
+ 4. Open a pull request.
207
+
208
+ ## License
209
+
210
+ This project is licensed under the MIT License.
211
+
212
+ ## Support
213
+
214
+ If you encounter issues or have feature requests, open an issue on GitHub.
@@ -0,0 +1,48 @@
1
+ # Imports Guide
2
+
3
+ ```python
4
+ # Top-level section access
5
+ from bm_preprocessing import DM
6
+ from bm_preprocessing import IR
7
+ from bm_preprocessing import PY
8
+ from bm_preprocessing import Finals
9
+ from bm_preprocessing import KALKI
10
+
11
+ # Finals exports
12
+ from bm_preprocessing.Finals import kaadhal
13
+ from bm_preprocessing.Finals import raaka
14
+ from bm_preprocessing.Finals import seedan
15
+ from bm_preprocessing.Finals import vikram
16
+
17
+ # DM exports
18
+ from bm_preprocessing.DM import agg
19
+ from bm_preprocessing.DM import dbscan
20
+ from bm_preprocessing.DM import finals
21
+ from bm_preprocessing.DM import gsp
22
+ from bm_preprocessing.DM import test
23
+
24
+ # IR exports
25
+ from bm_preprocessing.IR import finals
26
+ from bm_preprocessing.IR import pagerank
27
+ from bm_preprocessing.IR import recommenders_pca
28
+ from bm_preprocessing.IR import test
29
+
30
+ # PY exports
31
+ from bm_preprocessing.PY import lib_doc
32
+ from bm_preprocessing.PY import python_doc
33
+
34
+ # KALKI exports
35
+ from bm_preprocessing.KALKI import collaborative_filtering
36
+ from bm_preprocessing.KALKI import content_based_filtering
37
+ from bm_preprocessing.KALKI import pagerank
38
+ from bm_preprocessing.KALKI import pca
39
+ from bm_preprocessing.KALKI import pca_svd
40
+ from bm_preprocessing.KALKI import svd
41
+
42
+ # Importer-layer access
43
+ from bm_preprocessing.importer.DM import agg, dbscan, finals, gsp, test
44
+ from bm_preprocessing.importer.IR import finals, pagerank, recommenders_pca, test
45
+ from bm_preprocessing.importer.PY import lib_doc, python_doc
46
+ from bm_preprocessing.importer.Finals import kaadhal, raaka, seedan, vikram
47
+ from bm_preprocessing.importer.KALKI import collaborative_filtering, content_based_filtering, pagerank, pca, pca_svd, svd
48
+ ```
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: itertoolkit
3
- Version: 1.5.4
3
+ Version: 1.5.9
4
4
  Summary: An itertools-inspired toolkit for cached iterator and data-structure processing
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: gsppy>=5.3.0
@@ -0,0 +1,214 @@
1
+ # bm-eval-metrics
2
+
3
+ bm-eval-metrics is a Python package providing easy-to-use evaluation metrics and utilities for machine learning workflows.
4
+
5
+ ## Features
6
+
7
+ - Text cleaning and normalization
8
+ - Tokenization and stopword removal
9
+ - Lemmatization
10
+ - TF-IDF and Bag-of-Words vectorization
11
+ - Pipeline-based preprocessing
12
+ - Built on NLTK and pandas
13
+ - Scikit-learn style API
14
+
15
+ ## Installation
16
+
17
+ Install from PyPI:
18
+
19
+ ```bash
20
+ pip install bm-eval-metrics
21
+ ```
22
+
23
+ ## Quick Start
24
+
25
+ ### Basic Usage With Pipeline
26
+
27
+ ```python
28
+ from bm_eval_metrics import (
29
+ TextCleaner,
30
+ Tokenizer,
31
+ Normalizer,
32
+ StopwordFilter,
33
+ Lemmatizer,
34
+ Vectorizer,
35
+ Pipeline,
36
+ )
37
+
38
+ # Sample documents
39
+ documents = [
40
+ "This is an example document! It has punctuation and numbers: 123.",
41
+ "Natural Language Processing is AMAZING!!!",
42
+ "Preprocessing text is very important for NLP tasks.",
43
+ ]
44
+
45
+ # Create preprocessing components
46
+ cleaner = TextCleaner(
47
+ lowercase=True,
48
+ remove_punctuation=True,
49
+ remove_numbers=True,
50
+ strip_whitespace=True,
51
+ )
52
+
53
+ tokenizer = Tokenizer(method="word")
54
+
55
+ normalizer = Normalizer(
56
+ expand_contractions=True,
57
+ fix_unicode=True,
58
+ )
59
+
60
+ stopword_filter = StopwordFilter(language="english")
61
+ lemmatizer = Lemmatizer(method="wordnet")
62
+
63
+ vectorizer = Vectorizer(
64
+ method="tfidf",
65
+ max_features=5000,
66
+ ngram_range=(1, 2),
67
+ )
68
+
69
+ # Build pipeline
70
+ preprocessing_pipeline = Pipeline(
71
+ [
72
+ cleaner,
73
+ normalizer,
74
+ tokenizer,
75
+ stopword_filter,
76
+ lemmatizer,
77
+ vectorizer,
78
+ ]
79
+ )
80
+
81
+ # Run preprocessing
82
+ processed_data = preprocessing_pipeline.fit_transform(documents)
83
+
84
+ # Inspect output
85
+ print("Processed features shape:", processed_data.shape)
86
+ print("Sample vector:", processed_data[0])
87
+ ```
88
+
89
+ ### Step-by-Step Processing Without Pipeline
90
+
91
+ ```python
92
+ from bm_eval_metrics import (
93
+ TextCleaner,
94
+ Tokenizer,
95
+ StopwordFilter,
96
+ Lemmatizer,
97
+ Vectorizer,
98
+ )
99
+
100
+ docs = [
101
+ "Machine learning is fun!",
102
+ "Text preprocessing improves results.",
103
+ ]
104
+
105
+ # Initialize tools
106
+ cleaner = TextCleaner(lowercase=True)
107
+ tokenizer = Tokenizer()
108
+ stopwords = StopwordFilter("english")
109
+ lemmatizer = Lemmatizer()
110
+ vectorizer = Vectorizer(method="bow")
111
+
112
+ # Process
113
+ cleaned = [cleaner.clean(d) for d in docs]
114
+ tokens = [tokenizer.tokenize(d) for d in cleaned]
115
+ filtered = [stopwords.remove(t) for t in tokens]
116
+ lemmatized = [lemmatizer.lemmatize(t) for t in filtered]
117
+
118
+ vectors = vectorizer.fit_transform(lemmatized)
119
+ print(vectors)
120
+ ```
121
+
122
+ ## Components Overview
123
+
124
+ | Component | Description |
125
+ | --- | --- |
126
+ | TextCleaner | Removes noise and formats text |
127
+ | Tokenizer | Splits text into tokens |
128
+ | Normalizer | Standardizes text |
129
+ | StopwordFilter | Removes common filler words |
130
+ | Lemmatizer | Converts words to base form |
131
+ | Vectorizer | Converts text to numeric features |
132
+ | Pipeline | Chains components into a workflow |
133
+
134
+ ## Deep Learning Preparation Example
135
+
136
+ ```python
137
+ from bm_eval_metrics import (
138
+ TextCleaner,
139
+ Tokenizer,
140
+ SequencePadder,
141
+ VocabularyBuilder,
142
+ )
143
+
144
+ texts = [
145
+ "Deep learning for NLP",
146
+ "Transformers are powerful",
147
+ ]
148
+
149
+ cleaner = TextCleaner(lowercase=True)
150
+ tokenizer = Tokenizer()
151
+ vocab = VocabularyBuilder(max_size=10000)
152
+ padder = SequencePadder(max_length=50)
153
+
154
+ # Clean
155
+ cleaned = [cleaner.clean(t) for t in texts]
156
+
157
+ # Tokenize
158
+ tokens = [tokenizer.tokenize(t) for t in cleaned]
159
+
160
+ # Build vocabulary
161
+ vocab.fit(tokens)
162
+
163
+ # Encode
164
+ encoded = [vocab.encode(t) for t in tokens]
165
+
166
+ # Pad
167
+ padded = padder.pad(encoded)
168
+
169
+ print(padded)
170
+ ```
171
+
172
+ ## Requirements
173
+
174
+ - Python 3.11+
175
+ - nltk
176
+ - pandas
177
+ - scikit-learn
178
+
179
+ Dependencies are installed automatically when you install the package:
180
+
181
+ ```bash
182
+ pip install bm-eval-metrics
183
+ ```
184
+
185
+ ## Project Structure
186
+
187
+ ```text
188
+ bm-eval-metrics/
189
+ ├── cleaning.py
190
+ ├── tokenization.py
191
+ ├── normalization.py
192
+ ├── filtering.py
193
+ ├── lemmatization.py
194
+ ├── vectorization.py
195
+ ├── pipeline.py
196
+ └── __init__.py
197
+ ```
198
+
199
+ ## Contributing
200
+
201
+ Contributions are welcome.
202
+
203
+ 1. Fork the repository.
204
+ 2. Create a new branch.
205
+ 3. Commit your changes.
206
+ 4. Open a pull request.
207
+
208
+ ## License
209
+
210
+ This project is licensed under the MIT License.
211
+
212
+ ## Support
213
+
214
+ If you encounter issues or have feature requests, open an issue on GitHub.
@@ -1,10 +1,11 @@
1
1
  [project]
2
2
  name = "bm-eval-metrics"
3
- version = "1.5.4"
3
+ version = "1.5.8"
4
4
  description = "Python package providing easy-to-use evaluation metrics and utilities for Machine Learning"
5
- readme = "README.md"
5
+ readme = "EVAL_README.md"
6
6
  requires-python = ">=3.11"
7
7
  dependencies = [
8
+ "groq>=1.1.2",
8
9
  "gsppy>=5.3.0",
9
10
  "matplotlib>=3.10.8",
10
11
  "networkx>=3.6.1",
@@ -2,5 +2,6 @@ from .finals import finals
2
2
  from .pagerank import pagerank
3
3
  from .recommenders_pca import recommenders_pca
4
4
  from .test import test
5
+ from .pagerank_mat import pagerank_mat
5
6
 
6
- __all__ = ["finals", "test", "pagerank", "recommenders_pca"]
7
+ __all__ = ["finals", "test", "pagerank", "recommenders_pca", "pagerank_mat"]
@@ -0,0 +1,6 @@
1
+ from pathlib import Path
2
+
3
+ from .._module_printer import SourceCodeModule
4
+
5
+ _source_file = Path(__file__).parents[2] / "src" / "IR" / "pagerank_mat.py"
6
+ pagerank_mat = SourceCodeModule("bm_preprocessing.IR.pagerank_mat", _source_file)
@@ -4,6 +4,7 @@ from .pagerank import pagerank
4
4
  from .pca import pca
5
5
  from .pca_svd import pca_svd
6
6
  from .svd import svd
7
+ from .pagerank_mat import pagerank_mat
7
8
 
8
9
  __all__ = [
9
10
  "collaborative_filtering",
@@ -12,4 +13,5 @@ __all__ = [
12
13
  "pca",
13
14
  "pca_svd",
14
15
  "svd",
16
+ "pagerank_mat",
15
17
  ]
@@ -0,0 +1,6 @@
1
+ from pathlib import Path
2
+
3
+ from .._module_printer import SourceCodeModule
4
+
5
+ _source_file = Path(__file__).parents[2] / "src" / "KALKI" / "pagerank_mat.py"
6
+ pagerank_mat = SourceCodeModule("bm_preprocessing.KALKI.pagerank_mat", _source_file)
@@ -0,0 +1,5 @@
1
+ from .lib_doc import lib_doc
2
+ from .python_doc import python_doc
3
+ from .vis_doc import vis_doc
4
+
5
+ __all__ = ["lib_doc", "python_doc", "vis_doc"]
@@ -0,0 +1,6 @@
1
+ from pathlib import Path
2
+
3
+ from .._module_printer import SourceCodeModule
4
+
5
+ _source_file = Path(__file__).parents[2] / "src" / "PY" / "vis_doc.py"
6
+ vis_doc = SourceCodeModule("bm_preprocessing.PY.vis_doc", _source_file)
@@ -0,0 +1,114 @@
1
+ import matplotlib.pyplot as plt
2
+ import networkx as nx
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ # ===== INPUT =====
7
+ pages = ["A", "B", "C", "D"]
8
+ A = np.array(
9
+ [
10
+ [0, 1, 1, 0], # A -> B,C
11
+ [0, 0, 1, 0], # B -> C
12
+ [1, 0, 0, 0], # C -> A
13
+ [0, 0, 1, 0], # D -> C
14
+ ],
15
+ dtype=float,
16
+ )
17
+
18
+ d = 0.85
19
+ max_iter = 20
20
+ tol = 1e-8
21
+ n = len(pages)
22
+
23
+ # ===== STEP 1: TRANSITION MATRIX (COLUMN STOCHASTIC) =====
24
+ S = np.zeros_like(A)
25
+
26
+ for j in range(n):
27
+ out_degree = A[j].sum()
28
+ if out_degree > 0:
29
+ S[j] = A[j] / out_degree
30
+ else:
31
+ S[j] = np.ones(n) / n # dangling node
32
+
33
+ S = S.T # convert to column-stochastic
34
+
35
+ print("Transition Matrix S:")
36
+ print(pd.DataFrame(S, index=pages, columns=pages))
37
+
38
+ # ===== STEP 2: GOOGLE MATRIX =====
39
+ M = d * S + (1 - d) / n * np.ones((n, n))
40
+
41
+ print("\nGoogle Matrix M:")
42
+ print(pd.DataFrame(M, index=pages, columns=pages))
43
+
44
+ # ===== STEP 3: INITIAL RANK =====
45
+ r = np.ones(n) / n
46
+ history = [r.copy()]
47
+
48
+ print("\nInitial PageRank:")
49
+ print(pd.DataFrame({"Page": pages, "PR": r}))
50
+
51
+ # ===== STEP 4: ITERATIONS =====
52
+ for it in range(1, max_iter + 1):
53
+ r_new = M @ r
54
+ history.append(r_new.copy())
55
+
56
+ print(f"\nIteration {it}")
57
+ print(pd.DataFrame({"Page": pages, "PR": r_new.round(6)}))
58
+
59
+ diff = np.linalg.norm(r_new - r, 1)
60
+ r = r_new
61
+
62
+ if diff < tol:
63
+ break
64
+
65
+ # ===== FINAL RESULT =====
66
+ final_df = pd.DataFrame({"Page": pages, "Final PR": r})
67
+ final_df = final_df.sort_values("Final PR", ascending=False)
68
+
69
+ print("\nFinal PageRank:")
70
+ print(final_df.round(6))
71
+
72
+ # ===== VISUALIZATION 1: GRAPH (NODE SIZE ∝ PageRank) =====
73
+ G = nx.DiGraph()
74
+ for i, src in enumerate(pages):
75
+ for j, dst in enumerate(pages):
76
+ if A[i, j] == 1:
77
+ G.add_edge(src, dst)
78
+
79
+ plt.figure(figsize=(6, 4))
80
+ pos = nx.spring_layout(G, seed=42)
81
+
82
+ # --- Min-Max scaling for node sizes ---
83
+ pr_dict = {pages[i]: r[i] for i in range(n)}
84
+ pr_values = np.array([pr_dict[p] for p in G.nodes()])
85
+
86
+ min_size, max_size = 500, 5000
87
+ sizes = min_size + (pr_values - pr_values.min()) / (pr_values.max() - pr_values.min()) * (max_size - min_size)
88
+
89
+ nx.draw(G, pos, with_labels=True, node_size=sizes, arrows=True)
90
+ plt.title("Graph Visualization (node size ∝ PageRank)")
91
+ plt.show()
92
+
93
+ # ===== VISUALIZATION 2: CONVERGENCE =====
94
+ history_arr = np.array(history)
95
+
96
+ plt.figure()
97
+ for i, p in enumerate(pages):
98
+ plt.plot(history_arr[:, i], label=p)
99
+
100
+ plt.xlabel("Iteration")
101
+ plt.ylabel("PageRank")
102
+ plt.title("PageRank Convergence")
103
+ plt.legend()
104
+ plt.grid()
105
+ plt.show()
106
+
107
+ # ===== VISUALIZATION 3: FINAL SCORES =====
108
+ plt.figure()
109
+ plt.bar(final_df["Page"], final_df["Final PR"])
110
+ plt.xlabel("Page")
111
+ plt.ylabel("PageRank Score")
112
+ plt.title("Final PageRank Ranking")
113
+ plt.grid()
114
+ plt.show()
@@ -224,7 +224,7 @@ for i in range(df_iu.shape[0]):
224
224
  if i == ti and j == tu:
225
225
  ax.add_patch(
226
226
  plt.Rectangle(
227
- (j - 0.5, i - 0.5), 1, 1, fill=True, color="#4fc3f7", zorder=2
227
+ (j - 0.5, i - 0.5), 1, 1, fill=True, color="lightblue", zorder=2
228
228
  )
229
229
  )
230
230
  ax.text(
@@ -369,12 +369,12 @@ def set_tick_labels(ax, df):
369
369
  ax = axes[0, 0]
370
370
  ti = list(df_iu.index).index(target_item)
371
371
  tu = list(df_iu.columns).index(target_user)
372
- im = ax.imshow(df_iu.values.astype(float), cmap="YlGn", aspect="auto", vmin=1, vmax=5)
372
+ im = ax.imshow(df_iu.values.astype(float), cmap="viridis", aspect="auto", vmin=1, vmax=5)
373
373
  for i in range(df_iu.shape[0]):
374
374
  for j in range(df_iu.shape[1]):
375
375
  val = df_iu.iloc[i, j]
376
376
  if i == ti and j == tu:
377
- ax.add_patch(plt.Rectangle((j-.5, i-.5), 1, 1, fill=True, color="#4fc3f7", zorder=2))
377
+ ax.add_patch(plt.Rectangle((j-.5, i-.5), 1, 1, fill=True, color="lightblue", zorder=2))
378
378
  ax.text(j, i, "?", ha="center", va="center", fontsize=9, fontweight="bold", color="navy", zorder=3)
379
379
  elif pd.notna(val):
380
380
  ax.text(j, i, int(val), ha="center", va="center", fontsize=8)