itertoolkit 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. bm_preprocessing/__init__.py +14 -0
  2. bm_preprocessing/importer/DM/__init__.py +7 -0
  3. bm_preprocessing/importer/DM/agg.py +6 -0
  4. bm_preprocessing/importer/DM/dbscan.py +6 -0
  5. bm_preprocessing/importer/DM/finals.py +6 -0
  6. bm_preprocessing/importer/DM/gsp.py +6 -0
  7. bm_preprocessing/importer/DM/test.py +6 -0
  8. bm_preprocessing/importer/Finals/__init__.py +7 -0
  9. bm_preprocessing/importer/Finals/kaadhal.py +6 -0
  10. bm_preprocessing/importer/Finals/raaka.py +6 -0
  11. bm_preprocessing/importer/Finals/seedan.py +6 -0
  12. bm_preprocessing/importer/Finals/vikram.py +6 -0
  13. bm_preprocessing/importer/IR/__init__.py +6 -0
  14. bm_preprocessing/importer/IR/finals.py +6 -0
  15. bm_preprocessing/importer/IR/pagerank.py +6 -0
  16. bm_preprocessing/importer/IR/recommenders_pca.py +8 -0
  17. bm_preprocessing/importer/IR/test.py +6 -0
  18. bm_preprocessing/importer/PY/__init__.py +4 -0
  19. bm_preprocessing/importer/PY/lib_doc.py +6 -0
  20. bm_preprocessing/importer/PY/python_doc.py +6 -0
  21. bm_preprocessing/importer/__init__.py +8 -0
  22. bm_preprocessing/importer/_module_printer.py +23 -0
  23. bm_preprocessing/src/DM/__init__.py +1 -0
  24. bm_preprocessing/src/DM/agg.py +267 -0
  25. bm_preprocessing/src/DM/dbscan.py +218 -0
  26. bm_preprocessing/src/DM/finals.py +19 -0
  27. bm_preprocessing/src/DM/gsp.py +378 -0
  28. bm_preprocessing/src/DM/test.py +19 -0
  29. bm_preprocessing/src/Finals/__init__.py +1 -0
  30. bm_preprocessing/src/Finals/kaadhal.py +1453 -0
  31. bm_preprocessing/src/Finals/raaka.py +1338 -0
  32. bm_preprocessing/src/Finals/seedan.py +1173 -0
  33. bm_preprocessing/src/Finals/vikram.py +520 -0
  34. bm_preprocessing/src/IR/__init__.py +1 -0
  35. bm_preprocessing/src/IR/finals.py +14 -0
  36. bm_preprocessing/src/IR/pagerank.py +109 -0
  37. bm_preprocessing/src/IR/recommenders_pca.py +487 -0
  38. bm_preprocessing/src/IR/test.py +14 -0
  39. bm_preprocessing/src/PY/__init__.py +1 -0
  40. bm_preprocessing/src/PY/lib_doc.py +295 -0
  41. bm_preprocessing/src/PY/python_doc.py +177 -0
  42. bm_preprocessing/src/__init__.py +1 -0
  43. itertoolkit-1.5.0.dist-info/METADATA +120 -0
  44. itertoolkit-1.5.0.dist-info/RECORD +45 -0
  45. itertoolkit-1.5.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,295 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ # ===========================================================
5
+ # 📂 FILE READING & INITIAL INSPECTION
6
+ # ===========================================================
7
+
8
# Load data (CSV, Excel, JSON).
# Each reader rebinds ``df``; keep only the one matching the real input format.
df = pd.read_csv("data.csv")  # Load CSV
df = pd.read_excel("data.xlsx")  # Load Excel
df = pd.read_json("data.json")  # Load JSON

# Quick inspection
print(df.head())  # First rows
# DataFrame.info() writes its summary itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
df.info()
print(df.describe())  # Basic stats & info
print(df.shape)  # (rows, columns)
print(df.columns.tolist())  # List all column names
print(df.dtypes)  # Data types of each column
print(df.isnull().sum())  # Count NaNs per column
print(df.nunique())  # Unique values per column
print(df.value_counts("col"))  # Frequency of each value
23
+
24
+ # ===========================================================
25
+ # 🔍 SELECTION & MANIPULATION
26
+ # ===========================================================
27
+
28
# Selecting rows and columns
cols = df[["col1", "col2"]]  # Sub-frame with just these two columns
rows = df.iloc[:10]  # Positional slice: the first 10 rows
filtered = df.loc[df["age"] > 25]  # Rows where the boolean condition holds
cell = df.loc[0, "col1"]  # Single value addressed by row label / column label
cell = df.iloc[0, 0]  # Single value addressed by row / column position
filtered = df.query("age > 25 and city == 'NYC'")  # Same idea as an expression string
sampled = df.sample(n=5, random_state=42)  # 5 random rows, reproducible via the seed

# Sorting
sorted_df = df.sort_values(by="col", ascending=False)  # One column, descending
sorted_df = df.sort_values(
    by=["col1", "col2"],
    ascending=[True, False],  # col1 ascending, col2 descending
)
42
+
43
# Stacking & merging
df1 = pd.DataFrame({"id": [1, 2], "val": ["A", "B"]})
df2 = pd.DataFrame({"id": [2, 3], "val": ["C", "D"]})

# Concatenate along the row axis: the rows of df2 follow the rows of df1.
df_stacked = pd.concat([df1, df2], axis="index")
# Concatenate along the column axis: frames are placed side by side.
df_wide = pd.concat([df1, df2], axis="columns")
# SQL-like join on "id"; ``how`` may be inner/left/right/outer.
df_merged = df1.merge(df2, on="id", how="inner")
51
+
52
+ # ===========================================================
53
+ # 🛠️ DATA CLEANING & PROCESSING
54
+ # ===========================================================
55
+
56
# Handling missing values
# The filled column is assigned back to the frame. Calling
# fillna(..., inplace=True) on df["col"] would modify the intermediate Series
# produced by the indexing; with Copy-on-Write (default in pandas 3.x) that
# leaves ``df`` itself unchanged.
df["col"] = df["col"].fillna(df["col"].mean())  # Impute by mean
df["col"] = df["col"].fillna(df["col"].median())  # Impute by median
df["col"] = df["col"].fillna(df["col"].mode()[0])  # Impute by mode (categorical)
df.dropna(axis=0, inplace=True)  # Drop rows with any NaN values
df.dropna(subset=["col1", "col2"], inplace=True)  # Drop rows with NaN in specific columns
df.ffill(inplace=True)  # Forward fill NaN values
df.bfill(inplace=True)  # Backward fill NaN values
66
+
67
# Transformations
df["new_col"] = df["col"].apply(lambda value: value**2)  # Element-wise custom function
grouped = df.groupby("category")["sales"].sum()  # Sum of sales per category
grouped = df.groupby("cat").agg(
    {"sales": "sum", "qty": "mean"}  # A different aggregation per column
)
df["cat_code"] = df["category"].astype("category").cat.codes  # Integer code per category
df["col"] = df["col"].str.lower()  # Lowercase strings
df["col"] = df["col"].str.replace("old", "new")  # Substring replacement
df["col"] = df["col"].str.strip()  # Trim surrounding whitespace
df["binned"] = pd.cut(df["age"], bins=[0, 18, 35, 60, 100])  # Discretize into intervals
df = pd.get_dummies(df, columns=["city"], drop_first=True)  # One-hot encode, dropping first level
df.rename(columns={"old_name": "new_name"}, inplace=True)  # Rename columns
df.drop(columns=["col1", "col2"], inplace=True)  # Drop columns
df.drop_duplicates(inplace=True)  # Remove duplicate rows

# Type conversions
df["col"] = df["col"].astype(int)  # Cast the column to int
df["date"] = pd.to_datetime(df["date_str"])  # Parse date strings into datetimes
86
+
87
+ # ===========================================================
88
+ # 🔢 NUMPY ESSENTIALS
89
+ # ===========================================================
90
+
91
arr = np.array([1, 2, 3])  # 1-D array built from a list
reshaped = arr.reshape((1, -1))  # 2-D with a single row; -1 lets NumPy infer the width
mean_val = arr.mean()  # Arithmetic mean
std_val = arr.std()  # Standard deviation
mask = arr[arr > 2]  # Elements selected by a boolean condition
zeros = np.zeros((3, 3))  # 3x3 matrix of zeros
ones = np.ones((3, 3))  # 3x3 matrix of ones
eye = np.eye(3)  # 3x3 identity matrix
rand = np.random.rand(3, 3)  # 3x3 uniform samples from [0, 1)
dot = arr.dot(arr)  # Dot product of the vector with itself
norm = np.linalg.norm(arr)  # Vector norm (L2)
log = np.log2(arr)  # Element-wise base-2 logarithm
unique, counts = np.unique(arr, return_counts=True)  # Distinct values and their frequencies
104
+
105
+ # ===========================================================
106
+ # 🤖 SCIKIT-LEARN PREPROCESSING
107
+ # ===========================================================
108
+
109
+ from sklearn.impute import SimpleImputer
110
+ from sklearn.model_selection import train_test_split
111
+ from sklearn.preprocessing import (
112
+ LabelEncoder,
113
+ MinMaxScaler,
114
+ OneHotEncoder,
115
+ StandardScaler,
116
+ )
117
+
118
# Splitting data
X = df.drop(columns="target")  # Features: every column except "target"
y = df["target"]  # Labels
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,  # 20% of the rows go to the test split
    random_state=42,  # Fixed seed for a reproducible split
)

# Imputing (standard approach)
imputed = SimpleImputer(strategy="mean").fit_transform(df[["num_col"]])  # Fill with column mean
imputed = SimpleImputer(strategy="most_frequent").fit_transform(df[["cat"]])  # Fill with mode

# Scaling & encoding
scaled = StandardScaler().fit_transform(df[["age", "salary"]])  # Standardize (mean=0, std=1)
scaled = MinMaxScaler().fit_transform(df[["age", "salary"]])  # Normalize to [0, 1]
encoded = OneHotEncoder().fit_transform(df[["gender"]]).toarray()  # Dense one-hot matrix
le = LabelEncoder().fit_transform(df["target"])  # Integer-encoded target labels

# Pipeline (all-in-one)
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Apply a different transformer to each named column group.
processor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), ["age"]),
        ("cat", OneHotEncoder(), ["city"]),
    ]
)
148
+
149
+ # ===========================================================
150
+ # ⛏️ DATA MINING (DM) ESSENTIALS
151
+ # ===========================================================
152
+
153
+ from sklearn.cluster import KMeans
154
+ from sklearn.ensemble import (
155
+ AdaBoostClassifier,
156
+ BaggingClassifier,
157
+ RandomForestClassifier,
158
+ )
159
+ from sklearn.metrics import (
160
+ accuracy_score,
161
+ auc,
162
+ classification_report,
163
+ confusion_matrix,
164
+ f1_score,
165
+ precision_recall_curve,
166
+ precision_score,
167
+ recall_score,
168
+ roc_curve,
169
+ )
170
+ from sklearn.preprocessing import label_binarize
171
+ from sklearn.tree import DecisionTreeClassifier
172
+
173
# --- Decision Trees ---
# fit() returns the estimator itself, so constructing and fitting on separate
# lines binds the same fitted object as the chained form.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)  # Predictions for the held-out rows
print(accuracy_score(y_test, y_pred))  # Accuracy

# --- Ensemble Methods ---
bag = BaggingClassifier(n_estimators=10, random_state=42)  # Bagging
bag.fit(X_train, y_train)
ada = AdaBoostClassifier(n_estimators=50, random_state=42)  # AdaBoost
ada.fit(X_train, y_train)
rf = RandomForestClassifier(n_estimators=100, random_state=42)  # Random Forest
rf.fit(X_train, y_train)

# --- Clustering ---
kmeans = KMeans(n_clusters=3, random_state=42)  # K-Means with 3 clusters
kmeans.fit(X)
labels = kmeans.labels_  # Cluster label assigned to each row of X
centers = kmeans.cluster_centers_  # Cluster centroids

# --- Classification Metrics ---
# All metrics below score the decision-tree predictions in ``y_pred``.
print(accuracy_score(y_test, y_pred))  # Accuracy = (TP+TN)/(TP+TN+FP+FN)
print(precision_score(y_test, y_pred, average="weighted"))  # Precision = TP/(TP+FP)
print(recall_score(y_test, y_pred, average="weighted"))  # Recall = TP/(TP+FN)
print(f1_score(y_test, y_pred, average="weighted"))  # F1 = 2*(P*R)/(P+R)
print(confusion_matrix(y_test, y_pred))  # Confusion matrix
print(classification_report(y_test, y_pred))  # Full report

# --- ROC & AUC ---
# NOTE(review): the class list is hard-coded, so this assumes the target labels
# are exactly 0, 1 and 2 — confirm against the dataset in use.
y_bin = label_binarize(y_test, classes=[0, 1, 2])  # One indicator column per listed class
y_proba = rf.predict_proba(X_test)  # Per-class probabilities from the random forest
fpr, tpr, _ = roc_curve(y_bin[:, 0], y_proba[:, 0])  # ROC curve using column 0 of each
roc_auc = auc(fpr, tpr)  # Area under that curve
209
+
210
+ # --- Apriori / Association Rules (manual) ---
211
+ from itertools import combinations
212
+
213
def support(itemset, txns):
    """Return the fraction of transactions that contain every item of *itemset*.

    Args:
        itemset: A ``set``/``frozenset`` (anything providing ``issubset``).
        txns: A sized collection of transactions, each an iterable of items.

    Returns:
        A float in [0, 1]; 0.0 when *txns* is empty (instead of raising
        ZeroDivisionError).
    """
    total = len(txns)
    if total == 0:
        return 0.0
    return sum(1 for t in txns if itemset.issubset(t)) / total


transactions = [
    ["milk", "bread"],
    ["milk", "diaper", "beer", "bread"],
    ["milk", "bread"],
]
items = {item for t in transactions for item in t}  # Every distinct item
freq_items = {frozenset([i]) for i in items}  # C1 candidates (one singleton per item)
# Candidate pairs, built from the sorted item list so the order is deterministic.
pairs = [frozenset(c) for c in combinations(sorted(items), 2)]
224
+
225
# --- Entropy & Information Gain (ID3) ---
def entropy(probs):
    """Shannon entropy (base 2) of the probabilities in *probs*.

    Entries that are not strictly positive are skipped.
    """
    total = 0
    for p in probs:
        if p > 0:
            total += p * np.log2(p)
    return -total


def info_gain(parent_ent, children):
    """Parent entropy minus the weighted entropies of the children.

    *children* is an iterable of ``(weight, probs)`` pairs.
    """
    weighted = sum(w * entropy(c) for w, c in children)
    return parent_ent - weighted


def gini(probs):
    """Gini impurity: one minus the sum of squared probabilities."""
    squares = sum(p**2 for p in probs)
    return 1 - squares
231
+
232
+ # ===========================================================
233
+ # 🔎 INFORMATION RETRIEVAL (IR) ESSENTIALS
234
+ # ===========================================================
235
+
236
+ import math
237
+ from collections import Counter
238
+
239
# --- Term Frequency (TF) ---
# TF(t, d) = count(t in d) / total_terms_in_d
def tf(term, doc):
    """Return the relative frequency of *term* in *doc* (a sequence of terms).

    An empty *doc* yields 0.0 instead of raising ZeroDivisionError.
    """
    length = len(doc)
    if length == 0:
        return 0.0
    return doc.count(term) / length


# --- Inverse Document Frequency (IDF) ---
# IDF(t) = log(N / df(t)) where N = total docs, df(t) = docs containing t
def idf(term, docs):
    """Return the natural-log inverse document frequency of *term* over *docs*.

    A term contained in no document yields 0.0 instead of raising
    ZeroDivisionError.
    """
    doc_freq = sum(1 for d in docs if term in d)
    if doc_freq == 0:
        return 0.0
    return math.log(len(docs) / doc_freq)


# --- TF-IDF ---
# TF-IDF(t, d) = TF(t, d) * IDF(t)
def tfidf(term, doc, docs):
    """Return ``tf(term, doc) * idf(term, docs)``."""
    return tf(term, doc) * idf(term, docs)
250
+
251
# --- BM25 ---
# BM25(t, d) = IDF(t) * (TF * (k1 + 1)) / (TF + k1 * (1 - b + b * |d| / avgdl))
k1, b = 1.5, 0.75  # The k1 and b constants of the formula above
docs = [
    ["term1", "term2"],
    ["term2", "term3"],
    ["term1", "term3", "term4"],
]
avgdl = np.mean(list(map(len, docs)))  # Mean number of terms per document
257
+
258
# --- Boolean Retrieval ---
# AND: set(doc1_terms) & set(doc2_terms)
# OR:  set(doc1_terms) | set(doc2_terms)
# NOT: set(all_terms) - set(doc_terms)
def bool_and(q_terms, doc):
    """Return True when every query term occurs in *doc* (True for no terms)."""
    for t in q_terms:
        if t not in doc:
            return False
    return True


def bool_or(q_terms, doc):
    """Return True when at least one query term occurs in *doc* (False for no terms)."""
    for t in q_terms:
        if t in doc:
            return True
    return False
264
+
265
# --- Cosine Similarity ---
# cos(A, B) = (A · B) / (||A|| * ||B||)
def cosine_sim(a, b):
    """Return the dot product of *a* and *b* divided by the product of their L2 norms."""
    numerator = np.dot(a, b)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    return numerator / denominator


# --- Jaccard Similarity ---
# J(A, B) = |A ∩ B| / |A ∪ B|
def jaccard(a, b):
    """Return the size of the intersection of sets *a* and *b* over the size of their union."""
    shared = a & b
    combined = a | b
    return len(shared) / len(combined)
272
+
273
# --- Tokenization & Text Processing ---
# Lowercase the text, then split it on whitespace.
tokens = str.split("hello world foo bar".lower())
vocab = set(tokens)  # Distinct tokens
bow = Counter(tokens)  # Token -> number of occurrences
277
+ from sklearn.feature_extraction.text import TfidfVectorizer
278
+
279
# Fit a TF-IDF vectorizer on two tiny documents and transform them in one step.
tfidf_matrix = TfidfVectorizer().fit_transform(["doc1 text", "doc2 text"])
282
+
283
# --- Inverted Index ---
# Maps each term to the set of ids (positions in ``docs``) of the documents
# that contain it.
inv_index = {}
for doc_id, doc in enumerate(docs):
    for term in doc:
        if term not in inv_index:
            inv_index[term] = set()
        inv_index[term].add(doc_id)
288
+
289
# --- Precision & Recall (IR) ---
# Precision@k = relevant_in_top_k / k
# Recall@k    = relevant_in_top_k / total_relevant
def precision_at_k(retrieved, relevant, k):
    """Return the share of the first *k* retrieved items that are relevant.

    Args:
        retrieved: Ranked sequence of item ids (must support slicing).
        relevant: Any iterable of relevant item ids.
        k: Cut-off rank.

    Returns:
        A float; 0.0 when *k* is not positive (instead of dividing by zero).
    """
    if k <= 0:
        return 0.0
    hits = set(retrieved[:k]) & set(relevant)
    return len(hits) / k


def recall_at_k(retrieved, relevant, k):
    """Return the share of relevant items found among the first *k* retrieved.

    Args:
        retrieved: Ranked sequence of item ids (must support slicing).
        relevant: Any iterable of relevant item ids.
        k: Cut-off rank.

    Returns:
        A float; 0.0 when *relevant* is empty (instead of dividing by zero).
    """
    relevant_set = set(relevant)
    if not relevant_set:
        return 0.0
    hits = set(retrieved[:k]) & relevant_set
    return len(hits) / len(relevant_set)
@@ -0,0 +1,177 @@
1
+ """
2
+ =============================================================================
3
+ PYTHON BASICS CHEAT SHEET
4
+ =============================================================================
5
+ A quick reference guide for core Python concepts, data structures, and features.
6
+ """
7
+
8
+ # =============================================================================
9
+ # 1. LISTS (Mutable, Ordered)
10
+ # =============================================================================
11
my_list = [1, 2, 3, "a", "b"]

# Operations
my_list.append(4)  # Add to end: [1, 2, 3, 'a', 'b', 4]
my_list.insert(0, 0)  # Insert at index: [0, 1, 2, 3, 'a', 'b', 4]
my_list.extend([5, 6])  # Append multiple: [0, 1, 2, 3, 'a', 'b', 4, 5, 6]
my_list.pop()  # Remove & return last item (6)
my_list.pop(1)  # Remove & return item at index 1 (1)
my_list.remove("a")  # Remove first occurrence of 'a'
my_list.reverse()  # Reverse in place: [5, 4, 'b', 3, 2, 0]
# my_list.sort()           # Sort in place (requires same types)
# sorted(my_list)          # Return new sorted list
# Query the list before emptying it: index() raises ValueError when the value
# is absent, which is always the case once clear() has run.
count = my_list.count(2)  # Count occurrences (1)
idx = my_list.index(3)  # Find index of first occurrence (3)
my_list.clear()  # Empty the list
26
+
27
+ # Slicing: list[start:stop:step]
28
+ # my_list[1:4] (index 1 to 3), my_list[::-1] (reverse)
29
+
30
+ # =============================================================================
31
+ # 2. SETS (Mutable, Unordered, Unique Elements)
32
+ # =============================================================================
33
my_set = {1, 2, 3}
empty_set = set()  # A bare {} literal would be an empty dict, so use set()

# Operations
my_set.add(4)  # Insert one element
my_set.update([5, 6])  # Insert every element of an iterable
my_set.remove(6)  # Delete an element; KeyError if it is missing
my_set.discard(10)  # Delete an element; silently ignored if it is missing
my_set.pop()  # Delete & return an arbitrary element
my_set.clear()  # Delete everything

set_a = {1, 2}
set_b = {2, 3}
union = set_a.union(set_b)  # {1, 2, 3}  (same as set_a | set_b)
intersection = set_a.intersection(set_b)  # {2}  (same as set_a & set_b)
diff = set_a.difference(set_b)  # {1}  (same as set_a - set_b)
sym_diff = set_a.symmetric_difference(set_b)  # {1, 3}  (same as set_a ^ set_b)
49
+
50
+ # =============================================================================
51
+ # 3. TUPLES (Immutable, Ordered)
52
+ # =============================================================================
53
my_tuple = (1, 2, 3, 2)
single_tuple = (1,)  # The trailing comma is what makes this a one-element tuple

# Operations (only read-only queries are available, since tuples are immutable)
count, idx = (
    my_tuple.count(2),  # Number of occurrences of 2 -> 2
    my_tuple.index(3),  # Position of the first 3 -> 2
)
# Tuples support unpacking: a, b, c, d = my_tuple
60
+
61
+ # =============================================================================
62
+ # 4. DICTIONARIES (Mutable, Key-Value Pairs, Unordered before Python 3.7)
63
+ # =============================================================================
64
my_dict = dict(name="Alice", age=25)

# Operations
my_dict["city"] = "NYC"  # Insert a new key (or overwrite an existing one)
val = my_dict.get("age")  # Lookup without KeyError; None when the key is absent
val = my_dict.get("x", 0)  # Lookup with an explicit fallback value
keys = my_dict.keys()  # Live view of the keys: name, age, city
values = my_dict.values()  # Live view of the values: 'Alice', 25, 'NYC'
items = my_dict.items()  # Live view of (key, value) pairs

# Removal
popped_val = my_dict.pop("age")  # Delete 'age' and hand back its value
popped_item = my_dict.popitem()  # Delete & return the last inserted pair as a tuple
# del my_dict['name']            # Delete key
my_dict.clear()  # Remove every entry
my_dict.update(a=1, b=2)  # Merge in new entries
80
+
81
+ # =============================================================================
82
+ # 5. LIST / DICT / SET COMPREHENSIONS
83
+ # =============================================================================
84
# List Comprehension: [expression for item in iterable if condition]
squares = [n * n for n in range(10) if not n % 2]  # [0, 4, 16, 36, 64]

# Dict Comprehension: {key_expr: val_expr for item in iterable if condition}
sq_dict = {n: n * n for n in range(5)}  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

# Set Comprehension: {expression for item in iterable if condition}
sq_set = {n * n for n in (-1, 1, 2)}  # {1, 4} — duplicates collapse

# Generator Expression: (expression for item in iterable if condition)
gen = (n * n for n in range(10))  # Values are produced on demand
95
+
96
+ # =============================================================================
97
+ # 6. LAMBDA FUNCTIONS, MAP, FILTER, REDUCE
98
+ # =============================================================================
99
# lambda arguments: expression
add = lambda x, y: x + y
print(add(2, 3))  # 5

nums = [1, 2, 3, 4]
# map: call the function on each item
mapped = list(map(lambda n: n * 2, nums))  # [2, 4, 6, 8]

# filter: keep only the items for which the function is truthy
filtered = list(filter(lambda n: n % 2 == 0, nums))  # [2, 4]

# reduce (lives in functools): fold the items into one value, left to right
from functools import reduce

product = reduce(lambda acc, n: acc * n, nums)  # 24

# Sort with lambda key
words = ["apple", "banana", "cherry"]
words.sort(key=lambda word: len(word))  # Shortest first; equal lengths keep their order
118
+
119
+
120
+ # =============================================================================
121
+ # 7. CLASSES AND OBJECTS (OOP)
122
+ # =============================================================================
123
class Animal:
    """Generic animal; also counts how many instances have been constructed."""

    species_count = 0  # Class attribute, incremented by every __init__ call

    def __init__(self, name):
        """Bump the class-wide counter and remember *name* on the instance."""
        Animal.species_count = Animal.species_count + 1
        self.name = name  # Instance attribute

    def speak(self):
        """Instance method: return a placeholder sound."""
        return "Some sound"

    @classmethod
    def get_count(cls):
        """Class method: receives the class and returns its ``species_count``."""
        return cls.species_count

    @staticmethod
    def is_alive():
        """Static method: receives neither instance nor class; always True."""
        return True
145
+
146
+
147
# Inheritance
class Dog(Animal):
    """Animal subclass that adds a breed and overrides ``speak``."""

    def __init__(self, name, breed):
        """Delegate *name* to the parent constructor, then store *breed*."""
        super().__init__(name)
        self.breed = breed

    def speak(self):
        """Override of ``Animal.speak``."""
        return "Woof!"


dog = Dog("Buddy", "Golden Retriever")
print(dog.speak())  # "Woof!"
print(Animal.get_count())  # 1 — only one instance has been created so far
160
+
161
+ # =============================================================================
162
+ # 8. FILE HANDLING
163
+ # =============================================================================
164
# A 'with' block closes the file automatically when the block exits.
# Modes: 'r' (read), 'w' (write, truncates), 'a' (append), 'r+' (read & write), 'b' (binary)

# Write to file
with open("example.txt", mode="w", encoding="utf-8") as file:
    file.write("Hello World\nLine 2")

# Read from file
with open("example.txt", mode="r", encoding="utf-8") as file:
    content = file.read()  # Whole file as one string
    # file.seek(0)             # Reset cursor to start
    # lines = file.readlines() # Read lines into a list
    # for line in file:        # Iterate line by line (memory efficient)
    #     print(line.strip())
@@ -0,0 +1 @@
1
+ """Raw source snippets shipped with the package."""
@@ -0,0 +1,120 @@
1
+ Metadata-Version: 2.4
2
+ Name: itertoolkit
3
+ Version: 1.5.0
4
+ Summary: An itertools-inspired toolkit for cached iterator and data-structure processing
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: gsppy>=5.3.0
7
+ Requires-Dist: matplotlib>=3.10.8
8
+ Requires-Dist: networkx>=3.6.1
9
+ Requires-Dist: numpy>=2.4.4
10
+ Requires-Dist: pandas>=3.0.2
11
+ Requires-Dist: plotly>=6.6.0
12
+ Requires-Dist: scikit-learn>=1.8.0
13
+ Requires-Dist: scipy>=1.17.1
14
+ Requires-Dist: seaborn>=0.13.2
15
+ Description-Content-Type: text/markdown
16
+
17
+ # itertoolkit
18
+
19
+ Functions creating iterators and cached data pipelines for efficient looping.
20
+
21
+ `itertoolkit` is an `itertools`-inspired wrapper focused on practical data processing. It keeps the lazy, composable style of iterator algebra, then adds cache-aware helpers so repeated list and data-structure transformations run faster.
22
+
23
+ The goal is simple:
24
+
25
+ - Keep memory usage low with lazy iterators.
26
+ - Speed up repeated workloads with caching.
27
+ - Make iterator pipelines readable and reusable.
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install itertoolkit
33
+ ```
34
+
35
+ ## Quick Start
36
+
37
+ ```python
38
+ from itertools import count, islice
39
+
40
+ # Install name: itertoolkit
41
+ # Current import path in this repo remains bm_preprocessing
42
+ from bm_preprocessing import IR, DM
43
+
44
+ # Example: base itertools stream
45
+ stream = (x * x for x in count(1))
46
+ print(list(islice(stream, 5))) # [1, 4, 9, 16, 25]
47
+
48
+ # Example: cached computation workflow (concept)
49
+ # result = itertoolkit.cached_map(expensive_fn, dataset, cache_key="v1")
50
+ ```
51
+
52
+ ## Why It Is Faster
53
+
54
+ `itertoolkit` performance comes from combining:
55
+
56
+ - Lazy iteration, so intermediate materialization is avoided.
57
+ - Cache-first wrappers, so repeated transformations are reused.
58
+ - Composable pipelines, so complex loops stay compact and optimized.
59
+
60
+ In repeated analytics or feature-building jobs, the first pass computes and stores results, and later passes can fetch from cache instead of recomputing every step.
61
+
62
+ ## Core Iterator Families
63
+
64
+ ### General iterators
65
+
66
+ | Iterator concept | Input | Output shape | Typical use |
67
+ | --- | --- | --- | --- |
68
+ | Running reduction | iterable, func | incremental totals | rolling stats |
69
+ | Batching | iterable, n | tuples of size n | chunk processing |
70
+ | Chaining | multiple iterables | one continuous stream | merging sources |
71
+ | Selection | data + selectors | filtered stream | mask-based filtering |
72
+ | Windowing | iterable | adjacent pairs/windows | transition analysis |
73
+ | Truncation | predicate/slice | bounded output | safe handling of infinite streams |
74
+
75
+ ### Combinatoric iterators
76
+
77
+ | Iterator concept | Output |
78
+ | --- | --- |
79
+ | Cartesian products | all pairings across inputs |
80
+ | Permutations | order-sensitive tuples |
81
+ | Combinations | order-insensitive unique tuples |
82
+ | Combinations with replacement | tuples allowing repeated values |
83
+
84
+ ## Pipeline Pattern
85
+
86
+ Use this pattern when processing large lists, tables, graphs, or text records:
87
+
88
+ 1. Start from one or more iterables.
89
+ 2. Chain filtering, mapping, grouping, and batching.
90
+ 3. Add cache boundaries around expensive stages.
91
+ 4. Materialize only where needed (`list`, `tuple`, `DataFrame`, model input).
92
+
93
+ ```python
94
+ from itertools import chain
95
+
96
+ sources = [[1, 2, 3], [4, 5], [6]]
97
+ pipeline = (x * 10 for x in chain.from_iterable(sources) if x % 2 == 0)
98
+ print(list(pipeline)) # [20, 40, 60]
99
+ ```
100
+
101
+ ## Caching Strategy
102
+
103
+ Recommended caching behavior for data-heavy workloads:
104
+
105
+ - Key by transformation signature and input fingerprint.
106
+ - Keep deterministic steps cacheable.
107
+ - Invalidate cache on function/version changes.
108
+ - Persist long-running results between sessions.
109
+
110
+ This makes repeated preprocessing and feature extraction significantly cheaper.
111
+
112
+ ## Compatibility Note
113
+
114
+ Package distribution name is `itertoolkit`.
115
+
116
+ Current code in this repository still exposes the import path `bm_preprocessing` for compatibility with existing users. If needed, a follow-up release can add a top-level `itertoolkit` import alias as well.
117
+
118
+ ## License
119
+
120
+ MIT
@@ -0,0 +1,45 @@
1
+ bm_preprocessing/__init__.py,sha256=780DeJ5tt1xkg572jEcYXd1z8Jj7QteRv9tfRcwqamo,353
2
+ bm_preprocessing/importer/__init__.py,sha256=3144LTa5i9lr6vJZRdvDyqNmRPRM7FWMOj0aOS5NHM8,186
3
+ bm_preprocessing/importer/_module_printer.py,sha256=rWR9jyfr7ACt5qVeZ4uRHfaZLOb4oKzSIP319rzp4Po,649
4
+ bm_preprocessing/importer/DM/__init__.py,sha256=NytK_IN5e7Lxdfh_T1tjLQ0JFrcLoupjdGOJpU5FwWw,180
5
+ bm_preprocessing/importer/DM/agg.py,sha256=QE1GMhYzSDQ9fs1Ymjy80Bwfsfv7mOlm9tWcgCDljx4,211
6
+ bm_preprocessing/importer/DM/dbscan.py,sha256=P-pnTG7PX2E4Z6yCBVgYhodVSTrrYvWNbJEFHW8744M,220
7
+ bm_preprocessing/importer/DM/finals.py,sha256=ldHMMjmRaG8ToFoc0ALzpLQ2nqaqIe-gjIg7_Odrw7g,220
8
+ bm_preprocessing/importer/DM/gsp.py,sha256=tSb0uOc_eistbdpgeykdjLQoOCZrhaT9rVHrFrpsoWs,211
9
+ bm_preprocessing/importer/DM/test.py,sha256=tS-NUS0fIxmEyc7B_YhLh6duT8k87x0wCfq0O1oG1VM,214
10
+ bm_preprocessing/importer/Finals/__init__.py,sha256=chN1hYPGdRM1GX4q54APyDgOSBWNKcSXaexnxnVYdUA,195
11
+ bm_preprocessing/importer/Finals/kaadhal.py,sha256=7gaY85G_sRr3XlbLmbPMUHUjz2S0vkBHfTkF-xjruiE,231
12
+ bm_preprocessing/importer/Finals/raaka.py,sha256=OdmLlgq4u-OpvdZe8ruRlD0g4jxpg8USqIdN2YJ_6lo,225
13
+ bm_preprocessing/importer/Finals/seedan.py,sha256=fEXKRMA_rnDyM3iN5Ka76pv9SaDfeVht_tHK-oHOS2U,228
14
+ bm_preprocessing/importer/Finals/vikram.py,sha256=cijp6K4snmdD_o4vbUceWvN-w7Z1qDmQkOWa1PzHBlM,228
15
+ bm_preprocessing/importer/IR/__init__.py,sha256=ibuzqcqquie8vom3Yj_J4pnjlosUvEwVW2tyFz5d8wo,196
16
+ bm_preprocessing/importer/IR/finals.py,sha256=aTI2-mVhDBInWYjNMdpPbwId_bK9jrXMTML22F2RzNk,220
17
+ bm_preprocessing/importer/IR/pagerank.py,sha256=XH8NxjKm1lczJ0PrQV1QZujogNn5gMJN6pUlth9itxw,226
18
+ bm_preprocessing/importer/IR/recommenders_pca.py,sha256=5Ui3ZK6iVXEAVDlC72LgPKjB0uyW1OSl7F0kz4QYfek,258
19
+ bm_preprocessing/importer/IR/test.py,sha256=RlRBb6usbeKuWUtrs6QKiahK5FZ4p4A7Y-Qoy4eQjMU,214
20
+ bm_preprocessing/importer/PY/__init__.py,sha256=0Dsh94cSFgb1N4We7zwYzQDR3YHC-Y7pcDvHR-9FKAs,105
21
+ bm_preprocessing/importer/PY/lib_doc.py,sha256=z3H3JO5GSVrDJZy8PEzSGlM_tq_dH5E-4sCjzbPHvzg,223
22
+ bm_preprocessing/importer/PY/python_doc.py,sha256=5HCNqn4Y3jEYTt97koY34LAKODQX7HcB93aApbQL54w,232
23
+ bm_preprocessing/src/__init__.py,sha256=Y4g952oNndkD4fX7aRnbrOVTObZOyhfM7yYewiZaR-U,53
24
+ bm_preprocessing/src/DM/__init__.py,sha256=tGoE9Z5I3cdOxyosBmA5fp6Keh2sO2o0hbhf3SGeBEM,36
25
+ bm_preprocessing/src/DM/agg.py,sha256=IptAnnInn37qkADRGLWL41XqMz99kksIwBPAPVCLeaA,6816
26
+ bm_preprocessing/src/DM/dbscan.py,sha256=j06WbIN-nws9D5BKnEQW_oUl5QZPqPpZdOzKWqhXz5A,6337
27
+ bm_preprocessing/src/DM/finals.py,sha256=gIj5e37rvfvsotomjWSTBFd8gX6ax9XA7loadMuyu0g,337
28
+ bm_preprocessing/src/DM/gsp.py,sha256=EYjEe245MMExdyQQxPrsVnkr_g9-sTOGyAoBZVAyXVs,10944
29
+ bm_preprocessing/src/DM/test.py,sha256=sgNQvC72u767xjqeBHsJxHvAaULv0s7d8frLr7vacvg,342
30
+ bm_preprocessing/src/Finals/__init__.py,sha256=SQpE3o4m6Dkcrm8bv-Qaf29wo7a-sDIOwsB-nN-sPHo,31
31
+ bm_preprocessing/src/Finals/kaadhal.py,sha256=XsnzDLcnK6zrGFwDoFYhIsVb2jfLlfYuX-c7Dh_sJX0,38558
32
+ bm_preprocessing/src/Finals/raaka.py,sha256=PzgfzO_FigPZ9J-PTgWO2KcReOKQFjQtJxIGsQC5bZk,40431
33
+ bm_preprocessing/src/Finals/seedan.py,sha256=6g7wDwpsBv0BavLsR-U4AmjlDRrcelTZaWGlAP7_Yo8,31711
34
+ bm_preprocessing/src/Finals/vikram.py,sha256=GbqoPpa4dg4Q9gGDZF4leUfGzrVwMbId1f73-ooU7cI,12841
35
+ bm_preprocessing/src/IR/__init__.py,sha256=ngrkbmeRSLuVoNAztpNYt6WxYhftH0PSNP72sb7TbaE,27
36
+ bm_preprocessing/src/IR/finals.py,sha256=XvPbcxUo5ib-KkhClTKHgAcNYX3w9KSgMrgdCXStVCQ,217
37
+ bm_preprocessing/src/IR/pagerank.py,sha256=UdRvgkezmYrgG_RK_h3kC9-qnZDcoE7tAw_VYqkuRvo,2989
38
+ bm_preprocessing/src/IR/recommenders_pca.py,sha256=pBz5HDwtyGavITDkvBVJtGgHcyl1sGgmcvkCOt9VNuw,14181
39
+ bm_preprocessing/src/IR/test.py,sha256=tdS9WKDBl7-zMu-ETnVSYgAUkqR-nSnB7Sb7AlnJNO4,222
40
+ bm_preprocessing/src/PY/__init__.py,sha256=v-U6e8H9-BNFcb24QEDYMPKiTaFT_q_NB7WGBFpA_jc,39
41
+ bm_preprocessing/src/PY/lib_doc.py,sha256=7zPuq5Ez_NPmtaeBRkKq2vA9mU6Tgf4bFuDqgZ9TnjI,11302
42
+ bm_preprocessing/src/PY/python_doc.py,sha256=iPKngoIgDRtrXxao5WVlx1ZaLmQIs5cf3x39aHLG34o,7088
43
+ itertoolkit-1.5.0.dist-info/METADATA,sha256=gJLZSTVSGPE8mwJ_2ayQM2_VMJe4zArD_go8KbbUALA,3918
44
+ itertoolkit-1.5.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
45
+ itertoolkit-1.5.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any