bm-preprocessing 1.3.5__tar.gz → 1.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. bm_preprocessing-1.3.8/PKG-INFO +10 -0
  2. bm_preprocessing-1.3.8/PY/__init__.py +0 -0
  3. bm_preprocessing-1.3.8/PY/_module_printer.py +11 -0
  4. bm_preprocessing-1.3.8/PY/lib_doc.py +297 -0
  5. {bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources → bm_preprocessing-1.3.8/PY}/python_doc.py +54 -49
  6. bm_preprocessing-1.3.8/README.md +0 -0
  7. bm_preprocessing-1.3.8/bm_preprocessing.egg-info/PKG-INFO +10 -0
  8. bm_preprocessing-1.3.8/bm_preprocessing.egg-info/SOURCES.txt +11 -0
  9. bm_preprocessing-1.3.8/bm_preprocessing.egg-info/dependency_links.txt +1 -0
  10. bm_preprocessing-1.3.8/bm_preprocessing.egg-info/requires.txt +4 -0
  11. bm_preprocessing-1.3.8/bm_preprocessing.egg-info/top_level.txt +1 -0
  12. bm_preprocessing-1.3.8/pyproject.toml +12 -0
  13. bm_preprocessing-1.3.8/setup.cfg +4 -0
  14. bm_preprocessing-1.3.5/.gitignore +0 -221
  15. bm_preprocessing-1.3.5/INSTALLATION.md +0 -35
  16. bm_preprocessing-1.3.5/PKG-INFO +0 -257
  17. bm_preprocessing-1.3.5/README.md +0 -243
  18. bm_preprocessing-1.3.5/USAGE.md +0 -127
  19. bm_preprocessing-1.3.5/pyproject.toml +0 -22
  20. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/__init__.py +0 -21
  21. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/adaboost.py +0 -30
  22. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/all.py +0 -30
  23. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/all_vis.py +0 -30
  24. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/apriori.py +0 -30
  25. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/bagging.py +0 -30
  26. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/hash.py +0 -30
  27. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/hunts.py +0 -30
  28. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/hunts_test.py +0 -30
  29. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/id3.py +0 -30
  30. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/id3_test.py +0 -30
  31. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/lib_doc.py +0 -30
  32. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/metrics.py +0 -30
  33. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/preprocessing.py +0 -30
  34. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/python_doc.py +0 -30
  35. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/adaboost.py +0 -69
  36. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/all.py +0 -308
  37. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/all_vis.py +0 -368
  38. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/apriori.py +0 -113
  39. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/bagging.py +0 -173
  40. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/data.csv +0 -11
  41. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/hash.py +0 -161
  42. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/heart.csv +0 -304
  43. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/hunts.py +0 -96
  44. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/hunts_test.py +0 -101
  45. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/id3.py +0 -134
  46. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/id3_test.py +0 -148
  47. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/lib_doc.py +0 -261
  48. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/metrics.py +0 -240
  49. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/preprocessing.py +0 -42
  50. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/tennis.csv +0 -15
  51. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/test_all.py +0 -400
  52. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/sources/worksheet.py +0 -305
  53. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/test_all.py +0 -30
  54. bm_preprocessing-1.3.5/src/bm_preprocessing/DM/worksheet.py +0 -30
  55. bm_preprocessing-1.3.5/src/bm_preprocessing/IR/__init__.py +0 -9
  56. bm_preprocessing-1.3.5/src/bm_preprocessing/IR/all.py +0 -30
  57. bm_preprocessing-1.3.5/src/bm_preprocessing/IR/all_vis.py +0 -30
  58. bm_preprocessing-1.3.5/src/bm_preprocessing/IR/eval_metrics.py +0 -26
  59. bm_preprocessing-1.3.5/src/bm_preprocessing/IR/ndd.py +0 -26
  60. bm_preprocessing-1.3.5/src/bm_preprocessing/IR/rel.py +0 -26
  61. bm_preprocessing-1.3.5/src/bm_preprocessing/IR/sources/all.py +0 -255
  62. bm_preprocessing-1.3.5/src/bm_preprocessing/IR/sources/all_vis.py +0 -294
  63. bm_preprocessing-1.3.5/src/bm_preprocessing/IR/sources/eval_metrics.py +0 -224
  64. bm_preprocessing-1.3.5/src/bm_preprocessing/IR/sources/ndd.py +0 -105
  65. bm_preprocessing-1.3.5/src/bm_preprocessing/IR/sources/rel.py +0 -116
  66. bm_preprocessing-1.3.5/src/bm_preprocessing/__init__.py +0 -5
@@ -0,0 +1,10 @@
+ Metadata-Version: 2.4
+ Name: bm-preprocessing
+ Version: 1.3.8
+ Summary: Add your description here
+ Requires-Python: >=3.11
+ Description-Content-Type: text/markdown
+ Requires-Dist: matplotlib>=3.10.8
+ Requires-Dist: numpy>=2.4.4
+ Requires-Dist: pandas>=3.0.2
+ Requires-Dist: seaborn>=0.13.2
File without changes
@@ -0,0 +1,11 @@
+ import types
+
+
+ class PrintableModule(types.ModuleType):
+     """A module subclass that prints its source when print() is called."""
+
+     def __repr__(self):
+         return self._source_content
+
+     def __str__(self):
+         return self._source_content
@@ -0,0 +1,297 @@
+ import sys
+
+ import numpy as np
+ import pandas as pd
+
+ from bm_preprocessing.PY._module_printer import PrintableModule
+
+ # ===========================================================
+ # 📂 FILE READING & INITIAL INSPECTION
+ # ===========================================================
+
+ # Load data (CSV, Excel, JSON)
+ df = pd.read_csv("data.csv")  # Load CSV
+ df = pd.read_excel("data.xlsx")  # Load Excel
+ df = pd.read_json("data.json")  # Load JSON
+
+ # Quick Inspection
+ print(df.head())
+ print(df.info())
+ print(df.describe())  # Basic stats & info
+ print(df.shape)  # (rows, columns)
+ print(df.columns.tolist())  # List all column names
+ print(df.dtypes)  # Data types of each column
+ print(df.isnull().sum())  # Count NaNs per column
+ print(df.nunique())  # Unique values per column
+ print(df.value_counts("col"))  # Frequency of each value
+
+ # ===========================================================
+ # 🔍 SELECTION & MANIPULATION
+ # ===========================================================
+
+ # Selecting rows and columns
+ cols = df[["col1", "col2"]]  # Select multiple columns
+ rows = df.iloc[0:10]  # Select first 10 rows by index
+ filtered = df[df["age"] > 25]  # Select rows based on condition
+ cell = df.loc[0, "col1"]  # Select specific cell (label-based)
+ cell = df.iloc[0, 0]  # Select specific cell (index-based)
+ filtered = df.query("age > 25 and city == 'NYC'")  # Query-based filtering
+ sampled = df.sample(n=5, random_state=42)  # Random sample of rows
+
+ # Sorting
+ sorted_df = df.sort_values("col", ascending=False)  # Sort by column descending
+ sorted_df = df.sort_values(
+     ["col1", "col2"], ascending=[True, False]
+ )  # Multi-column sort
+
+ # Stacking & Merging
+ df1 = pd.DataFrame({"id": [1, 2], "val1": ["A", "B"]})
+ df2 = pd.DataFrame({"id": [1, 2], "val2": ["C", "D"]})
+ df_stacked = pd.concat([df1, df2], axis=0)  # Stack vertically (rows)
+ df_wide = pd.concat([df1, df2], axis=1)  # Stack horizontally (columns)
+ df_merged = pd.merge(
+     df1, df2, on="id", how="inner"
+ )  # SQL-like join (inner/left/right/outer)
+
+ # ===========================================================
+ # 🛠️ DATA CLEANING & PROCESSING
+ # ===========================================================
+
+ # Handling Missing Values
+ df["col"].fillna(df["col"].mean(), inplace=True)  # Impute by mean (one-liner)
+ df["col"].fillna(df["col"].median(), inplace=True)  # Impute by median
+ df["col"].fillna(df["col"].mode()[0], inplace=True)  # Impute by mode (categorical)
+ df.dropna(axis=0, inplace=True)  # Drop rows with any NaN values
+ df.dropna(
+     subset=["col1", "col2"], inplace=True
+ )  # Drop rows with NaN in specific columns
+ df.ffill(inplace=True)  # Forward fill NaN values
+ df.bfill(inplace=True)  # Backward fill NaN values
+
+ # Transformations
+ df["new_col"] = df["col"].apply(lambda x: x**2)  # Apply custom function
+ grouped = df.groupby("category")["sales"].sum()  # Groupby & aggregate
+ grouped = df.groupby("cat").agg(
+     {"sales": "sum", "qty": "mean"}
+ )  # Multiple aggregations
+ df["cat_code"] = df["category"].astype("category").cat.codes  # Quick label encoding
+ df["col"] = df["col"].str.lower()  # String lowercase
+ df["col"] = df["col"].str.replace("old", "new")  # String replace
+ df["col"] = df["col"].str.strip()  # Strip whitespace
+ df["binned"] = pd.cut(df["age"], bins=[0, 18, 35, 60, 100])  # Binning/discretization
+ df = pd.get_dummies(df, columns=["city"], drop_first=True)  # One-hot encode columns
+ df.rename(columns={"old_name": "new_name"}, inplace=True)  # Rename columns
+ df.drop(columns=["col1", "col2"], inplace=True)  # Drop columns
+ df.drop_duplicates(inplace=True)  # Remove duplicate rows
+
+ # Type Conversions
+ df["col"] = df["col"].astype(int)  # Convert column type
+ df["date"] = pd.to_datetime(df["date_str"])  # Parse dates
+
+ # ===========================================================
+ # 🔢 NUMPY ESSENTIALS
+ # ===========================================================
+
+ arr = np.array([1, 2, 3])  # Create array
+ reshaped = arr.reshape(1, -1)  # Reshape for sklearn (2D)
+ mean_val = np.mean(arr)
+ std_val = np.std(arr)  # Basic stats
+ mask = arr[arr > 2]  # Boolean indexing/filtering
+ zeros = np.zeros((3, 3))
+ ones = np.ones((3, 3))  # Zero/One matrices
+ eye = np.eye(3)  # Identity matrix
+ rand = np.random.rand(3, 3)  # Random matrix [0,1)
+ dot = np.dot(arr, arr)  # Dot product
+ norm = np.linalg.norm(arr)  # Vector norm (L2)
+ log = np.log2(arr)  # Log base 2 (entropy)
+ unique, counts = np.unique(arr, return_counts=True)  # Unique values & counts
+
+ # ===========================================================
+ # 🤖 SCIKIT-LEARN PREPROCESSING
+ # ===========================================================
+
+ from sklearn.impute import SimpleImputer
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import (LabelEncoder, MinMaxScaler, OneHotEncoder,
+                                    StandardScaler)
+
+ # Splitting Data
+ X = df.drop("target", axis=1)
+ y = df["target"]
+ X_train, X_test, y_train, y_test = train_test_split(
+     X, y, test_size=0.2, random_state=42
+ )
+
+ # Imputing (Standard approach)
+ imputed = SimpleImputer(strategy="mean").fit_transform(
+     df[["num_col"]]
+ )  # Impute missing with mean
+ imputed = SimpleImputer(strategy="most_frequent").fit_transform(
+     df[["cat"]]
+ )  # Impute categorical
+
+ # Scaling & Encoding
+ scaled = StandardScaler().fit_transform(
+     df[["age", "salary"]]
+ )  # Standardize (mean=0, std=1)
+ scaled = MinMaxScaler().fit_transform(df[["age", "salary"]])  # Normalize to [0, 1]
+ encoded = OneHotEncoder().fit_transform(df[["gender"]]).toarray()  # One-hot encode
+ le = LabelEncoder().fit_transform(df["target"])  # Encode target labels
+
+ # Pipeline (all-in-one)
+ from sklearn.compose import ColumnTransformer
+ from sklearn.pipeline import Pipeline
+
+ processor = ColumnTransformer(
+     [("num", StandardScaler(), ["age"]), ("cat", OneHotEncoder(), ["city"])]
+ )
+
+ # ===========================================================
+ # ⛏️ DATA MINING (DM) ESSENTIALS
+ # ===========================================================
+
+ from sklearn.cluster import KMeans
+ from sklearn.ensemble import (AdaBoostClassifier, BaggingClassifier,
+                               RandomForestClassifier)
+ from sklearn.metrics import (accuracy_score, auc, classification_report,
+                              confusion_matrix, f1_score,
+                              precision_recall_curve, precision_score,
+                              recall_score, roc_curve)
+ from sklearn.preprocessing import label_binarize
+ from sklearn.tree import DecisionTreeClassifier
+
+ # --- Decision Trees ---
+ dt = DecisionTreeClassifier(random_state=42).fit(
+     X_train, y_train
+ )  # Train decision tree
+ y_pred = dt.predict(X_test)  # Predict
+ print(accuracy_score(y_test, y_pred))  # Accuracy
+
+ # --- Ensemble Methods ---
+ bag = BaggingClassifier(n_estimators=10, random_state=42).fit(
+     X_train, y_train
+ )  # Bagging
+ ada = AdaBoostClassifier(n_estimators=50, random_state=42).fit(
+     X_train, y_train
+ )  # AdaBoost
+ rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(
+     X_train, y_train
+ )  # Random Forest
+
+ # --- Clustering ---
+ kmeans = KMeans(n_clusters=3, random_state=42).fit(X)  # K-Means clustering
+ labels = kmeans.labels_  # Cluster labels
+ centers = kmeans.cluster_centers_  # Cluster centroids
+
+ # --- Classification Metrics ---
+ print(accuracy_score(y_test, y_pred))  # Accuracy = (TP+TN)/(TP+TN+FP+FN)
+ print(precision_score(y_test, y_pred, average="weighted"))  # Precision = TP/(TP+FP)
+ print(recall_score(y_test, y_pred, average="weighted"))  # Recall = TP/(TP+FN)
+ print(f1_score(y_test, y_pred, average="weighted"))  # F1 = 2*(P*R)/(P+R)
+ print(confusion_matrix(y_test, y_pred))  # Confusion matrix
+ print(classification_report(y_test, y_pred))  # Full report
+
+ # --- ROC & AUC ---
+ y_bin = label_binarize(y_test, classes=[0, 1, 2])  # Binarize for multiclass ROC
+ y_proba = dt.predict_proba(X_test)  # Predicted probabilities
+ fpr, tpr, _ = roc_curve(y_bin[:, 0], y_proba[:, 0])  # ROC curve (per class)
+ roc_auc = auc(fpr, tpr)  # AUC score
+
+ # --- Apriori / Association Rules (manual) ---
+ from itertools import combinations
+
+ support = lambda itemset, txns: sum(1 for t in txns if itemset.issubset(t)) / len(txns)
+ transactions = [
+     ["milk", "bread"],
+     ["milk", "diaper", "beer", "bread"],
+     ["milk", "bread", "diaper"],
+     ["bread", "diaper"],
+ ]
+ items = set(item for t in transactions for item in t)
+ freq_items = {frozenset([i]) for t in transactions for i in t}  # C1 candidates
+ pairs = [
+     frozenset(c) for c in combinations(sorted(items), 2)
+ ]  # Generate candidate pairs
+
+ # --- Entropy & Information Gain (ID3) ---
+ entropy = lambda probs: -sum(p * np.log2(p) for p in probs if p > 0)  # Shannon entropy
+ info_gain = lambda parent_ent, children: parent_ent - sum(
+     w * entropy(c) for w, c in children
+ )
+ gini = lambda probs: 1 - sum(p**2 for p in probs)  # Gini impurity
+
+ # ===========================================================
+ # 🔎 INFORMATION RETRIEVAL (IR) ESSENTIALS
+ # ===========================================================
+
+ import math
+ from collections import Counter
+
+ # --- Term Frequency (TF) ---
+ # TF(t, d) = count(t in d) / total_terms_in_d
+ tf = lambda term, doc: doc.count(term) / len(doc)  # Term frequency
+
+ # --- Inverse Document Frequency (IDF) ---
+ # IDF(t) = log(N / df(t)) where N = total docs, df(t) = docs containing t
+ idf = lambda term, docs: math.log(len(docs) / sum(1 for d in docs if term in d))
+
+ # --- TF-IDF ---
+ # TF-IDF(t, d) = TF(t, d) * IDF(t)
+ tfidf = lambda term, doc, docs: tf(term, doc) * idf(term, docs)  # TF-IDF score
+
+ # --- BM25 ---
+ # BM25(t, d) = IDF(t) * (TF * (k1 + 1)) / (TF + k1 * (1 - b + b * |d| / avgdl))
+ k1 = 1.5
+ b = 0.75  # BM25 parameters
+ docs = [
+     ["term1", "term2"],
+     ["term2", "term3"],
+     ["term1", "term3", "term4"],
+ ]  # Sample documents
+ avgdl = np.mean([len(d) for d in docs])  # Average document length
+
+ # --- Boolean Retrieval ---
+ # AND: set(doc1_terms) & set(doc2_terms)
+ # OR: set(doc1_terms) | set(doc2_terms)
+ # NOT: set(all_terms) - set(doc_terms)
+ bool_and = lambda q_terms, doc: all(t in doc for t in q_terms)  # Boolean AND query
+ bool_or = lambda q_terms, doc: any(t in doc for t in q_terms)  # Boolean OR query
+
+ # --- Cosine Similarity ---
+ # cos(A, B) = (A · B) / (||A|| * ||B||)
+ cosine_sim = lambda a, b: np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
+
+ # --- Jaccard Similarity ---
+ # J(A, B) = |A ∩ B| / |A ∪ B|
+ jaccard = lambda a, b: len(a & b) / len(a | b)  # Jaccard similarity (sets)
+
+ # --- Tokenization & Text Processing ---
+ tokens = "hello world foo bar".lower().split()  # Basic tokenization
+ vocab = set(tokens)  # Vocabulary
+ bow = Counter(tokens)  # Bag of words
+ from sklearn.feature_extraction.text import TfidfVectorizer
+
+ tfidf_matrix = TfidfVectorizer().fit_transform(
+     ["doc1 text", "doc2 text"]
+ )  # TF-IDF vectorizer
+
+ # --- Inverted Index ---
+ inv_index = {}  # Build inverted index
+ for doc_id, doc in enumerate(docs):
+     for term in doc:
+         inv_index.setdefault(term, set()).add(doc_id)
+
+ # --- Precision & Recall (IR) ---
+ # Precision@k = relevant_in_top_k / k
+ # Recall@k = relevant_in_top_k / total_relevant
+ precision_at_k = lambda retrieved, relevant, k: len(set(retrieved[:k]) & relevant) / k
+ recall_at_k = lambda retrieved, relevant, k: len(set(retrieved[:k]) & relevant) / len(
+     relevant
+ )
+
+
+ with open(__file__, "r", encoding="utf-8") as _f:
+     _source_content = _f.read()
+
+ sys.modules[__name__].__class__ = PrintableModule
+ sys.modules[__name__]._source_content = _source_content
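One gap worth flagging in the new lib_doc.py: the BM25 section defines k1, b, and avgdl but never assembles them into a scoring function. A minimal sketch that implements the commented formula, assuming raw in-document counts for TF (the comment leaves that choice open; the file's own tf() normalizes by document length):

import math

import numpy as np

k1, b = 1.5, 0.75  # BM25 parameters, as in lib_doc.py
docs = [["term1", "term2"], ["term2", "term3"], ["term1", "term3", "term4"]]
avgdl = np.mean([len(d) for d in docs])  # average document length


def bm25(term, doc, docs):
    """BM25(t, d) = IDF(t) * (TF * (k1+1)) / (TF + k1 * (1 - b + b * |d| / avgdl))."""
    tf = doc.count(term)  # raw count; an assumption, see note above
    idf = math.log(len(docs) / sum(1 for d in docs if term in d))
    return idf * (tf * (k1 + 1)) / (tf + k1 * (1 - b + b * len(doc) / avgdl))


print(bm25("term1", docs[0], docs))  # ~0.43 for the sample documents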
@@ -8,21 +8,21 @@ A quick reference guide for core Python concepts, data structures, and features.
  # =============================================================================
  # 1. LISTS (Mutable, Ordered)
  # =============================================================================
- my_list = [1, 2, 3, 'a', 'b']
+ my_list = [1, 2, 3, "a", "b"]

  # Operations
- my_list.append(4) # Add to end: [1, 2, 3, 'a', 'b', 4]
- my_list.insert(0, 0) # Insert at index: [0, 1, 2, 3, 'a', 'b', 4]
- my_list.extend([5, 6]) # Append multiple: [0, 1, 2, 3, 'a', 'b', 4, 5, 6]
- my_list.pop() # Remove & return last item (6)
- my_list.pop(1) # Remove & return item at index 1 (1)
- my_list.remove('a') # Remove first occurrence of 'a'
- my_list.reverse() # Reverse in place
+ my_list.append(4)  # Add to end: [1, 2, 3, 'a', 'b', 4]
+ my_list.insert(0, 0)  # Insert at index: [0, 1, 2, 3, 'a', 'b', 4]
+ my_list.extend([5, 6])  # Append multiple: [0, 1, 2, 3, 'a', 'b', 4, 5, 6]
+ my_list.pop()  # Remove & return last item (6)
+ my_list.pop(1)  # Remove & return item at index 1 (1)
+ my_list.remove("a")  # Remove first occurrence of 'a'
+ my_list.reverse()  # Reverse in place
  # my_list.sort() # Sort in place (requires same types)
  # sorted(my_list) # Return new sorted list
- my_list.clear() # Empty the list
- count = my_list.count(2) # Count occurrences
- idx = my_list.index(3) # Find index of first occurrence
+ my_list.clear()  # Empty the list
+ count = my_list.count(2)  # Count occurrences
+ idx = my_list.index(3)  # Find index of first occurrence

  # Slicing: list[start:stop:step]
  # my_list[1:4] (index 1 to 3), my_list[::-1] (reverse)
@@ -31,67 +31,67 @@ idx = my_list.index(3) # Find index of first occurrence
  # 2. SETS (Mutable, Unordered, Unique Elements)
  # =============================================================================
  my_set = {1, 2, 3}
- empty_set = set() # Note: {} creates an empty dict, not a set
+ empty_set = set()  # Note: {} creates an empty dict, not a set

  # Operations
- my_set.add(4) # Add element
- my_set.update([5, 6]) # Add multiple elements
- my_set.remove(6) # Remove element (raises KeyError if not found)
- my_set.discard(10) # Remove element (safe, no error if not found)
- my_set.pop() # Remove & return arbitrary element
- my_set.clear() # Empty the set
+ my_set.add(4)  # Add element
+ my_set.update([5, 6])  # Add multiple elements
+ my_set.remove(6)  # Remove element (raises KeyError if not found)
+ my_set.discard(10)  # Remove element (safe, no error if not found)
+ my_set.pop()  # Remove & return arbitrary element
+ my_set.clear()  # Empty the set

  set_a, set_b = {1, 2}, {2, 3}
- union = set_a | set_b # {1, 2, 3} (or set_a.union(set_b))
- intersection = set_a & set_b # {2} (or set_a.intersection(set_b))
- diff = set_a - set_b # {1} (or set_a.difference(set_b))
- sym_diff = set_a ^ set_b # {1, 3} (or set_a.symmetric_difference(set_b))
+ union = set_a | set_b  # {1, 2, 3} (or set_a.union(set_b))
+ intersection = set_a & set_b  # {2} (or set_a.intersection(set_b))
+ diff = set_a - set_b  # {1} (or set_a.difference(set_b))
+ sym_diff = set_a ^ set_b  # {1, 3} (or set_a.symmetric_difference(set_b))

  # =============================================================================
  # 3. TUPLES (Immutable, Ordered)
  # =============================================================================
  my_tuple = (1, 2, 3, 2)
- single_tuple = (1,) # Comma needed for single-element tuple
+ single_tuple = (1,)  # Comma needed for single-element tuple

  # Operations (Very limited since immutable)
- count = my_tuple.count(2) # Count occurrences (2)
- idx = my_tuple.index(3) # Find index of first occurrence (2)
+ count = my_tuple.count(2)  # Count occurrences (2)
+ idx = my_tuple.index(3)  # Find index of first occurrence (2)
  # Tuples support unpacking: a, b, c, d = my_tuple

  # =============================================================================
  # 4. DICTIONARIES (Mutable, Key-Value Pairs, Unordered before Python 3.7)
  # =============================================================================
- my_dict = {'name': 'Alice', 'age': 25}
+ my_dict = {"name": "Alice", "age": 25}

  # Operations
- my_dict['city'] = 'NYC' # Add or update key
- val = my_dict.get('age') # Safe get (returns None if not found, instead of KeyError)
- val = my_dict.get('x', 0) # Safe get with default value
- keys = my_dict.keys() # dict_keys(['name', 'age', 'city'])
- values = my_dict.values() # dict_values(['Alice', 25, 'NYC'])
- items = my_dict.items() # dict_items([('name', 'Alice'), ...])
+ my_dict["city"] = "NYC"  # Add or update key
+ val = my_dict.get("age")  # Safe get (returns None if not found, instead of KeyError)
+ val = my_dict.get("x", 0)  # Safe get with default value
+ keys = my_dict.keys()  # dict_keys(['name', 'age', 'city'])
+ values = my_dict.values()  # dict_values(['Alice', 25, 'NYC'])
+ items = my_dict.items()  # dict_items([('name', 'Alice'), ...])

  # Removal
- popped_val = my_dict.pop('age') # Remove key 'age' and return value
- popped_item = my_dict.popitem() # Remove & return last key-value pair as tuple
+ popped_val = my_dict.pop("age")  # Remove key 'age' and return value
+ popped_item = my_dict.popitem()  # Remove & return last key-value pair as tuple
  # del my_dict['name'] # Delete key
- my_dict.clear() # Empty dict
- my_dict.update({'a': 1, 'b': 2}) # Merge / Update with another dict
+ my_dict.clear()  # Empty dict
+ my_dict.update({"a": 1, "b": 2})  # Merge / Update with another dict

  # =============================================================================
  # 5. LIST / DICT / SET COMPREHENSIONS
  # =============================================================================
  # List Comprehension: [expression for item in iterable if condition]
- squares = [x**2 for x in range(10) if x % 2 == 0] # [0, 4, 16, 36, 64]
+ squares = [x**2 for x in range(10) if x % 2 == 0]  # [0, 4, 16, 36, 64]

  # Dict Comprehension: {key_expr: val_expr for item in iterable if condition}
- sq_dict = {x: x**2 for x in range(5)} # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
+ sq_dict = {x: x**2 for x in range(5)}  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

  # Set Comprehension: {expression for item in iterable if condition}
- sq_set = {x**2 for x in [-1, 1, 2]} # {1, 4}
+ sq_set = {x**2 for x in [-1, 1, 2]}  # {1, 4}

  # Generator Expression: (expression for item in iterable if condition)
- gen = (x**2 for x in range(10)) # Lazy evaluation
+ gen = (x**2 for x in range(10))  # Lazy evaluation

  # =============================================================================
  # 6. LAMBDA FUNCTIONS, MAP, FILTER, REDUCE
@@ -102,24 +102,27 @@ print(add(2, 3)) # 5

  nums = [1, 2, 3, 4]
  # map: apply function to all items
- mapped = list(map(lambda x: x*2, nums)) # [2, 4, 6, 8]
+ mapped = list(map(lambda x: x * 2, nums))  # [2, 4, 6, 8]

  # filter: keep items where function returns True
- filtered = list(filter(lambda x: x % 2 == 0, nums)) # [2, 4]
+ filtered = list(filter(lambda x: x % 2 == 0, nums))  # [2, 4]

  # reduce (requires functools): cumulative application
  from functools import reduce
- product = reduce(lambda x, y: x * y, nums) # 24
+
+ product = reduce(lambda x, y: x * y, nums)  # 24

  # Sort with lambda key
  words = ["apple", "banana", "cherry"]
- words.sort(key=lambda w: len(w)) # Sort by length
+ words.sort(key=lambda w: len(w))  # Sort by length
+

  # =============================================================================
  # 7. CLASSES AND OBJECTS (OOP)
  # =============================================================================
  class Animal:
      """Base class for animals."""
+
      species_count = 0 # Class attribute

      def __init__(self, name):
@@ -140,18 +143,20 @@ class Animal:
          """Static method: no implicit self or cls args"""
          return True

+
  # Inheritance
  class Dog(Animal):
      def __init__(self, name, breed):
          super().__init__(name) # Call parent constructor
          self.breed = breed
-
-     def speak(self): # Method Overriding
+
+     def speak(self):  # Method Overriding
          return "Woof!"

+
  dog = Dog("Buddy", "Golden Retriever")
- print(dog.speak()) # "Woof!"
- print(Animal.get_count()) # 1
+ print(dog.speak())  # "Woof!"
+ print(Animal.get_count())  # 1

  # =============================================================================
  # 8. FILE HANDLING
@@ -165,7 +170,7 @@ with open("example.txt", "w", encoding="utf-8") as file:

  # Read from file
  with open("example.txt", "r", encoding="utf-8") as file:
-     content = file.read() # Read entire file
+     content = file.read()  # Read entire file
      # file.seek(0) # Reset cursor to start
      # lines = file.readlines() # Read lines into a list
      # for line in file: # Iterate line by line (memory efficient)
File without changes
@@ -0,0 +1,10 @@
+ Metadata-Version: 2.4
+ Name: bm-preprocessing
+ Version: 1.3.8
+ Summary: Add your description here
+ Requires-Python: >=3.11
+ Description-Content-Type: text/markdown
+ Requires-Dist: matplotlib>=3.10.8
+ Requires-Dist: numpy>=2.4.4
+ Requires-Dist: pandas>=3.0.2
+ Requires-Dist: seaborn>=0.13.2
@@ -0,0 +1,11 @@
+ README.md
+ pyproject.toml
+ PY/__init__.py
+ PY/_module_printer.py
+ PY/lib_doc.py
+ PY/python_doc.py
+ bm_preprocessing.egg-info/PKG-INFO
+ bm_preprocessing.egg-info/SOURCES.txt
+ bm_preprocessing.egg-info/dependency_links.txt
+ bm_preprocessing.egg-info/requires.txt
+ bm_preprocessing.egg-info/top_level.txt
@@ -0,0 +1,4 @@
+ matplotlib>=3.10.8
+ numpy>=2.4.4
+ pandas>=3.0.2
+ seaborn>=0.13.2
@@ -0,0 +1,12 @@
+ [project]
+ name = "bm-preprocessing"
+ version = "1.3.8"
+ description = "Add your description here"
+ readme = "README.md"
+ requires-python = ">=3.11"
+ dependencies = [
+     "matplotlib>=3.10.8",
+     "numpy>=2.4.4",
+     "pandas>=3.0.2",
+     "seaborn>=0.13.2",
+ ]
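With this pyproject.toml, installing the published artifact pulls in the four runtime dependencies automatically. A quick smoke test, assuming `pip install bm-preprocessing==1.3.8` has run; the import path follows the one used inside lib_doc.py itself and is an assumption about the installed layout:

# Assumes: pip install bm-preprocessing==1.3.8
from bm_preprocessing.PY import lib_doc  # path taken from lib_doc.py's own import

print(lib_doc)  # the PrintableModule hook makes this dump the cheatsheet source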
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+