itertoolkit 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. bm_preprocessing/__init__.py +14 -0
  2. bm_preprocessing/importer/DM/__init__.py +7 -0
  3. bm_preprocessing/importer/DM/agg.py +6 -0
  4. bm_preprocessing/importer/DM/dbscan.py +6 -0
  5. bm_preprocessing/importer/DM/finals.py +6 -0
  6. bm_preprocessing/importer/DM/gsp.py +6 -0
  7. bm_preprocessing/importer/DM/test.py +6 -0
  8. bm_preprocessing/importer/Finals/__init__.py +7 -0
  9. bm_preprocessing/importer/Finals/kaadhal.py +6 -0
  10. bm_preprocessing/importer/Finals/raaka.py +6 -0
  11. bm_preprocessing/importer/Finals/seedan.py +6 -0
  12. bm_preprocessing/importer/Finals/vikram.py +6 -0
  13. bm_preprocessing/importer/IR/__init__.py +6 -0
  14. bm_preprocessing/importer/IR/finals.py +6 -0
  15. bm_preprocessing/importer/IR/pagerank.py +6 -0
  16. bm_preprocessing/importer/IR/recommenders_pca.py +8 -0
  17. bm_preprocessing/importer/IR/test.py +6 -0
  18. bm_preprocessing/importer/PY/__init__.py +4 -0
  19. bm_preprocessing/importer/PY/lib_doc.py +6 -0
  20. bm_preprocessing/importer/PY/python_doc.py +6 -0
  21. bm_preprocessing/importer/__init__.py +8 -0
  22. bm_preprocessing/importer/_module_printer.py +23 -0
  23. bm_preprocessing/src/DM/__init__.py +1 -0
  24. bm_preprocessing/src/DM/agg.py +267 -0
  25. bm_preprocessing/src/DM/dbscan.py +218 -0
  26. bm_preprocessing/src/DM/finals.py +19 -0
  27. bm_preprocessing/src/DM/gsp.py +378 -0
  28. bm_preprocessing/src/DM/test.py +19 -0
  29. bm_preprocessing/src/Finals/__init__.py +1 -0
  30. bm_preprocessing/src/Finals/kaadhal.py +1453 -0
  31. bm_preprocessing/src/Finals/raaka.py +1338 -0
  32. bm_preprocessing/src/Finals/seedan.py +1173 -0
  33. bm_preprocessing/src/Finals/vikram.py +520 -0
  34. bm_preprocessing/src/IR/__init__.py +1 -0
  35. bm_preprocessing/src/IR/finals.py +14 -0
  36. bm_preprocessing/src/IR/pagerank.py +109 -0
  37. bm_preprocessing/src/IR/recommenders_pca.py +487 -0
  38. bm_preprocessing/src/IR/test.py +14 -0
  39. bm_preprocessing/src/PY/__init__.py +1 -0
  40. bm_preprocessing/src/PY/lib_doc.py +295 -0
  41. bm_preprocessing/src/PY/python_doc.py +177 -0
  42. bm_preprocessing/src/__init__.py +1 -0
  43. itertoolkit-1.5.0.dist-info/METADATA +120 -0
  44. itertoolkit-1.5.0.dist-info/RECORD +45 -0
  45. itertoolkit-1.5.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,520 @@
1
+ # -*- coding: utf-8 -*-
2
+ """ir dm final code sem.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1pShgFSMDWpmjt0iCqFTyic4hYGP5vgFc
8
+ """
9
+
10
+ #-------------------------------------------------------------------------------------------------------#
11
+ #IR CODE#
12
+ #-------------------------------------------------------------------------------------------------------#
13
+
14
+ import pandas as pd
15
+ import matplotlib.pyplot as plt
16
+ import seaborn as sns
17
+ from sklearn.metrics.pairwise import cosine_similarity
18
+ from sklearn.feature_extraction.text import TfidfVectorizer
19
+ from sklearn.decomposition import PCA
20
+ from sklearn.preprocessing import StandardScaler
21
+ import networkx as nx
22
+ import os
23
+
24
+ sns.set()
25
+
26
+ figures = []
27
+
28
+ print("Current Directory:", os.getcwd())
29
+ print("Files in folder:", os.listdir())
30
+
31
+ # ============================================================
32
+
33
+ # 1. CONTENT-BASED RECOMMENDER SYSTEM
34
+
35
+ # ============================================================
36
+
37
+ try:
38
+ print("\nCONTENT-BASED RECOMMENDER SYSTEM")
39
+
40
+
41
+ movies = pd.read_csv("movies.csv")
42
+ movies.columns = movies.columns.str.lower().str.strip()
43
+
44
+ title_col = [c for c in movies.columns if 'title' in c][0]
45
+ genre_col = [c for c in movies.columns if 'genre' in c][0]
46
+
47
+ tfidf = TfidfVectorizer()
48
+ tfidf_matrix = tfidf.fit_transform(movies[genre_col])
49
+ cosine_sim = cosine_similarity(tfidf_matrix)
50
+
51
+ movie_idx = 0
52
+ scores = sorted(list(enumerate(cosine_sim[movie_idx])), key=lambda x: x[1], reverse=True)[1:4]
53
+
54
+ print("Recommendations for:", movies[title_col].iloc[movie_idx])
55
+ for i in scores:
56
+ print(movies[title_col].iloc[i[0]])
57
+
58
+ fig1 = plt.figure()
59
+ sns.heatmap(cosine_sim, cmap="Blues")
60
+ plt.title("Content Similarity")
61
+ figures.append(fig1)
62
+
63
+
64
+ except Exception as e:
65
+ print("Error in Content-Based:", e)
66
+
67
+ # ============================================================
68
+
69
+ # 2. COLLABORATIVE FILTERING
70
+
71
+ # ============================================================
72
+
73
+ try:
74
+ print("\nCOLLABORATIVE FILTERING")
75
+
76
+
77
+ ratings = pd.read_csv("data.csv")
78
+ ratings.columns = ratings.columns.str.lower().str.strip()
79
+
80
+ user_col = [c for c in ratings.columns if 'user' in c][0]
81
+ item_col = [c for c in ratings.columns if 'movie' in c or 'item' in c][0]
82
+ rating_col = [c for c in ratings.columns if 'rating' in c][0]
83
+
84
+ user_item = ratings.pivot(index=user_col, columns=item_col, values=rating_col).fillna(0)
85
+ user_sim = cosine_similarity(user_item)
86
+
87
+ print("User Similarity Matrix:\n", user_sim)
88
+
89
+ fig2 = plt.figure()
90
+ sns.heatmap(user_item, cmap="coolwarm")
91
+ plt.title("User-Item Matrix")
92
+ figures.append(fig2)
93
+
94
+
95
+ except Exception as e:
96
+ print("Error in Collaborative:", e)
97
+
98
+ # ============================================================
99
+
100
+ # 3. PCA
101
+
102
+ # ============================================================
103
+
104
+ try:
105
+ print("\nPCA")
106
+
107
+ data = pd.read_csv("data.csv")
108
+ data.columns = data.columns.str.lower().str.strip()
109
+
110
+ scaler = StandardScaler()
111
+ scaled = scaler.fit_transform(data)
112
+
113
+ pca = PCA(n_components=2)
114
+ result = pca.fit_transform(scaled)
115
+
116
+ print("Reduced Data:\n", result)
117
+
118
+ fig3 = plt.figure()
119
+ plt.scatter(result[:, 0], result[:, 1])
120
+ plt.title("PCA Projection")
121
+ figures.append(fig3)
122
+
123
+
124
+ except Exception as e:
125
+ print("Error in PCA:", e)
126
+
127
+ # ============================================================
128
+
129
+ # 4. PAGERANK
130
+
131
+ # ============================================================
132
+
133
+ try:
134
+ print("\nPAGERANK")
135
+
136
+ graph = pd.read_csv("graph.csv")
137
+ graph.columns = graph.columns.str.lower().str.strip()
138
+
139
+ source_col = graph.columns[0]
140
+ target_col = graph.columns[1]
141
+
142
+ G = nx.from_pandas_edgelist(graph, source_col, target_col, create_using=nx.DiGraph())
143
+ pr = nx.pagerank(G)
144
+
145
+ print("PageRank Scores:")
146
+ for k, v in pr.items():
147
+ print(k, ":", round(v, 4))
148
+
149
+ fig4 = plt.figure()
150
+ nx.draw(G, with_labels=True, node_color='lightblue')
151
+ plt.title("Web Graph")
152
+ figures.append(fig4)
153
+
154
+ except Exception as e:
155
+ print("Error in PageRank:", e)
156
+
157
+ # ============================================================
158
+
159
+ # 5. INVERTED INDEX
160
+
161
+ # ============================================================
162
+
163
+ try:
164
+ print("\nINVERTED INDEX")
165
+
166
+ docs = pd.read_csv("documents.csv")
167
+ docs.columns = docs.columns.str.lower().str.strip()
168
+
169
+ id_col = [c for c in docs.columns if 'id' in c][0]
170
+ text_col = [c for c in docs.columns if 'content' in c or 'text' in c][0]
171
+
172
+ index = {}
173
+
174
+ for _, row in docs.iterrows():
175
+ words = str(row[text_col]).lower().split()
176
+ for w in words:
177
+ if w not in index:
178
+ index[w] = []
179
+ index[w].append(row[id_col])
180
+
181
+ query = "data"
182
+ print("Search:", query)
183
+
184
+ if query in index:
185
+ for doc_id in index[query]:
186
+ print(docs[docs[id_col] == doc_id][text_col].values[0])
187
+ else:
188
+ print("No results")
189
+
190
+
191
+ except Exception as e:
192
+ print("Error in IR:", e)
193
+
194
+ # ============================================================
195
+
196
+ # SHOW ALL PLOTS
197
+
198
+ # ============================================================
199
+ print("\nShowing all plots...")
200
+
201
+ # Activate all figures created
202
+ for num in plt.get_fignums():
203
+ plt.figure(num)
204
+
205
+ # Display all together
206
+ plt.show()
207
+
208
+ #-------------------------------------------------------------------------------------------------------#
209
+ #DM CODE#
210
+ #-------------------------------------------------------------------------------------------------------#
211
+ # ============================================================
212
+ # PROBLEM SHEET 13 – AGGLOMERATIVE CLUSTERING (WARD)
213
+ # ============================================================
214
+
215
+ import pandas as pd
216
+ import numpy as np
217
+ import matplotlib.pyplot as plt
218
+ from scipy.cluster.hierarchy import dendrogram, linkage
219
+
220
+ print("\n===== AGGLOMERATIVE CLUSTERING =====")
221
+
222
+ # Load dataset
223
+ df = pd.read_csv("Mall_Customers.csv")
224
+
225
+ # Remove CustomerID
226
+ df = df.drop("CustomerID", axis=1)
227
+
228
+ # Missing values
229
+ print("Missing values:\n", df.isnull().sum())
230
+
231
# Convert Gender
df['Gender'] = df['Gender'].map({'Male': 1, 'Female': 0})

# Pie chart
# value_counts() returns the categories ordered by how often they occur, not by
# their code, so each wedge's label is derived from the code it actually
# represents instead of assuming that 'Male' always comes first.
gender_counts = df['Gender'].value_counts()
gender_labels = ['Male' if code == 1 else 'Female' for code in gender_counts.index]
plt.figure()
plt.pie(gender_counts, labels=gender_labels, autopct='%1.1f%%')
plt.title("Male vs Female Ratio")
plt.show()
240
+
241
+ # Bar graphs
242
+ plt.figure()
243
+ plt.bar(range(len(df)), df['Age'])
244
+ plt.title("Age Distribution")
245
+ plt.show()
246
+
247
+ plt.figure()
248
+ plt.bar(range(len(df)), df['Annual Income (k$)'])
249
+ plt.title("Income Distribution")
250
+ plt.show()
251
+
252
+ # Ward linkage clustering
253
+ data = df[['Age', 'Annual Income (k$)', 'Spending Score (1-100)']].values
254
+ Z = linkage(data, method='ward')
255
+
256
+ plt.figure(figsize=(10,5))
257
+ dendrogram(Z)
258
+ plt.title("Dendrogram (Ward Linkage)")
259
+ plt.xlabel("Data Points")
260
+ plt.ylabel("Distance")
261
+ plt.show()
262
+
263
+
264
+ # ============================================================
265
+ # PROBLEM SHEET 14 – DBSCAN
266
+ # ============================================================
267
+
268
+ from sklearn.preprocessing import StandardScaler
269
+ from sklearn.datasets import make_moons
270
+ from sklearn.cluster import DBSCAN
271
+
272
+ print("\n===== DBSCAN CLUSTERING =====")
273
+
274
+ # Load dataset
275
+ df = pd.read_csv("Wholesale customers data.csv")
276
+ df = df.drop(['Channel', 'Region'], axis=1)
277
+
278
+ print("First few records:\n", df.head())
279
+
280
+ # Normalize
281
+ data = df[['Grocery', 'Milk']].values
282
+ scaler = StandardScaler()
283
+ data_scaled = scaler.fit_transform(data)
284
+
285
+ plt.scatter(data_scaled[:,0], data_scaled[:,1])
286
+ plt.title("Normalized Data")
287
+ plt.xlabel("Grocery")
288
+ plt.ylabel("Milk")
289
+ plt.show()
290
+
291
+ # Manual DBSCAN
292
+ EPS = 0.5
293
+ MINPTS = 15
294
+
295
+ UNVISITED = 0
296
+ NOISE = -1
297
+
298
+ labels = [UNVISITED] * len(data_scaled)
299
+ cluster_id = 0
300
+
301
def euclidean(p1, p2):
    """Return the Euclidean (L2) distance between ``p1`` and ``p2``.

    Both operands must support elementwise subtraction; the callers in this
    script pass rows of a NumPy array.
    """
    delta = p1 - p2
    squared_total = np.sum(delta * delta)
    return np.sqrt(squared_total)
303
+
304
def region_query(idx):
    """Return the indices of all points lying within ``EPS`` of point ``idx``.

    Every row of the module-level ``data_scaled`` array is compared against
    row ``idx`` with ``euclidean``; indices are returned in ascending order.
    The query point is compared with itself as well, so it is part of the
    result whenever its zero self-distance is ``<= EPS``.
    """
    centre = data_scaled[idx]
    return [
        position
        for position, candidate in enumerate(data_scaled)
        if euclidean(centre, candidate) <= EPS
    ]
310
+
311
def expand_cluster(idx, neighbors, cluster_id):
    """Grow cluster ``cluster_id`` outward from the core point ``idx``.

    Writes into the module-level ``labels`` list. Points reached that are
    still ``UNVISITED`` join the cluster and, when their own neighbourhood
    (from ``region_query``) holds at least ``MINPTS`` points, contribute that
    neighbourhood to the search. Points previously marked ``NOISE`` are
    relabelled as members of the cluster but are not expanded further.

    Args:
        idx: Index of the seed point.
        neighbors: Indices within ``EPS`` of the seed. This list is only
            read; it is no longer extended in place.
        cluster_id: Label to assign to every point that is reached.
    """
    labels[idx] = cluster_id

    # Private work queue: keeps the caller's list untouched. ``queued`` makes
    # sure an index is appended at most once, so the queue cannot fill up
    # with repeats of points that have already been scheduled.
    queue = list(neighbors)
    queued = set(queue)

    cursor = 0
    while cursor < len(queue):
        point = queue[cursor]
        cursor += 1

        if labels[point] == NOISE:
            # Border point: reachable from a core point, but not expanded.
            labels[point] = cluster_id
        elif labels[point] == UNVISITED:
            labels[point] = cluster_id
            new_neighbors = region_query(point)

            if len(new_neighbors) >= MINPTS:
                for candidate in new_neighbors:
                    if candidate not in queued:
                        queued.add(candidate)
                        queue.append(candidate)
329
+
330
+ # Main loop
331
+ for i in range(len(data_scaled)):
332
+ if labels[i] != UNVISITED:
333
+ continue
334
+
335
+ neighbors = region_query(i)
336
+
337
+ if len(neighbors) < MINPTS:
338
+ labels[i] = NOISE
339
+ else:
340
+ cluster_id += 1
341
+ expand_cluster(i, neighbors, cluster_id)
342
+
343
+ # Plot clusters
344
+ plt.scatter(data_scaled[:,0], data_scaled[:,1], c=labels, cmap='rainbow')
345
+ plt.title("Manual DBSCAN Clustering")
346
+ plt.show()
347
+
348
+ # Built-in DBSCAN (moon dataset)
349
+ X, _ = make_moons(n_samples=2000, noise=0.05)
350
+
351
+ model = DBSCAN(eps=0.2, min_samples=5)
352
+ labels_moon = model.fit_predict(X)
353
+
354
+ plt.scatter(X[:,0], X[:,1], c=labels_moon, cmap='rainbow')
355
+ plt.title("DBSCAN on Moon Dataset")
356
+ plt.show()
357
+
358
+ # Add noise
359
+ noise = np.random.uniform(low=-1.5, high=2.5, size=(200,2))
360
+ X_noisy = np.vstack([X, noise])
361
+
362
+ labels_noisy = model.fit_predict(X_noisy)
363
+
364
+ plt.scatter(X_noisy[:,0], X_noisy[:,1], c=labels_noisy, cmap='rainbow')
365
+ plt.title("DBSCAN with Noise")
366
+ plt.show()
367
+
368
+
369
+ # ============================================================
370
+ # PROBLEM SHEET 15 – MS-GSP (FULLY CORRECT)
371
+ # ============================================================
372
+
373
+ import re
374
+
375
+ print("\n===== MS-GSP SEQUENTIAL PATTERN MINING =====")
376
+
377
+ # Read parameters
378
+ MIS = {}
379
+ SDC = 0
380
+
381
def convert(x):
    """Return ``x`` as an ``int`` when it parses as one, else ``x`` unchanged.

    Only the errors ``int()`` raises for unparsable input are handled, so
    ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # ValueError: non-numeric text; TypeError: unsupported type such as
        # None; OverflowError: float infinity.
        return x
386
+
387
+ with open("para.txt") as f:
388
+ for line in f:
389
+ if "MIS" in line:
390
+ item = re.findall(r'\((.*?)\)', line)[0]
391
+ item = convert(item)
392
+ MIS[item] = float(line.split("=")[1])
393
+ elif "SDC" in line:
394
+ SDC = float(line.split("=")[1])
395
+
396
+ # Read data
397
def parse_sequence(line):
    """Parse one text line into a sequence (list) of itemsets (sets).

    Each ``{...}`` group found in ``line`` becomes one set, in order of
    appearance. The comma-separated tokens inside a group are stripped of
    surrounding whitespace and passed through ``convert``; empty tokens are
    skipped. As a result ``{a, b}`` yields ``{'a', 'b'}`` rather than
    ``{'a', ' b'}``, and ``{}`` yields an empty set rather than ``{''}``.

    Returns:
        A list of sets; empty when ``line`` contains no ``{...}`` group.
    """
    sequence = []
    for group in re.findall(r'\{(.*?)\}', line):
        tokens = (token.strip() for token in group.split(','))
        sequence.append({convert(token) for token in tokens if token})
    return sequence
400
+
401
+ sequences = []
402
+ with open("data (1).txt") as f:
403
+ for line in f:
404
+ sequences.append(parse_sequence(line.strip()))
405
+
406
+ N = len(sequences)
407
+
408
+ # Support
409
def item_support(item):
    """Return the fraction of loaded sequences that contain ``item``.

    A sequence counts once if ``item`` occurs in any of its itemsets. Reads
    the module-level ``sequences`` list and its length ``N``.

    Returns:
        A float in ``[0, 1]``; ``0.0`` when no sequences were loaded, instead
        of raising ``ZeroDivisionError``.
    """
    if not N:
        return 0.0
    containing = sum(
        1 for seq in sequences if any(item in itemset for itemset in seq)
    )
    return containing / N
411
+
412
+ support = {i: item_support(i) for i in MIS}
413
+
414
+ # Subsequence check
415
def is_subsequence(pattern, sequence):
    """Return True if ``pattern`` occurs, in order, within ``sequence``.

    Both arguments are lists of sets. The sequence is scanned once from left
    to right and each pattern element is matched greedily against the first
    remaining itemset that contains it as a subset.

    The empty pattern is contained in every sequence. Previously it raised
    ``IndexError`` on a non-empty sequence and returned False on an empty one.
    """
    if not pattern:
        return True

    matched = 0
    for itemset in sequence:
        if pattern[matched].issubset(itemset):
            matched += 1
            if matched == len(pattern):
                return True
    return False
423
+
424
def sequence_support(pattern):
    """Count the entries of the module-level ``sequences`` containing ``pattern``.

    Containment is decided by ``is_subsequence``; the result is an absolute
    count, not a fraction.
    """
    supporting = [seq for seq in sequences if is_subsequence(pattern, seq)]
    return len(supporting)
426
+
427
+ # Init pass
428
+ L = sorted(MIS.keys(), key=lambda x: MIS[x])
429
+ F = []
430
+
431
+ F1 = []
432
+ for item in L:
433
+ if support[item] >= MIS[item]:
434
+ F1.append([{item}])
435
+
436
+ F.append(F1)
437
+ print("F1:", F1)
438
+
439
+ # Join
440
def join_step(Fk):
    """Build candidate patterns by joining every ordered pair taken from ``Fk``.

    Two rules are applied to each pair ``(left, right)``, including a pattern
    paired with itself:

    * Sequence extension: when ``left`` without its first element equals
      ``right`` without its last element, ``left`` followed by the last
      element of ``right`` is emitted.
    * Itemset extension: when both have the same number of elements and
      agree on everything but the last element, the shared prefix followed
      by the union of the two last elements is emitted, unless an equal
      candidate is already in the output.

    Because a pattern is also paired with itself, the second rule can emit a
    pattern equal to its input.

    Returns:
        The list of candidates, in generation order.
    """
    produced = []
    for left in Fk:
        for right in Fk:
            right_prefix = right[:-1]

            if left[1:] == right_prefix:
                produced.append([*left, right[-1]])

            if len(left) != len(right) or left[:-1] != right_prefix:
                continue

            merged = [*left[:-1], left[-1] | right[-1]]
            if merged not in produced:
                produced.append(merged)
    return produced
452
+
453
+ # Prune
454
def _one_item_removed(candidate):
    """Yield each sub-sequence of ``candidate`` obtained by deleting one item.

    An element that becomes empty is dropped from the sub-sequence. Results
    are tuples of frozensets so they can be looked up in a set.
    """
    for position, element in enumerate(candidate):
        for item in element:
            remainder = element - {item}
            middle = [remainder] if remainder else []
            reduced = candidate[:position] + middle + candidate[position + 1:]
            yield tuple(frozenset(part) for part in reduced)


def prune_step(candidates, Fk):
    """Keep the candidates whose one-item-smaller sub-sequences are all in ``Fk``.

    The previous version removed a whole *element* at a time. A one-element
    candidate such as ``[{1, 2}]`` was therefore reduced to the empty
    sequence and always rejected, so itemset extensions could never survive.
    Sub-sequences are now formed by removing a single *item*.

    Patterns are compared as tuples of frozensets, which also works when an
    itemset mixes ints and strings (sorting such a set raises ``TypeError``).

    NOTE(review): this is the plain GSP prune. MS-GSP is usually described
    with an exemption for sub-sequences that lack the minimum-MIS item; that
    exemption is not applied here, nor was it before. Confirm whether it is
    required.

    Args:
        candidates: Candidate patterns (lists of sets) from ``join_step``.
        Fk: Frequent patterns of the previous level (lists of sets).

    Returns:
        The surviving candidates, in their original order.
    """
    frequent = {tuple(frozenset(element) for element in seq) for seq in Fk}

    return [
        candidate
        for candidate in candidates
        if all(sub in frequent for sub in _one_item_removed(candidate))
    ]
472
+
473
+ # Main loop
474
+ k = 1
475
+ while True:
476
+ if len(F[k-1]) == 0:
477
+ break
478
+
479
+ Ck = prune_step(join_step(F[k-1]), F[k-1])
480
+ Fk = []
481
+
482
+ for c in Ck:
483
+ count = sequence_support(c)
484
+
485
+ items = set()
486
+ for s in c:
487
+ items.update(s)
488
+
489
+ min_mis = min(MIS[i] for i in items)
490
+
491
+ if (count / N) >= min_mis:
492
+ valid = True
493
+ item_list = list(items)
494
+
495
+ for i in range(len(item_list)):
496
+ for j in range(i+1, len(item_list)):
497
+ if abs(support[item_list[i]] - support[item_list[j]]) > SDC:
498
+ valid = False
499
+ break
500
+
501
+ if valid:
502
+ Fk.append(c)
503
+
504
+ if not Fk:
505
+ break
506
+
507
+ print(f"F{k+1}:", Fk)
508
+ F.append(Fk)
509
+ k += 1
510
+
511
+ # Output
512
def format_pattern(p):
    """Render a pattern (list of sets) as text, e.g. ``<{1,2}{3}>``.

    Items inside each itemset are listed in sorted order. Itemsets that mix
    ints and strings cannot be ordered natively (``sorted`` raises
    ``TypeError``), so those fall back to ordering by string form.
    """
    def ordered(itemset):
        try:
            return sorted(itemset)
        except TypeError:
            # Mixed item types: fall back to a deterministic textual order.
            return sorted(itemset, key=str)

    body = "".join(
        "{" + ",".join(map(str, ordered(itemset))) + "}" for itemset in p
    )
    return "<" + body + ">"
516
+
517
+ print("\nFINAL PATTERNS:")
518
+ for level in F:
519
+ for p in level:
520
+ print(f"Pattern :{format_pattern(p)} count: {sequence_support(p)}")
@@ -0,0 +1 @@
1
+ """IR source snippets."""
@@ -0,0 +1,14 @@
1
+ "IR source snippets."
2
+
3
+ import pandas as pd
4
+
5
+ # Dimensionality reduction source snippets
6
+
7
+ df = pd.DataFrame(
8
+ {
9
+ "A": [1, 2, 3],
10
+ "B": [4, 5, 6],
11
+ "C": [7, 8, 9],
12
+ }
13
+ )
14
+ print(df)
@@ -0,0 +1,109 @@
1
+ import matplotlib.pyplot as plt
2
+ import networkx as nx
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ # ===== INPUT =====
7
+ # rows = source page, cols = destination page
8
+ pages = ["A", "B", "C", "D"]
9
+ A = np.array(
10
+ [
11
+ [0, 1, 1, 0], # A -> B,C
12
+ [0, 0, 1, 0], # B -> C
13
+ [1, 0, 0, 0], # C -> A
14
+ [0, 0, 1, 0], # D -> C
15
+ ],
16
+ dtype=int,
17
+ )
18
+
19
+ d = 0.85
20
+ max_iter = 10
21
+ tol = 1e-8
22
+
23
+ # ===== PREP =====
24
+ n = len(pages)
25
+ base = (1 - d) / n
26
+ adj = pd.DataFrame(A, index=pages, columns=pages)
27
+
28
+ incoming = {p: [src for src in pages if adj.loc[src, p] == 1] for p in pages}
29
+ outgoing = {p: int(adj.loc[p].sum()) for p in pages}
30
+ pr = {p: 1 / n for p in pages}
31
+
32
+ print("Step 1: Web Graph")
33
+ print(adj)
34
+ print("\nTotal pages N =", n)
35
+ print("\nInitial PageRank:")
36
+ print(pd.DataFrame({"Page": pages, "Initial PR": [pr[p] for p in pages]}))
37
+
38
+ print("\nStep 2: Incoming Links")
39
+ print(
40
+ pd.DataFrame(
41
+ {
42
+ "Page": pages,
43
+ "Incoming Links": [
44
+ ", ".join(incoming[p]) if incoming[p] else "—" for p in pages
45
+ ],
46
+ }
47
+ )
48
+ )
49
+
50
+ print("\nOutgoing Links Count")
51
+ print(pd.DataFrame({"Page": pages, "Outgoing Links": [outgoing[p] for p in pages]}))
52
+
53
+ # ===== ITERATIONS =====
54
+ history = [pr.copy()]
55
+
56
+ for it in range(1, max_iter + 1):
57
+ new_pr = {}
58
+ print(f"\n\n===== Iteration {it} =====")
59
+ for p in pages:
60
+ s = 0
61
+ terms, nums = [], []
62
+ for src in incoming[p]:
63
+ c = pr[src] / outgoing[src] if outgoing[src] > 0 else 0
64
+ s += c
65
+ terms.append(f"PR({src})/{outgoing[src]}")
66
+ nums.append(f"{pr[src]:.4f}/{outgoing[src]}={c:.4f}")
67
+ new_pr[p] = base + d * s
68
+
69
+ print(f"\nPage {p}")
70
+ print("Incoming links:", ", ".join(incoming[p]) if incoming[p] else "—")
71
+ if terms:
72
+ print(f"PR({p}) = {base:.4f} + {d} * (" + " + ".join(terms) + ")")
73
+ print("Substitution:", " + ".join(nums))
74
+ else:
75
+ print(f"PR({p}) = {base:.4f}")
76
+ print(f"PR({p}) = {new_pr[p]:.6f}")
77
+
78
+ history.append(new_pr.copy())
79
+ diff = sum(abs(new_pr[p] - pr[p]) for p in pages)
80
+ print("\nPageRank after this iteration:")
81
+ print(
82
+ pd.DataFrame(
83
+ {"Page": pages, f"PR Iteration {it}": [round(new_pr[p], 6) for p in pages]}
84
+ )
85
+ )
86
+ pr = new_pr
87
+ if diff < tol:
88
+ break
89
+
90
+ # ===== FINAL RESULT =====
91
+ print("\n\nFinal PageRank:")
92
+ final_df = pd.DataFrame(
93
+ {"Page": pages, "Final PR": [pr[p] for p in pages]}
94
+ ).sort_values("Final PR", ascending=False)
95
+ print(final_df)
96
+
97
+ # ===== OPTIONAL VISUALIZATION =====
98
+ G = nx.DiGraph()
99
+ for i, src in enumerate(pages):
100
+ for j, dst in enumerate(pages):
101
+ if A[i, j] == 1:
102
+ G.add_edge(src, dst)
103
+
104
+ plt.figure(figsize=(6, 4))
105
+ pos = nx.spring_layout(G, seed=42)
106
+ sizes = [pr[p] * 5000 + 800 for p in G.nodes()]
107
+ nx.draw(G, pos, with_labels=True, node_size=sizes, arrows=True)
108
+ plt.title("Graph Visualization (node size ~ Final PageRank)")
109
+ plt.show()