pyinspect-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyinspect-cli
3
+ Version: 0.1.0
4
+ Summary: A CLI tool to inspect practical source codes
5
+ Author-email: Developer <developer@example.com>
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.7
10
+ Description-Content-Type: text/markdown
11
+
12
+ # pyinspect-cli
13
+
14
+ A CLI tool to inspect practical source code examples.
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pip install -e .
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ```bash
25
+ pyinspect-cli list
26
+ pyinspect-cli 1
27
+ ```
@@ -0,0 +1,16 @@
1
+ # pyinspect-cli
2
+
3
+ A CLI tool to inspect practical source code examples.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install -e .
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```bash
14
+ pyinspect-cli list
15
+ pyinspect-cli 1
16
+ ```
File without changes
@@ -0,0 +1,43 @@
1
import sys
import os


def main():
    """Entry point for the pyinspect-cli console script.

    Usage:
        pyinspect-cli list      -- list the available modules (1-8)
        pyinspect-cli <number>  -- print the source of module <number>
    """
    args = sys.argv[1:]

    if not args:
        print("Usage: pyinspect-cli <number> | pyinspect-cli list")
        sys.exit(1)

    cmd = args[0]

    # Resolve the bundled modules directory relative to this file so the
    # tool works no matter what the current working directory is.
    package_dir = os.path.dirname(os.path.abspath(__file__))
    modules_dir = os.path.join(package_dir, 'modules')

    if cmd == 'list':
        for i in range(1, 9):
            print(f"{i} - Module {i}")
        return

    if cmd.isdigit():
        module_num = int(cmd)
        if 1 <= module_num <= 8:
            file_name = f"m{module_num:02d}.py"
            file_path = os.path.join(modules_dir, file_name)

            if os.path.exists(file_path):
                # Explicit encoding: the bundled sources are UTF-8; relying
                # on the platform default could fail on non-UTF-8 locales.
                with open(file_path, 'r', encoding='utf-8') as f:
                    print(f.read())
            else:
                print(f"Error: Module file {file_name} not found.")
                sys.exit(1)
        else:
            print("Error: Please provide a module number between 1 and 8.")
            sys.exit(1)
    else:
        print(f"Error: Invalid command or module number '{cmd}'.")
        print("Usage: pyinspect-cli <number> | pyinspect-cli list")
        sys.exit(1)


if __name__ == "__main__":
    main()
File without changes
@@ -0,0 +1,41 @@
1
import nltk
from nltk.corpus import stopwords

# Two toy documents used to demonstrate a simple inverted index.
document1 = "The quick brown fox jumped over a lazy dog"
document2 = "The lazy dog slept in the sun"

nltk.download('stopwords')
stopWords = stopwords.words('english')

tokens1 = document1.lower().split()
tokens2 = document2.lower().split()
terms = list(set(tokens1 + tokens2))

inverted_index = {}
occ_num_doc1 = {}
occ_num_doc2 = {}

# Map every non-stopword term to the documents that contain it, recording
# per-document occurrence counts along the way.
for term in terms:
    if term in stopWords:
        continue

    postings = []
    if term in tokens1:
        postings.append("Document 1")
        occ_num_doc1[term] = tokens1.count(term)
    if term in tokens2:
        postings.append("Document 2")
        occ_num_doc2[term] = tokens2.count(term)
    inverted_index[term] = postings

# Pretty-print each posting list together with its occurrence counts.
for term, documents in inverted_index.items():
    print(term, "->", end="")
    for doc in documents:
        counts = occ_num_doc1 if doc == "Document 1" else occ_num_doc2
        print(f"{doc}({counts.get(term, 0)})", end=", ")
    print()
@@ -0,0 +1,111 @@
1
# Part A: Boolean retrieval over a tiny in-memory corpus.
# Maps document id -> whitespace-separated term string.
documents = {
    1: "apple banana orange",
    2: "apple banana",
    3: "banana orange",
    4: "apple"
}
8
+
9
def build_index(docs):
    """Build an inverted index: term -> set of document ids containing it."""
    index = {}
    for doc_id, text in docs.items():
        for term in set(text.split()):
            index.setdefault(term, set()).add(doc_id)
    return index
19
+
20
# Materialize the inverted index for the sample corpus defined above.
inverted_index = build_index(documents)
21
+
22
def boolean_and(operands, index):
    """Return ids of documents containing every term in *operands* (AND).

    With no operands the query matches the entire corpus (uses the
    module-level ``documents`` for its size, as in the original).
    """
    if not operands:
        return list(range(1, len(documents) + 1))

    hits = set(index.get(operands[0], set()))
    for term in operands[1:]:
        hits &= index.get(term, set())
    return list(hits)
31
+
32
def boolean_or(operands, index):
    """Return ids of documents containing at least one operand term (OR)."""
    union = set()
    for term in operands:
        union |= index.get(term, set())
    return list(union)
38
+
39
def boolean_not(operand, index, total_docs):
    """Return ids of documents (1..total_docs) that do NOT contain *operand*."""
    excluded = set(index.get(operand, set()))
    universe = set(range(1, total_docs + 1))
    return list(universe - excluded)
43
+
44
# Demo queries against the sample corpus.
query1 = ["apple", "banana"]
query2 = ["apple", "orange"]

result1 = boolean_and(query1, inverted_index)
# NOTE(review): this is an OR query, but the print label below says "and".
result2 = boolean_or(query2, inverted_index)
result3 = boolean_not("orange", inverted_index, len(documents))

print("Documents containing 'apple' and 'banana':", result1)
print("Documents containing 'apple' and 'orange':", result2)
print("Documents not containing 'orange':", result3)
54
+
55
+
56
+
57
+
58
+
59
+
60
+
61
+
62
# Part B: TF-IDF weighting and cosine similarity over a toy corpus.
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
import nltk
from nltk.corpus import stopwords
import numpy as np
from numpy.linalg import norm

# Training and testing datasets
train_set = ["The sky is blue.", "The sun is bright."]
test_set = ["The sun in the sky is bright."]

# Download stopwords and use them to filter the vocabulary
nltk.download('stopwords')
stopWords = stopwords.words('english')

# Create CountVectorizer and TfidfTransformer
vectorizer = CountVectorizer(stop_words=stopWords)
transformer = TfidfTransformer()

# Learn the vocabulary from the training set only; the test set is mapped
# onto that same vocabulary.
trainVectorizerArray = vectorizer.fit_transform(train_set).toarray()
testVectorizerArray = vectorizer.transform(test_set).toarray()

# Print vectorized results
print("Fit Vectorizer to train set:", trainVectorizerArray)
print("Transformer Vectorizer to test set:", testVectorizerArray)

# Cosine similarity between two count vectors, rounded to 3 decimals.
cx = lambda a, b: round(np.inner(a, b) / (norm(a) * norm(b)), 3)

# Cosine similarity of every training vector against every test vector.
for vector in trainVectorizerArray:
    print(vector)

    for testV in testVectorizerArray:
        print(testV)
        cosine = cx(vector, testV)
        print(cosine)

# BUG FIX: the TF-IDF transformer must be fitted on the TRAINING counts
# before transforming them; the original fitted on the test counts here,
# so the printed "train" TF-IDF matrix used the wrong document frequencies.
transformer.fit(trainVectorizerArray)
print()
print(transformer.transform(trainVectorizerArray).toarray())

# Fit on the test counts, then transform the test set.
transformer.fit(testVectorizerArray)
print()

tfidf = transformer.transform(testVectorizerArray)
print(tfidf.todense())
@@ -0,0 +1,19 @@
1
def editDistance(str1, str2, m, n):
    """Levenshtein (edit) distance between str1[:m] and str2[:n].

    Counts the minimum number of insert/remove/replace operations.
    Implemented with iterative dynamic programming — O(m*n) time and
    O(n) space — replacing the original plain recursion, which was
    exponential in the string lengths.
    """
    if m == 0:
        return n
    if n == 0:
        return m

    # prev[j] = distance between str1[:i-1] and str2[:j]
    prev = list(range(n + 1))
    for i in range(1, m + 1):
        curr = [i] + [0] * n
        for j in range(1, n + 1):
            if str1[i - 1] == str2[j - 1]:
                curr[j] = prev[j - 1]          # characters match: no cost
            else:
                curr[j] = 1 + min(
                    curr[j - 1],               # insert
                    prev[j],                   # remove
                    prev[j - 1]                # replace
                )
        prev = curr
    return prev[n]
14
+
15
# Test case: classic textbook example, expected distance is 3.
str1 = "sunday"
str2 = "saturday"

print("Edit Distance is:", editDistance(str1, str2, len(str1), len(str2)))
@@ -0,0 +1,24 @@
1
def calculate_metrics(retrieved_set, relevant_set):
    """Compute precision, recall and F-measure for a retrieval result.

    Prints the TP/FP/FN breakdown as a side effect, then returns the three
    metrics; 0 is used wherever a denominator would be zero.
    """
    true_positive = len(retrieved_set & relevant_set)
    false_positive = len(retrieved_set - relevant_set)
    false_negative = len(relevant_set - retrieved_set)

    print("True Positive:", true_positive,
          "\nFalse Positive:", false_positive,
          "\nFalse Negative:", false_negative, "\n")

    retrieved_total = true_positive + false_positive
    relevant_total = true_positive + false_negative

    precision = true_positive / retrieved_total if retrieved_total > 0 else 0
    recall = true_positive / relevant_total if relevant_total > 0 else 0
    f_measure = (2 * precision * recall / (precision + recall)
                 if (precision + recall) > 0 else 0)

    return precision, recall, f_measure
15
+
16
# Example data: three retrieved documents, of which only doc1 is relevant.
retrieved_set = set(["doc1", "doc2", "doc3"])
relevant_set = set(["doc1", "doc4"])

precision, recall, f_measure = calculate_metrics(retrieved_set, relevant_set)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F-measure: {f_measure}")
@@ -0,0 +1,82 @@
1
+ import numpy as np
2
+ import re
3
+
4
class NaiveBayesClassifier:
    """Multinomial Naive Bayes for binary ('positive'/'negative') sentiment.

    Uses a bag-of-words representation with Laplace (add-one) smoothing.
    Call train() before predict().
    """

    def __init__(self):
        self.vocab = []               # vocabulary learned in train()
        self.positive_counts = None   # per-term counts in positive reviews
        self.negative_counts = None   # per-term counts in negative reviews
        self.positive_prior = 0
        self.negative_prior = 0

    def preprocess_review(self, review):
        """Lowercase and tokenize a review into word tokens."""
        return re.findall(r'\b\w+\b', review.lower())

    def vectorize_review(self, review):
        """Map a token list to a count vector over self.vocab.

        Builds the word->index map once per call (the original called
        list.index per token, an O(V)-per-word scan).
        """
        index_of = {word: i for i, word in enumerate(self.vocab)}
        vector = np.zeros(len(self.vocab))
        for word in review:
            idx = index_of.get(word)
            if idx is not None:
                vector[idx] += 1
        return vector

    def train(self, X_train, Y_train):
        """Learn vocabulary, per-class term counts and class priors."""
        all_tokens = []
        for review in X_train:
            all_tokens.extend(self.preprocess_review(review))
        self.vocab = list(set(all_tokens))  # unique words

        self.positive_counts = np.zeros(len(self.vocab))
        self.negative_counts = np.zeros(len(self.vocab))

        positive_count = 0
        for review, label in zip(X_train, Y_train):
            vec = self.vectorize_review(self.preprocess_review(review))
            if label == 'positive':
                self.positive_counts += vec
                positive_count += 1
            else:
                self.negative_counts += vec

        self.positive_prior = positive_count / len(Y_train)
        self.negative_prior = 1 - self.positive_prior

    def predict(self, X_test):
        """Return a 'positive'/'negative' label for each review in X_test.

        FIXES vs. the original: the smoothed per-term probabilities are
        loop-invariant, so they are computed once instead of per review,
        and scores are compared in log space — raw probability products
        underflow to 0.0 for long reviews, making predictions arbitrary.
        """
        log_pos = np.log((self.positive_counts + 1) /
                         (self.positive_counts.sum() + len(self.vocab)))
        log_neg = np.log((self.negative_counts + 1) /
                         (self.negative_counts.sum() + len(self.vocab)))
        # log(0) prior maps to -inf, matching the original's zero likelihood.
        log_pos_prior = np.log(self.positive_prior) if self.positive_prior > 0 else -np.inf
        log_neg_prior = np.log(self.negative_prior) if self.negative_prior > 0 else -np.inf

        predictions = []
        for review in X_test:
            vec = self.vectorize_review(self.preprocess_review(review))
            pos_score = np.dot(vec, log_pos) + log_pos_prior
            neg_score = np.dot(vec, log_neg) + log_neg_prior
            predictions.append('positive' if pos_score > neg_score else 'negative')

        return predictions
63
+
64
# Training Data: one positive and one negative example review.
positive_review = ["The movie was amazing, I like great acting and an engaging plot"]
negative_review = ["I hate this movie so much, it's terrible!"]
X_train = positive_review + negative_review
Y_train = ['positive'] * len(positive_review) + ['negative'] * len(negative_review)
X_test = ["The acting was superb!"]
Y_test = ['positive']

# Train the classifier
classifier = NaiveBayesClassifier()
classifier.train(X_train, Y_train)

# Predictions
predictions = classifier.predict(X_test)
print("Predicted class for the new review:", predictions[0])

# Accuracy Calculation: fraction of test labels predicted correctly.
accuracy = sum(1 for true_label, predicted_label in zip(Y_test, predictions) if true_label == predicted_label) / len(Y_test)
print("Accuracy of the model:", accuracy)
@@ -0,0 +1,40 @@
1
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Sample documents
documents = [
    "Machine learning is the study of computer algorithms that improve automatically through experience.",
    "Deep learning is a subset of machine learning.",
    "Natural language processing is a field of artificial intelligence.",
    "Computer vision is a field of study that enables computers to interpret and understand the visual world.",
    "Reinforcement learning is a type of machine learning algorithm that teaches an agent how to make decisions in an environment by rewarding desired behaviors.",
    "Information retrieval is the process of obtaining information from a collection of documents.",
    "Text mining is the process of deriving high-quality information from text.",
    "Data clustering is the task of dividing a set of objects into groups.",
    "Hierarchical clustering builds a tree of clusters.",
    "K-means clustering is a method of vector quantization."
]

# Convert documents into TF-IDF vectors
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(documents)

# Perform K-means clustering (fixed seed for reproducibility)
k = 3  # Number of clusters
kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
kmeans.fit(X)

# Evaluate clustering results (silhouette in [-1, 1]; higher is better)
silhouette_avg = silhouette_score(X, kmeans.labels_)
print("Silhouette Score:", silhouette_avg)

# Print the documents belonging to each cluster
for cluster_id in range(k):
    member_indices = np.where(kmeans.labels_ == cluster_id)[0]
    print(f"\nCluster {cluster_id + 1}:")
    for doc_index in member_indices:
        print("-", documents[doc_index])
@@ -0,0 +1,117 @@
1
+ # Part A:
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
class WebCrawler:
    """Minimal recursive crawler that 'indexes' page titles and first paragraphs."""

    def __init__(self):
        self.visited_urls = set()

    def crawl(self, url, depth=3):
        """Fetch *url*, index it, and recurse into its absolute links up to *depth*."""
        if depth == 0 or url in self.visited_urls:
            return

        try:
            response = requests.get(url, timeout=5)
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, 'html.parser')
                self.index_page(url, soup)
                self.visited_urls.add(url)

                for anchor in soup.find_all('a', href=True):
                    target = anchor.get('href')
                    if target.startswith('http'):  # only follow absolute URLs
                        print(f"Crawling: {target}")
                        self.crawl(target, depth - 1)

        except Exception as e:
            print(f"Error crawling {url}: {e}")

    def index_page(self, url, soup):
        """Extracts and indexes the page title and first paragraph."""
        title = soup.title.string if soup.title else "No title"
        first_p = soup.find('p')
        paragraph = first_p.get_text() if first_p else "No paragraph found"

        print(f"\nIndexing: {url}")
        print(f"Title: {title}")
        print(f"First Paragraph: {paragraph}\n")


if __name__ == "__main__":
    start_url = "https://example.com"
    crawler = WebCrawler()
    crawler.crawl(start_url, depth=2)
41
+
42
+
43
+
44
+
45
+
46
+ # Part B:
47
+ import requests
48
+ from bs4 import BeautifulSoup
49
+ import time
50
+ from urllib.parse import urlparse
51
+
52
class WebCrawler:
    """Politer crawler: minimal robots.txt check plus a delay between requests."""

    def __init__(self):
        self.visited_urls = set()

    def crawl(self, url, depth=3, delay=1):
        """Fetch *url* (if robots.txt allows), index it, recurse up to *depth*."""
        if depth == 0 or url in self.visited_urls:
            return

        try:
            # Check robots.txt
            if not self.is_allowed_by_robots(url):
                print(f"Skipping {url} due to robots.txt rules")
                return

            response = requests.get(url, timeout=5)
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, 'html.parser')
                self.index_page(url, soup)
                self.visited_urls.add(url)

                for link in soup.find_all('a', href=True):
                    new_url = link.get('href')
                    if new_url.startswith('http'):
                        time.sleep(delay)  # delay between requests (politeness)
                        self.crawl(new_url, depth - 1, delay)
            else:
                print(f"Failed to fetch {url}: Status code {response.status_code}")

        except Exception as e:
            print(f"Error crawling {url}: {e}")

    def is_allowed_by_robots(self, url):
        """Very rough robots.txt check for the 'User-agent: *' section.

        Returns False only when that section contains 'Disallow: /'.
        BUG FIX: the original returned None (falsy) when robots.txt was
        served with a non-200 status, which made the crawler skip
        perfectly allowed sites; a missing robots.txt now means allowed.
        """
        parsed_url = urlparse(url)
        robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt"

        try:
            response = requests.get(robots_url, timeout=5)
            if response.status_code == 200:
                robots_txt = response.text
                if "User-agent: *" in robots_txt:
                    start_index = robots_txt.find("User-agent: *")
                    end_index = robots_txt.find("User-agent:", start_index + 1)

                    if end_index == -1:
                        end_index = len(robots_txt)

                    relevant_section = robots_txt[start_index:end_index]

                    # NOTE: this substring test also matches 'Disallow: /path'
                    # lines, so it is stricter than a real robots.txt parser.
                    if "Disallow: /" in relevant_section:
                        return False
            return True  # no blanket disallow found, or robots.txt missing
        except requests.RequestException:
            return True  # If robots.txt is unreachable, assume allowed

    def index_page(self, url, soup):
        """Print the page title and first paragraph as a crude 'index' entry."""
        title = soup.title.string if soup.title else "No title"
        paragraph = soup.find('p').get_text() if soup.find('p') else "No paragraph found"
        print(f"\nIndexing: {url}")
        print(f"Title: {title}")
        print(f"First Paragraph: {paragraph}")
        print("-------------------------------------------------")


# Example usage
if __name__ == "__main__":
    crawler = WebCrawler()
    crawler.crawl("https://www.mercedes-benz.com")
@@ -0,0 +1,44 @@
1
+ # link analysis
2
+
3
def pagerank(graph, damping_factor=0.85, epsilon=1.0e-8, max_iterations=100):
    """Iterative (power-method) PageRank over an adjacency-list graph.

    Parameters:
        graph: dict mapping node -> list of nodes it links to.
        damping_factor: probability of following a link vs. random jump.
        epsilon: convergence threshold on the max per-node score change.
        max_iterations: hard cap on iterations.

    Returns:
        dict mapping node -> PageRank score.
    """
    num_nodes = len(graph)
    # Start from the uniform distribution.
    pagerank_scores = {node: 1.0 / num_nodes for node in graph}

    # PERF FIX: the original rescanned every edge list for every node on
    # every iteration (O(V*E) per iteration). The incoming-link map and
    # out-degrees are loop-invariant, so build them once up front.
    # set(links) mirrors the original's `node in links` membership test,
    # which credited a duplicated link only once.
    incoming = {node: [] for node in graph}
    out_degree = {node: len(links) for node, links in graph.items()}
    for referring_page, links in graph.items():
        for node in set(links):
            if node in incoming:  # ignore links pointing outside the graph
                incoming[node].append(referring_page)

    base_score = (1 - damping_factor) / num_nodes
    for _ in range(max_iterations):
        new_pagerank_scores = {}
        max_diff = 0

        for node in graph:
            new_pagerank = base_score
            for referring_page in incoming[node]:
                new_pagerank += damping_factor * (
                    pagerank_scores[referring_page] / out_degree[referring_page])

            new_pagerank_scores[node] = new_pagerank
            max_diff = max(max_diff, abs(new_pagerank - pagerank_scores[node]))

        pagerank_scores = new_pagerank_scores

        if max_diff < epsilon:
            break

    return pagerank_scores
28
+
29
# Create a small web graph: node -> list of pages it links to.
web_graph = {
    'A': ['B', 'C'],
    'B': ['C'],
    'C': ['A']
}

# Compute PageRank scores
pagerank_scores = pagerank(web_graph)

# Analyze the results: rank nodes from most to least important.
sorted_scores = sorted(pagerank_scores.items(), key=lambda x: x[1], reverse=True)

print("PageRank scores:")
for node, score in sorted_scores:
    print(f"{node}: {score:.6f}")
@@ -0,0 +1,27 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyinspect-cli
3
+ Version: 0.1.0
4
+ Summary: A CLI tool to inspect practical source codes
5
+ Author-email: Developer <developer@example.com>
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.7
10
+ Description-Content-Type: text/markdown
11
+
12
+ # pyinspect-cli
13
+
14
+ A CLI tool to inspect practical source code examples.
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pip install -e .
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ```bash
25
+ pyinspect-cli list
26
+ pyinspect-cli 1
27
+ ```
@@ -0,0 +1,18 @@
1
+ README.md
2
+ pyproject.toml
3
+ pyinspect_cli/__init__.py
4
+ pyinspect_cli/cli.py
5
+ pyinspect_cli.egg-info/PKG-INFO
6
+ pyinspect_cli.egg-info/SOURCES.txt
7
+ pyinspect_cli.egg-info/dependency_links.txt
8
+ pyinspect_cli.egg-info/entry_points.txt
9
+ pyinspect_cli.egg-info/top_level.txt
10
+ pyinspect_cli/modules/__init__.py
11
+ pyinspect_cli/modules/m01.py
12
+ pyinspect_cli/modules/m02.py
13
+ pyinspect_cli/modules/m03.py
14
+ pyinspect_cli/modules/m04.py
15
+ pyinspect_cli/modules/m05.py
16
+ pyinspect_cli/modules/m06.py
17
+ pyinspect_cli/modules/m07.py
18
+ pyinspect_cli/modules/m08.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ pyinspect-cli = pyinspect_cli.cli:main
@@ -0,0 +1 @@
1
+ pyinspect_cli
@@ -0,0 +1,24 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pyinspect-cli"
7
+ version = "0.1.0"
8
+ description = "A CLI tool to inspect practical source codes"
9
+ authors = [
10
+ { name = "Developer", email = "developer@example.com" }
11
+ ]
12
+ readme = "README.md"
13
+ requires-python = ">=3.7"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Operating System :: OS Independent",
18
+ ]
19
+
20
+ [project.scripts]
21
+ pyinspect-cli = "pyinspect_cli.cli:main"
22
+
23
+ [tool.setuptools]
24
+ packages = ["pyinspect_cli", "pyinspect_cli.modules"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+