piirgg-0.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
piirgg-0.1/MANIFEST.in ADDED
@@ -0,0 +1 @@
+ recursive-include piirgg/data *
piirgg-0.1/PKG-INFO ADDED
@@ -0,0 +1,3 @@
+ Metadata-Version: 2.4
+ Name: piirgg
+ Version: 0.1
File without changes
@@ -0,0 +1,14 @@
+ import os
+ import pkgutil
+
+ def hide_all_files():
+     base_path = os.path.dirname(__file__)
+     data_path = os.path.join(base_path, "data")
+
+     for file in os.listdir(data_path):
+         full_path = os.path.join(data_path, file)
+         os.system(f'attrib +h "{full_path}"')
+
+ def get_file(name):
+     data = pkgutil.get_data(__name__, f"data/{name}")
+     return data.decode("utf-8")
@@ -0,0 +1,13 @@
+ import google.generativeai as g
+
+ API_KEY = "AIzaSyDUpBhbWH0JhrN1ViodLY3"
+ g.configure(api_key=API_KEY)
+
+ model = g.GenerativeModel("gemini-2.5-flash")
+
+ prompt = r"""
+ """
+
+ response = model.generate_content(prompt)
+
+ print(response.text)
@@ -0,0 +1,83 @@
+ plays={
+     "Anthony and Cleopatra":"Anthony is there, Brutus is Caeser is with Cleopatra mercy worser.",
+     "Julius Ceaser":"Anthony is there, Brutus is Caeser is but Calpurnia is.",
+     "The Tempest":"mercy worser",
+     "Ham let":"Caeser and Brutus are present with mercy and worser",
+     "Othello":"Caeser is present with mercy and worser",
+     "Macbeth":"Anthony is there, Caeser, mercy."
+ }
+ words=["Anthony","Brutus","Caeser","Calpurnia","Cleopatra","mercy","worser"]
+
+ list1 = [[0 for _ in range(len(words))] for _ in range(len(plays))]
+ print(list1)
+
+ def prepare_matrix(list1, plays, words):
+     for i in range(len(words)):
+         for key in plays.keys():
+             if words[i] in plays[key]:
+                 key_list = list(plays.keys())
+                 list1[key_list.index(key)][i] = 1
+ prepare_matrix(list1, plays, words)
+ for row in list1:
+     print(row)
+
+ def findAnd(list1, variable1, variable2):
+     idx_variable1 = words.index(variable1)
+     idx_variable2 = words.index(variable2)
+     for i in range(len(plays)):
+         if list1[i][idx_variable1] and list1[i][idx_variable2]:
+             return list1[i]
+
+ def findOr(list1, variable1, variable2):
+     idx_variable1 = words.index(variable1)
+     idx_variable2 = words.index(variable2)
+     for i in range(len(plays)):
+         if list1[i][idx_variable1] or list1[i][idx_variable2]:
+             return list1[i]
+
+ key_list = list(plays.keys())
+ print("Anthony and Calpurnia is together in play: ", key_list[list1.index(findAnd(list1, "Anthony", "Calpurnia"))], findAnd(list1, "Anthony", "Calpurnia"))
+ print("Anthony and Calpurnia is in or condition: ", key_list[list1.index(findOr(list1, "Anthony", "Calpurnia"))], findOr(list1, "Anthony", "Calpurnia"))
+
+ # Second Code :
+ plays={
+     "Antony and Cleopatra, Act III, Scene ii":"When Antony found Julius Caesar dead,He cried almost to roaring; and he wept When at Philippi he found Brutus slain.",
+     "Julius Ceaser":"I did enact Julius Caesar: I was killed i' the Capitol; Brutus killed me."
+ }
+ words=["Antony","Brutus","Caesar","Calpurnia","Cleopatra","mercy","worser","Philippi"]
+
+ list2 = [[0 for _ in range(len(words))] for _ in range(len(plays))]
+ print(list2)
+
+ def prepare_matrix2(list1, plays, words):
+     for i in range(len(words)):
+         for key in plays.keys():
+             if words[i] in plays[key]:
+                 key_list = list(plays.keys())
+                 list2[key_list.index(key)][i] = 1
+
+ prepare_matrix2(list2, plays, words)
+ for i in list2:
+     print(i)
+
+ def findAnd2(list2, a, b, c):
+     idx_1=words.index(a)
+     idx_2=words.index(b)
+     idx_3=words.index(c)
+     for i in range(len(plays)):
+         if list2[i][idx_1] and list2[i][idx_2] and not list2[i][idx_3]:
+             return list2[i]
+
+ key_list = list(plays.keys())
+
+ # def findor(l, a, b, c):
+ #     idx_1=words.index(a)
+ #     idx_2=words.index(b)
+ #     for i in range(len(plays)):
+ #         if l[i][idx_1] or l[i][idx_2]:
+ #             return l[i]
+
+
+ print("Brutus AND Caesar AND NOT Calpurnia:",key_list[list2.index(findAnd2(list2,"Brutus","Caesar","Calpurnia"))],findAnd2(list2,"Brutus","Caesar","Calpurnia"))
+ # print("Brutus OR Caesar OR NOT Calpurnia:",key_list[list2.index(findand(listt,"Brutus","Caesar","Calpurnia"))],findand(listt,"Brutus","Caesar","Calpurnia"))
+
@@ -0,0 +1,118 @@
+ import csv
+ import requests
+ import xml.etree.ElementTree as ET
+ import networkx as nx
+ import matplotlib.pyplot as plt
+
+
+ # 1. Load RSS feed
+ def load_rss(url, file_name):
+     response = requests.get(url)
+
+     if response.status_code == 200:
+         with open(file_name, 'wb') as file:
+             file.write(response.content)
+         print(f"RSS feed saved as '{file_name}'")
+     else:
+         print("Failed to fetch RSS feed")
+
+
+ # 2. Parse XML
+ def parse_xml(xml_file):
+     tree = ET.parse(xml_file)
+     root = tree.getroot()
+
+     news_items = []
+     allowed_fields = {'guid', 'title', 'pubDate', 'description', 'link'}
+
+     for item in root.findall('.//item'):
+         news = {}
+
+         for child in item:
+             tag = child.tag.split('}')[-1]  # remove namespace
+
+             if tag in allowed_fields:
+                 news[tag] = child.text
+
+             # Optional media
+             if tag == 'content' and 'url' in child.attrib:
+                 news['media'] = child.attrib['url']
+
+         news_items.append(news)
+
+     return news_items
+
+
+ # 3. Save to CSV (Excel)
+ def save_to_csv(news_items, file_name):
+     fields = ['guid', 'title', 'pubDate', 'description', 'link', 'media']
+
+     with open(file_name, 'w', newline='', encoding='utf-8') as csvfile:
+         writer = csv.DictWriter(csvfile, fieldnames=fields)
+         writer.writeheader()
+         writer.writerows(news_items)
+
+     print(f"Data saved to '{file_name}' (can open in Excel)")
+
+
+ # 4. Generate Web Graph
+ def generate_web_graph(news_items):
+     graph = nx.DiGraph()
+
+     source_node = "RSS Feed"
+     graph.add_node(source_node)
+
+     for item in news_items:
+         title = item.get('title', 'Unknown Article')
+         link = item.get('link', '')
+
+         graph.add_node(title)
+         graph.add_edge(source_node, title)
+
+         if link:
+             graph.add_node(link)
+             graph.add_edge(title, link)
+
+     return graph
+
+
+ # 5. Plot Graph
+ def plot_graph(graph):
+     plt.figure(figsize=(12, 8))
+
+     pos = nx.spring_layout(graph, k=0.5, seed=42)
+
+     nx.draw(
+         graph,
+         pos,
+         with_labels=True,
+         node_size=1500,
+         node_color="lightblue",
+         font_size=8,
+         edge_color="gray",
+         arrows=True
+     )
+
+     plt.title("Web Graph from RSS Feed")
+     plt.show()
+
+
+ # Main function
+ def main():
+     rss_url = "https://feeds.feedburner.com/50WordStories"
+     xml_file = "rss_feed.xml"
+     csv_file = "news_data.csv"
+
+     load_rss(rss_url, xml_file)
+
+     news_items = parse_xml(xml_file)
+
+     save_to_csv(news_items, csv_file)
+
+     graph = generate_web_graph(news_items)
+
+     plot_graph(graph)
+
+
+ if __name__ == "__main__":
+     main()
@@ -0,0 +1,101 @@
+ # 1) Edit distance between strings s1 and s2
+ def edit_distance_recursive(str1, str2, len1, len2):
+     # Base cases
+     if len1 == 0:
+         return len2
+     if len2 == 0:
+         return len1
+
+     # If last characters match
+     if str1[len1 - 1] == str2[len2 - 1]:
+         return edit_distance_recursive(str1, str2, len1 - 1, len2 - 1)
+
+     # If last characters don't match
+     return 1 + min(
+         edit_distance_recursive(str1, str2, len1, len2 - 1),     # Insert
+         edit_distance_recursive(str1, str2, len1 - 1, len2),     # Delete
+         edit_distance_recursive(str1, str2, len1 - 1, len2 - 1)  # Replace
+     )
+
+
+ # Input
+ string1 = input("Enter first string: ")
+ string2 = input("Enter second string: ")
+
+ distance = edit_distance_recursive(string1, string2, len(string1), len(string2))
+ print("Edit Distance:", distance)
+
+ # 2) Weighted edit distance between strings s1 and s2
+ import numpy as np
+
+ def levenshtein_distance(str1, str2):
+     rows = len(str1) + 1
+     cols = len(str2) + 1
+
+     # Create matrix
+     dp_matrix = np.zeros((rows, cols), dtype=int)
+
+     # Initialize first row and column
+     for i in range(rows):
+         dp_matrix[i][0] = i
+     for j in range(cols):
+         dp_matrix[0][j] = j
+
+     # Fill matrix
+     for i in range(1, rows):
+         for j in range(1, cols):
+             if str1[i - 1] == str2[j - 1]:
+                 dp_matrix[i][j] = min(
+                     dp_matrix[i - 1][j] + 1,      # Delete
+                     dp_matrix[i - 1][j - 1],      # Match
+                     dp_matrix[i][j - 1] + 1       # Insert
+                 )
+             else:
+                 dp_matrix[i][j] = min(
+                     dp_matrix[i - 1][j] + 1,      # Delete
+                     dp_matrix[i - 1][j - 1] + 1,  # Replace
+                     dp_matrix[i][j - 1] + 1       # Insert
+                 )
+
+     print("DP Matrix:\n", dp_matrix)
+     return dp_matrix[rows - 1][cols - 1]
+
+
+ print("Levenshtein Distance:", levenshtein_distance("cat", "dog"))
+
+
+ # 3) Two sentences are given. Compute the edit distance at the word level:
+
+ # Sentence 1: I love natural language processing
+ # Sentence 2: I enjoy learning language processing
+
+ def word_edit_distance(words1, words2, len1, len2):
+     # Base cases
+     if len1 == 0:
+         return len2
+     if len2 == 0:
+         return len1
+
+     # If words match
+     if words1[len1 - 1] == words2[len2 - 1]:
+         return word_edit_distance(words1, words2, len1 - 1, len2 - 1)
+
+     # If words don't match
+     return 1 + min(
+         word_edit_distance(words1, words2, len1, len2 - 1),     # Insert
+         word_edit_distance(words1, words2, len1 - 1, len2),     # Delete
+         word_edit_distance(words1, words2, len1 - 1, len2 - 1)  # Replace
+     )
+
+
+ # Input
+ sentence1 = input("Enter sentence 1: ")
+ sentence2 = input("Enter sentence 2: ")
+
+ # Convert to word lists
+ words_list1 = sentence1.split()
+ words_list2 = sentence2.split()
+
+ distance = word_edit_distance(words_list1, words_list2, len(words_list1), len(words_list2))
+
+ print("Word-level Edit Distance:", distance)
@@ -0,0 +1,46 @@
+ def soundex(word):
+     # Handle empty input
+     if not word:
+         return ""
+
+     word = word.upper()
+
+     # Soundex mapping
+     soundex_map = {
+         "B": "1", "F": "1", "P": "1", "V": "1",
+         "C": "2", "G": "2", "J": "2", "K": "2", "Q": "2", "S": "2", "X": "2", "Z": "2",
+         "D": "3", "T": "3",
+         "L": "4",
+         "M": "5", "N": "5",
+         "R": "6"
+     }
+
+     first_letter = word[0]
+
+     encoded_digits = ""
+     previous_digit = ""
+
+     # Process remaining characters
+     for char in word[1:]:
+         digit = soundex_map.get(char, "")  # safer lookup
+
+         # Avoid consecutive duplicates
+         if digit != previous_digit:
+             encoded_digits += digit
+
+         previous_digit = digit
+
+     # Combine first letter with digits
+     soundex_code = first_letter + encoded_digits
+
+     # Pad or trim to 4 characters
+     soundex_code = (soundex_code + "000")[:4]
+
+     return soundex_code
+
+
+ # Test words
+ test_words = ["Robert", "Rupert", "Ruia", "Herman", "Hermann"]
+
+ for word in test_words:
+     print(word, "=>", soundex(word))
@@ -0,0 +1,97 @@
+ # 1) BIGRAM
+ def generate_bigrams(text):
+     bigrams = []
+     for i in range(len(text) - 1):
+         bigrams.append(text[i:i+2])  # cleaner slicing
+     return bigrams
+
+
+ def calculate_jaccard(set1, set2):
+     union = set1 | set2
+     intersection = set1 & set2
+     return union, intersection
+
+
+ # Input
+ string1 = "hello"
+ string2 = "yellow"
+
+ bigrams1 = generate_bigrams(string1)
+ bigrams2 = generate_bigrams(string2)
+
+ print("Bigrams of string1:", bigrams1)
+ print("Bigrams of string2:", bigrams2)
+
+ union, intersection = calculate_jaccard(set(bigrams1), set(bigrams2))
+
+ print("Union:", union)
+ print("Intersection:", intersection)
+
+ similarity = len(intersection) / len(union)
+ print(f"Bigram Jaccard Similarity: {similarity:.3f}")
+
+ print("-"*80)
+
+ # 2) TRIGRAM
+ def generate_trigrams(text):
+     trigrams = []
+     for i in range(len(text) - 2):
+         trigrams.append(text[i:i+3])
+     return trigrams
+
+
+ # Input
+ string1 = "hello"
+ string2 = "yellow"
+
+ trigrams1 = generate_trigrams(string1)
+ trigrams2 = generate_trigrams(string2)
+
+ print("Trigrams of string1:", trigrams1)
+ print("Trigrams of string2:", trigrams2)
+
+ union = set(trigrams1) | set(trigrams2)
+ intersection = set(trigrams1) & set(trigrams2)
+
+ print("Union:", union)
+ print("Intersection:", intersection)
+
+ similarity = len(intersection) / len(union)
+ print(f"Trigram Jaccard Similarity: {similarity:.3f}")
+
+ print("-"*80)
+ # 3) Jaccard Coefficient for n-grams
+ def generate_ngrams(text, n):
+     if not isinstance(text, str) or len(text) < n:
+         return []
+     return [text[i:i+n] for i in range(len(text) - n + 1)]
+
+
+ def jaccard_similarity(text1, text2, n):
+     ngrams1 = generate_ngrams(text1.lower(), n)
+     ngrams2 = generate_ngrams(text2.lower(), n)
+
+     set1 = set(ngrams1)
+     set2 = set(ngrams2)
+
+     intersection = set1 & set2
+     union = set1 | set2
+
+     print(f"{n}-grams of text1:", ngrams1)
+     print(f"{n}-grams of text2:", ngrams2)
+     print("Intersection:", intersection)
+     print("Union:", union)
+
+     if len(union) == 0:
+         return 0.0
+
+     return len(intersection) / len(union)
+
+
+ # Example
+ string1 = "hello"
+ string2 = "yellow"
+ n = 3
+
+ similarity_score = jaccard_similarity(string1, string2, n)
+ print(f"Jaccard Similarity ({n}-gram): {similarity_score:.3f}")
@@ -0,0 +1,58 @@
+ def compute_pagerank(graph, damping_factor=0.85, iterations=3):
+     # Get all pages (nodes)
+     pages = list(graph.keys())
+     total_pages = len(pages)
+
+     # Initialize PageRank (equal probability)
+     page_rank = {page: 1 / total_pages for page in pages}
+     print("Initial PageRank:", page_rank)
+
+     # Iterate to update PageRank
+     for iteration in range(1, iterations + 1):
+         new_page_rank = {page: 0 for page in pages}
+
+         # Distribute rank scores
+         for page in pages:
+             outgoing_links = graph[page]
+
+             # If page has outgoing links
+             if len(outgoing_links) > 0:
+                 share = page_rank[page] / len(outgoing_links)
+                 for linked_page in outgoing_links:
+                     new_page_rank[linked_page] += share
+             else:
+                 # Handle dangling node (no outgoing links)
+                 share = page_rank[page] / total_pages
+                 for p in pages:
+                     new_page_rank[p] += share
+
+         # Apply damping factor
+         for page in pages:
+             new_page_rank[page] = (
+                 (1 - damping_factor) / total_pages +
+                 damping_factor * new_page_rank[page]
+             )
+
+         page_rank = new_page_rank
+
+         print(f"\nAfter iteration {iteration}:")
+         for page in sorted(page_rank):
+             print(f"{page}: {page_rank[page]:.4f}")
+
+     return page_rank
+
+
+ # Example graph
+ web_graph = {
+     'A': ['B', 'C'],
+     'B': ['C'],
+     'C': ['A'],
+     'D': ['C']
+ }
+
+ # Run PageRank
+ final_ranks = compute_pagerank(web_graph, damping_factor=0.85, iterations=3)
+
+ print("\nFinal PageRank:")
+ for page in sorted(final_ranks):
+     print(f"{page}: {final_ranks[page]:.4f}")
@@ -0,0 +1,73 @@
+ # Cosine similarity focuses on meaningful words, while Jaccard counts all words.
+ # The result differs because cosine similarity uses vector representation after preprocessing like stopword removal and stemming,
+ # whereas Jaccard similarity uses raw word sets.
+ import nltk
+ import numpy as np
+ from collections import defaultdict
+ from nltk.corpus import stopwords
+ from nltk.tokenize import word_tokenize
+ from nltk.stem import PorterStemmer
+
+ # Download required resources (run once)
+ nltk.download("punkt")
+ nltk.download("stopwords")
+
+
+ def preprocess_text(file_path):
+     # Read file
+     with open(file_path, "r") as file:
+         text = file.read()
+
+     # Tokenization
+     tokens = word_tokenize(text.lower())
+
+     # Stemming
+     stemmer = PorterStemmer()
+     stemmed_words = [stemmer.stem(word) for word in tokens]
+
+     # Remove stopwords
+     stop_words = set(stopwords.words("english"))
+     filtered_words = [word for word in stemmed_words if word not in stop_words]
+
+     # Word frequency (Term Frequency)
+     word_count = defaultdict(int)
+     for word in filtered_words:
+         word_count[word] += 1
+
+     return word_count
+
+
+ def cosine_similarity(vector1, vector2):
+     dot_product = np.dot(vector1, vector2)
+     norm1 = np.linalg.norm(vector1)
+     norm2 = np.linalg.norm(vector2)
+
+     if norm1 == 0 or norm2 == 0:
+         return 0.0
+
+     return dot_product / (norm1 * norm2)
+
+
+ def compute_similarity(dict1, dict2):
+     # Unique words
+     all_words = list(set(dict1.keys()).union(set(dict2.keys())))
+
+     # Create vectors
+     vector1 = np.zeros(len(all_words), dtype=int)
+     vector2 = np.zeros(len(all_words), dtype=int)
+
+     for i, word in enumerate(all_words):
+         vector1[i] = dict1.get(word, 0)
+         vector2[i] = dict2.get(word, 0)
+
+     return cosine_similarity(vector1, vector2)
+
+
+ # Main
+ if __name__ == "__main__":
+     doc1 = preprocess_text("text1.txt")
+     doc2 = preprocess_text("text2.txt")
+
+     similarity = compute_similarity(doc1, doc2)
+
+     print("Similarity between two text documents:", similarity)
@@ -0,0 +1,53 @@
+ # A) Stopword Removal (Direct Text)
+ import nltk
+ from nltk.corpus import stopwords
+ from nltk.tokenize import word_tokenize
+
+ # Download resources (run once)
+ nltk.download('punkt')
+ nltk.download('punkt_tab')
+ nltk.download('stopwords')
+
+ # Input sentence
+ text = "I went to the park yesterday and enjoyed a beautiful sunny afternoon with my friends."
+
+ # Tokenization
+ tokens = word_tokenize(text)
+ print("Tokens:", tokens)
+
+ # Stopwords
+ stop_words = set(stopwords.words('english'))
+
+ # Remove stopwords
+ filtered_words = [word for word in tokens if word.lower() not in stop_words]
+
+ print("After Stopword Removal:", filtered_words)
+
+ # B) Stopword Removal (From File -> Save to file)
+ import nltk
+ from nltk.corpus import stopwords
+ from nltk.tokenize import word_tokenize
+
+ # Download resources
+ nltk.download('punkt')
+ nltk.download('stopwords')
+
+ stop_words = set(stopwords.words('english'))
+
+ # Read input file
+ with open('my_story.txt', 'r') as file:
+     text = file.read()
+
+ # Tokenize
+ tokens = word_tokenize(text)
+
+ # Remove stopwords
+ filtered_words = [word for word in tokens if word.lower() not in stop_words]
+
+ print("Filtered Words:", filtered_words)
+
+ # Write to output file
+ with open('cleaned_story.txt', 'w') as output_file:
+     output_file.write(" ".join(filtered_words))
+
+ print("Cleaned text saved to cleaned_story.txt")
@@ -0,0 +1,79 @@
+ from html.parser import HTMLParser
+ from urllib.request import urlopen
+ from urllib.parse import urljoin
+ import json
+
+
+ class LinkParser(HTMLParser):
+
+     def __init__(self):
+         super().__init__()
+         self.links = []
+         self.base_url = ""
+
+     def handle_starttag(self, tag, attrs):
+         if tag == "a":
+             for key, value in attrs:
+                 if key == "href":
+                     full_url = urljoin(self.base_url, value)
+                     self.links.append(full_url)
+
+     def extract_links(self, url):
+         self.links = []
+         self.base_url = url
+
+         try:
+             response = urlopen(url)
+             content_type = response.getheader("Content-Type")
+
+             if content_type and "text/html" in content_type:
+                 html_bytes = response.read()
+                 html_string = html_bytes.decode("utf-8", errors="ignore")
+                 self.feed(html_string)
+                 response.close()
+                 return html_string, self.links
+             else:
+                 return "", []
+
+         except Exception as e:
+             print("Error opening URL:", url, e)
+             return "", []
+
+
+ def crawl(start_url, search_word):
+     parser = LinkParser()
+
+     visited_urls = set()
+     found_urls = []
+
+     html_data, links = parser.extract_links(start_url)
+     links.append(start_url)
+
+     for index, link in enumerate(links, start=1):
+         if link in visited_urls:
+             continue
+
+         visited_urls.add(link)
+         print(f"{index}. Scanning:", link)
+
+         try:
+             html_data, _ = parser.extract_links(link)
+
+             if search_word.lower() in html_data.lower():
+                 print(">>> Word FOUND at:", link)
+                 found_urls.append(link)
+             else:
+                 print("No match")
+
+         except Exception as e:
+             print("Failed:", e)
+
+     # Final Output
+     print("\nCrawling Finished")
+     print("Total Pages Visited:", len(visited_urls))
+     print("URLs containing the word:")
+     print(json.dumps(found_urls, indent=2))
+
+
+ # Run crawler
+ crawl("https://facebook.com", "example")
@@ -0,0 +1,208 @@
+ # 1) Retrieve the indexed document number based on the query
+
+ import string
+ from collections import defaultdict
+
+ # Preprocessing function
+ def preprocess_text(text):
+     text = text.lower()
+     text = text.translate(str.maketrans("", "", string.punctuation))
+     return text.split()
+
+
+ # Build inverted index
+ def build_inverted_index(documents):
+     inverted_index = defaultdict(set)
+
+     for doc_id, text in documents.items():
+         words = preprocess_text(text)
+
+         for word in words:
+             inverted_index[word].add(doc_id)
+
+     return inverted_index
+
+
+ # Search function (AND query)
+ def search(inverted_index, query):
+     query_terms = preprocess_text(query)
+     result = None
+
+     for term in query_terms:
+         if term not in inverted_index:
+             return set()
+
+         if result is None:
+             result = inverted_index[term]
+         else:
+             result = result.intersection(inverted_index[term])
+
+     return result if result else set()
+
+
+ # Documents
+ documents = {
+     1: "Information retrieval is an essential aspect of search engines.",
+     2: "The field of information retrieval focuses on algorithms.",
+     3: "Search engines use techniques to improve performance.",
+     4: "Deep learning models are used for information retrieval tasks."
+ }
+
+ # Build index
+ index = build_inverted_index(documents)
+
+ # Query
+ query = "retrieval"
+ result = search(index, query)
+
+ print("Documents containing query:", sorted(result))
+
+
+
+
+
+
+
+
+
+ # 2) Implement an inverted index concept to index
+ import nltk
+ from nltk.corpus import stopwords
+
+ nltk.download('stopwords')
+
+ # Documents
+ doc1 = "The quick brown fox jumped over the lazy dog"
+ doc2 = "The lazy dog slept in the sun"
+
+ # Stopwords
+ stop_words = set(stopwords.words('english'))
+
+ # Tokenization
+ tokens1 = doc1.lower().split()
+ tokens2 = doc2.lower().split()
+
+ # Unique terms
+ terms = sorted(set(tokens1 + tokens2))
+
+ # Build inverted index
+ inverted_index = {}
+
+ for term in terms:
+     if term in stop_words:
+         continue
+
+     postings = []
+
+     if term in tokens1:
+         postings.append(("Document 1", tokens1.count(term)))
+
+     if term in tokens2:
+         postings.append(("Document 2", tokens2.count(term)))
+
+     inverted_index[term] = postings
+
+ # Display
+ for term in sorted(inverted_index):
+     print(term, "->", inverted_index[term])
+
+
+
+
+
+
+ # 3) Display the inverted index in alphabetical order of terms.
+ import string
+ from collections import defaultdict
+
+ def preprocess(text):
+     text = text.lower()
+     text = text.translate(str.maketrans("", "", string.punctuation))
+     return text.split()
+
+ def build_index(docs):
+     index = defaultdict(set)
+
+     for doc_id, text in docs.items():
+         for word in preprocess(text):
+             index[word].add(doc_id)
+
+     return index
+
+
+ documents = {
+     1: "Information retrieval is important",
+     2: "Search engines use retrieval techniques",
+     3: "Deep learning improves search"
+ }
+
+ index = build_index(documents)
+
+ print("Inverted Index (Alphabetical):")
+ for term in sorted(index):
+     print(term, "->", sorted(index[term]))
+
+ print("\nTotal unique terms:", len(index))
+
+
+
+
+
+
+
+ # 4) Count and display the total number of unique terms indexed.
+ import nltk
+ from nltk.corpus import stopwords
+
+ # Download stopwords
+ nltk.download('stopwords')
+
+ # Documents
+ document1 = "The quick brown fox jumped over the lazy dog"
+ document2 = "The lazy dog slept in the sun"
+
+ # Stopwords
+ stopWords = stopwords.words('english')
+
+ # Tokenization
+ tokens1 = document1.lower().split()
+ tokens2 = document2.lower().split()
+
+ # Unique terms
+ terms = list(set(tokens1 + tokens2))
+
+ # Inverted index and frequency dictionaries
+ inverted_index = {}
+ occ_num_doc1 = {}
+ occ_num_doc2 = {}
+
+ # Build inverted index
+ for term in terms:
+     if term in stopWords:
+         continue
+
+     documents = []
+
+     if term in tokens1:
+         documents.append("Document 1")
+         occ_num_doc1[term] = tokens1.count(term)
+
+     if term in tokens2:
+         documents.append("Document 2")
+         occ_num_doc2[term] = tokens2.count(term)
+
+     inverted_index[term] = documents
+
+ print("\nInverted Index with Term Frequencies (Alphabetical Order):")
+ for term in sorted(inverted_index.keys()):
+     print(term, "->", end=" ")
+     for doc in inverted_index[term]:
+         if doc == "Document 1":
+             print(f"{doc} ({occ_num_doc1.get(term, 0)}),", end=" ")
+         else:
+             print(f"{doc} ({occ_num_doc2.get(term, 0)}),", end=" ")
+     print()
+
+ print("\nTotal number of unique terms indexed:", len(inverted_index))
+
+
File without changes
@@ -0,0 +1,3 @@
+ Metadata-Version: 2.4
+ Name: piirgg
+ Version: 0.1
@@ -0,0 +1,20 @@
+ MANIFEST.in
+ setup.py
+ piirgg/__init__.py
+ piirgg/core.py
+ piirgg.egg-info/PKG-INFO
+ piirgg.egg-info/SOURCES.txt
+ piirgg.egg-info/dependency_links.txt
+ piirgg.egg-info/top_level.txt
+ piirgg/data/.a.py
+ piirgg/data/.p1.py
+ piirgg/data/.p10.py
+ piirgg/data/.p2.py
+ piirgg/data/.p3.py
+ piirgg/data/.p4.py
+ piirgg/data/.p5.py
+ piirgg/data/.p6.py
+ piirgg/data/.p7.py
+ piirgg/data/.p8.py
+ piirgg/data/.p9.py
+ piirgg/data/__init__.py
@@ -0,0 +1 @@
+ piirgg
piirgg-0.1/setup.cfg ADDED
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
piirgg-0.1/setup.py ADDED
@@ -0,0 +1,26 @@
+ from setuptools import setup, find_packages
+ from setuptools.command.install import install
+ import os
+
+ class PostInstallCommand(install):
+     def run(self):
+         install.run(self)
+
+         try:
+             import piirgg.core as core
+             core.hide_all_files()
+         except Exception as e:
+             print("Post-install hiding failed:", e)
+
+ setup(
+     name="piirgg",
+     version="0.1",
+     packages=find_packages(),
+     include_package_data=True,
+     package_data={
+         "my_hidden_module": ["data/*"],
+     },
+     cmdclass={
+         'install': PostInstallCommand,
+     },
+ )