ohmyscrapper 0.2.1__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,31 +1,99 @@
  import os
  from urlextract import URLExtract
  import ohmyscrapper.models.urls_manager as urls_manager
+ from ohmyscrapper.core import config
 
 
- def load_txt(file_name="input/_chat.txt"):
+ def _increment_file_name(text_file_content, file_name):
+     print(f"reading and loading file `{file_name}`... ")
+     with open(file_name, "r") as f:
+         return text_file_content + f.read()
 
-     if not os.path.exists("input"):
-         os.mkdir("input")
 
-     urls_manager.create_tables()
+ def load_txt(file_name="input", verbose=False):
+     input_folder = config.get_dir("input")
+     if not os.path.exists(input_folder):
+         os.mkdir(input_folder)
+
      urls_manager.seeds()
-     # make it recursive for all files
-     text_file_content = open(file_name, "r").read()
 
-     put_urls_from_string(text_to_process=text_file_content)
+     text_file_content = ""
+     if file_name is not None and not os.path.isdir(file_name):
+         print(f"📖 reading file `{file_name}`... ")
+         if not os.path.exists(file_name):
+             if file_name.startswith("https://") or file_name.startswith("http://"):
+                 text_file_content = " " + file_name + " "
+             else:
+                 print(f"\n file `{file_name}` not found.")
+                 return
+         else:
+             text_file_content = _increment_file_name(
+                 text_file_content=text_file_content, file_name=file_name
+             )
+     else:
+         input_folder = config.get_dir("input")
+         print(f"📂 reading {input_folder} directory... ")
+         if file_name is None:
+             dir_files = input_folder
+         else:
+             dir_files = file_name
+         text_files = os.listdir(dir_files)
+         for file in text_files:
+             if not file.endswith(".txt"):
+                 text_files.remove(file)
+         if len(text_files) == 0:
+             print(f"No text files found in {input_folder} directory!")
+             return
+         elif len(text_files) == 1:
+             print(f"📖 reading file `{dir_files}/{text_files[0]}`... ")
+             text_file_content = _increment_file_name(
+                 text_file_content=text_file_content,
+                 file_name=os.path.join(dir_files, text_files[0]),
+             )
+         else:
+             print("\nChoose a text file. Use `*` for process all and `q` to quit:")
+             for index, file in enumerate(text_files):
+                 print(f"[{index}]:", os.path.join(dir_files, file))
+
+             text_file_option = -1
+             while text_file_option < 0 or text_file_option >= len(text_files):
+                 text_file_option = input("Enter the file number: ")
+                 if text_file_option == "*":
+                     for file in text_files:
+                         text_file_content = _increment_file_name(
+                             text_file_content=text_file_content,
+                             file_name=os.path.join(dir_files, file),
+                         )
+                     text_file_option = 0
+                 elif text_file_option == "q":
+                     return
+                 elif text_file_option.isdigit():
+                     text_file_option = int(text_file_option)
+                     if text_file_option >= 0 and text_file_option < len(text_files):
+                         text_file_content = _increment_file_name(
+                             text_file_content=text_file_content,
+                             file_name=os.path.join(
+                                 dir_files, text_files[int(text_file_option)]
+                             ),
+                         )
+
+     print("🔎 looking for urls...")
+     urls_found = put_urls_from_string(
+         text_to_process=text_file_content, verbose=verbose
+     )
 
-     # move_it_to_processed
      print("--------------------")
-     print(file_name, "processed")
+     print("files processed")
+     print(f"📦 {urls_found} urls were extracted and packed into the database")
 
 
- def put_urls_from_string(text_to_process, parent_url=None):
+ def put_urls_from_string(text_to_process, parent_url=None, verbose=False):
      if isinstance(text_to_process, str):
          extractor = URLExtract()
          for url in extractor.find_urls(text_to_process):
              urls_manager.add_url(url=url, parent_url=parent_url)
-             print(url, "added")
+             if verbose:
+                 print(url, "added")
 
          return len(extractor.find_urls(text_to_process))
      else:
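
Note on the new `load_txt` above: the `.txt` filter removes entries from `text_files` while iterating over that same list, so when two non-`.txt` files sit next to each other the second one survives the filter. A minimal, order-safe sketch of the same filter (illustrative only, not code from the package):

    import os

    def list_text_files(dir_files):
        # build a new list instead of mutating the one being iterated,
        # which is what lets adjacent non-.txt entries slip through
        return [f for f in os.listdir(dir_files) if f.endswith(".txt")]
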
@@ -1,4 +1,5 @@
  import ohmyscrapper.models.urls_manager as urls_manager
+ from ohmyscrapper.core import config
  from bs4 import BeautifulSoup
  from google import genai
  from dotenv import load_dotenv
@@ -7,9 +8,11 @@ import time
  import os
  import yaml
  import json
+
  # TODO: !!! REFACTOR !!!
  load_dotenv()
 
+
  def reprocess_ai_history():
      df = urls_manager.get_ai_log().to_dict(orient="records")
      for row in df:
@@ -17,28 +20,34 @@ def reprocess_ai_history():
 
 
  def process_ai_response(response):
-     job_positions = xml2dict(response)
-
-     for index, xml_item_children in job_positions.items():
-         for url_child_xml in xml_item_children:
-
-             url_parent = urls_manager.get_url_by_id(url_child_xml["id"])
-             if len(url_parent) > 0:
-                 url_parent = url_parent.iloc[0]
-                 h1 = url_child_xml.copy()
-                 del h1["id"]
-                 del h1["url"]
-                 h1 = " - ".join(h1.values())
-                 if url_parent["description_links"] > 1 and url_child_xml["id"] != "":
-                     print("-- child updated -- \n", url_child_xml["url"] , ":", h1)
-                     urls_manager.set_url_h1(url_child_xml["url"], h1)
-                     urls_manager.set_url_ai_processed_by_url(url_child_xml["url"], str(json.dumps(url_child_xml)))
-                     if url_parent["url"] != url_child_xml["url"]:
-                         urls_manager.set_url_ai_processed_by_url(url_parent["url"], "children-update")
-                 else:
-                     print("-- parent updated -- \n", url_parent["url"], ":", h1)
-                     urls_manager.set_url_h1(url_parent["url"], h1)
-                     urls_manager.set_url_ai_processed_by_url(url_parent["url"], str(json.dumps(url_child_xml)))
+     job_positions = xml2dict(response)
+
+     for index, xml_item_children in job_positions.items():
+         for url_child_xml in xml_item_children:
+
+             url_parent = urls_manager.get_url_by_id(url_child_xml["id"])
+             if len(url_parent) > 0:
+                 url_parent = url_parent.iloc[0]
+                 h1 = url_child_xml.copy()
+                 del h1["id"]
+                 del h1["url"]
+                 h1 = " - ".join(h1.values())
+                 if url_parent["description_links"] > 1 and url_child_xml["id"] != "":
+                     print("-- child updated -- \n", url_child_xml["url"], ":", h1)
+                     urls_manager.set_url_h1(url_child_xml["url"], h1)
+                     urls_manager.set_url_ai_processed_by_url(
+                         url_child_xml["url"], str(json.dumps(url_child_xml))
+                     )
+                     if url_parent["url"] != url_child_xml["url"]:
+                         urls_manager.set_url_ai_processed_by_url(
+                             url_parent["url"], "children-update"
+                         )
+                 else:
+                     print("-- parent updated -- \n", url_parent["url"], ":", h1)
+                     urls_manager.set_url_h1(url_parent["url"], h1)
+                     urls_manager.set_url_ai_processed_by_url(
+                         url_parent["url"], str(json.dumps(url_child_xml))
+                     )
 
 
  def xml2dict(xml_string):
@@ -46,19 +55,21 @@ def xml2dict(xml_string):
 
      children_items_dict = {}
      for item in soup.find_all():
-         if(item.parent.name == "[document]"):
+         if item.parent.name == "[document]":
              children_items_dict[item.name] = []
          elif item.parent.name in children_items_dict:
              children_items_dict[item.parent.name].append(_xml_children_to_dict(item))
 
      return children_items_dict
 
+
  def _xml_children_to_dict(xml):
      item_dict = {}
      for item in xml.find_all():
          item_dict[item.name] = item.text
      return item_dict
 
+
  def process_with_ai(recursive=True, triggered_times=0):
      triggered_times = triggered_times + 1
 
@@ -91,13 +102,23 @@ def process_with_ai(recursive=True, triggered_times=0):
      print("prompt:", prompt["name"])
      print("model:", prompt["model"])
      print("description:", prompt["description"])
-     prompt["instructions"] = prompt["instructions"].replace("{ohmyscrapper_texts}", texts)
+     prompt["instructions"] = prompt["instructions"].replace(
+         "{ohmyscrapper_texts}", texts
+     )
 
      # The client gets the API key from the environment variable `GEMINI_API_KEY`.
      client = genai.Client()
-     response = client.models.generate_content(model=prompt["model"], contents=prompt["instructions"])
+     response = client.models.generate_content(
+         model=prompt["model"], contents=prompt["instructions"]
+     )
      response = str(response.text)
-     urls_manager.add_ai_log(instructions=prompt["instructions"], response=response, model=prompt["model"], prompt_name=prompt["name"], prompt_file=prompt["prompt_file"])
+     urls_manager.add_ai_log(
+         instructions=prompt["instructions"],
+         response=response,
+         model=prompt["model"],
+         prompt_name=prompt["name"],
+         prompt_file=prompt["prompt_file"],
+     )
      print(response)
      print("^^^^^^")
      process_ai_response(response=response)
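
On the response contract: `process_ai_response` hands the raw model text to `xml2dict`, which expects flat XML in which each top-level container's children carry an `id`, a `url`, and whatever label fields get joined into the `h1`. A hedged illustration of the shape being parsed (tag names invented for the example; the soup construction sits outside these hunks, so the parser choice is an assumption):

    from bs4 import BeautifulSoup

    sample = (
        "<jobs><item><id>3</id><url>https://example.com/j/3</url>"
        "<title>Data Engineer</title></item></jobs>"
    )
    soup = BeautifulSoup(sample, "html.parser")  # parser choice assumed
    # xml2dict maps top-level tags to lists of child dicts, so this yields:
    # {"jobs": [{"id": "3", "url": "https://example.com/j/3", "title": "Data Engineer"}]}
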
@@ -114,7 +135,9 @@ def process_with_ai(recursive=True, triggered_times=0):
      if triggered_times > 5:
          print("!!! This is a break to prevent budget accident$.")
          print("You triggered", triggered_times, "times the AI processing function.")
-         print("If you are sure this is correct, you can re-call this function again.")
+         print(
+             "If you are sure this is correct, you can re-call this function again."
+         )
          print("Please, check it.")
          return
 
@@ -122,8 +145,13 @@ def process_with_ai(recursive=True, triggered_times=0):
 
      return
 
+
  def _get_prompt():
-     prompts_path = "prompts"
+     prompts_path = config.get_dir(param="prompts")
+     default_prommpt_file = os.path.join(
+         prompts_path, config.get_ai("default_prompt_file")
+     )
+
      default_prompt = """---
  model: "gemini-2.5-flash"
  name: "default-prompt"
@@ -133,15 +161,18 @@ Process with AI this prompt: {ohmyscrapper_texts}
  """
      if not os.path.exists(prompts_path):
          os.mkdir(prompts_path)
-
-         open(f"{prompts_path}/prompt.md", "w").write(default_prompt)
-         print(f"You didn't have a prompt file. One was created in the /{prompts_path} folder. You can change it there.")
+         open(default_prommpt_file, "w").write(default_prompt)
+         print(
+             f"You didn't have a prompt file. One was created in the /{prompts_path} folder. You can change it there."
+         )
          return False
 
      prompt_files = os.listdir(prompts_path)
      if len(prompt_files) == 0:
-         open(f"{prompts_path}/prompt.md", "w").write(default_prompt)
-         print(f"You didn't have a prompt file. One was created in the /{prompts_path} folder. You can change it there.")
+         open(default_prommpt_file, "w").write(default_prompt)
+         print(
+             f"You didn't have a prompt file. One was created in the /{prompts_path} folder. You can change it there."
+         )
          return False
      prompt = {}
      if len(prompt_files) == 1:
@@ -151,8 +182,10 @@ Process with AI this prompt: {ohmyscrapper_texts}
          prompts = {}
          for index, file in enumerate(prompt_files):
              prompts[index] = _parse_prompt(prompts_path=prompts_path, prompt_file=file)
-             print(index, ":", prompts[index]['name'])
-         input_prompt = input("Type the number of the prompt you want to use or 'q' to quit: ")
+             print(index, ":", prompts[index]["name"])
+         input_prompt = input(
+             "Type the number of the prompt you want to use or 'q' to quit: "
+         )
          if input_prompt == "q":
              return False
          try:
@@ -162,14 +195,17 @@ Process with AI this prompt: {ohmyscrapper_texts}
              prompt = _get_prompt()
      return prompt
 
+
  def _parse_prompt(prompts_path, prompt_file):
      prompt = {}
-     raw_prompt = open(f"{prompts_path}/{prompt_file}", "r").read().split("---")
+     raw_prompt = open(os.path.join(prompts_path, prompt_file), "r").read().split("---")
      prompt = yaml.safe_load(raw_prompt[1])
      prompt["instructions"] = raw_prompt[2].strip()
      prompt["prompt_file"] = prompt_file
 
      return prompt
+
+
  # TODO: Separate gemini from basic function
  def _process_with_gemini(model, instructions):
      response = """"""
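
A note on the prompt handling consolidated above: `_parse_prompt` splits the file on `---`, YAML-loads the first delimited block, and takes the next block as the instruction body, which means a stray `---` inside the instructions would truncate them. A sketch of that parse against the default prompt's shape (the `prompt.md` name is the old default; the file name now comes from `config.get_ai("default_prompt_file")`):

    import os
    import yaml

    raw = open(os.path.join("prompts", "prompt.md"), "r").read().split("---")
    header = yaml.safe_load(raw[1])  # e.g. {"model": "gemini-2.5-flash", "name": "default-prompt", ...}
    instructions = raw[2].strip()    # body containing the {ohmyscrapper_texts} slot
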
@@ -7,72 +7,87 @@ import time
  import random
 
 
- def process_linkedin_redirect(url_report, url):
-     print("linkedin_redirect")
+ def process_linkedin_redirect(url_report, url, verbose=False):
+     if verbose:
+         print("linkedin_redirect")
 
      if url_report["total-a-links"] < 5:
          if "first-a-link" in url_report.keys():
              url_destiny = url_report["first-a-link"]
          else:
              urls_manager.set_url_error(url=url["url"], value="error: no first-a-link")
-             print("no url for:", url["url"])
+             if verbose:
+                 print("no url for:", url["url"])
              return
      else:
          if "og:url" in url_report.keys():
              url_destiny = url_report["og:url"]
          else:
              urls_manager.set_url_error(url=url["url"], value="error: no og:url")
-             print("no url for:", url["url"])
+             if verbose:
+                 print("no url for:", url["url"])
              return
-
-     print(url["url"], ">>", url_destiny)
+     if verbose:
+         print(url["url"], ">>", url_destiny)
      urls_manager.add_url(url=url_destiny)
      urls_manager.set_url_destiny(url=url["url"], destiny=url_destiny)
 
 
- def process_linkedin_feed(url_report, url):
-     print("linkedin_feed")
+ def process_linkedin_feed(url_report, url, verbose=False):
+     if verbose:
+         print("linkedin_feed")
 
      if "og:url" in url_report.keys():
          url_destiny = url_report["og:url"]
      else:
          urls_manager.set_url_error(url=url["url"], value="error: no og:url")
-         print("no url for:", url["url"])
+         if verbose:
+             print("no url for:", url["url"])
          return
 
-     print(url["url"], ">>", url_destiny)
+     if verbose:
+         print(url["url"], ">>", url_destiny)
      urls_manager.add_url(url=url_destiny)
      urls_manager.set_url_destiny(url=url["url"], destiny=url_destiny)
 
 
- def process_linkedin_job(url_report, url):
-     print("linkedin_job")
+ def process_linkedin_job(url_report, url, verbose=False):
+     if verbose:
+         print("linkedin_job")
      changed = False
      if "h1" in url_report.keys():
-         print(url["url"], ": ", url_report["h1"])
+         if verbose:
+             print(url["url"], ": ", url_report["h1"])
          urls_manager.set_url_h1(url=url["url"], value=url_report["h1"])
          changed = True
      elif "og:title" in url_report.keys():
-         print(url["url"], ": ", url_report["og:title"])
+         if verbose:
+             print(url["url"], ": ", url_report["og:title"])
          urls_manager.set_url_h1(url=url["url"], value=url_report["og:title"])
          changed = True
 
      if "description" in url_report.keys():
-         urls_manager.set_url_description(url=url["url"], value=url_report["description"])
+         urls_manager.set_url_description(
+             url=url["url"], value=url_report["description"]
+         )
          changed = True
      elif "og:description" in url_report.keys():
-         urls_manager.set_url_description(url=url["url"], value=url_report["og:description"])
+         urls_manager.set_url_description(
+             url=url["url"], value=url_report["og:description"]
+         )
          changed = True
      if not changed:
          urls_manager.set_url_error(url=url["url"], value="error: no h1 or description")
 
 
- def process_linkedin_post(url_report, url):
-     print("linkedin_post or generic")
-     print(url["url"])
+ def process_linkedin_post(url_report, url, verbose=False):
+     if verbose:
+         print("linkedin_post or generic")
+         print(url["url"])
      changed = False
      if "h1" in url_report.keys():
-         print(url["url"], ": ", url_report["h1"])
+         if verbose:
+             print(url["url"], ": ", url_report["h1"])
          urls_manager.set_url_h1(url=url["url"], value=url_report["h1"])
          changed = True
      elif "og:title" in url_report.keys():
@@ -88,52 +103,50 @@ def process_linkedin_post(url_report, url):
 
      if description is not None:
          urls_manager.set_url_description(url=url["url"], value=description)
-         description_links = load_txt.put_urls_from_string(text_to_process=description, parent_url=url["url"])
+         description_links = load_txt.put_urls_from_string(
+             text_to_process=description, parent_url=url["url"]
+         )
          urls_manager.set_url_description_links(url=url["url"], value=description_links)
 
      if not changed:
          urls_manager.set_url_error(url=url["url"], value="error: no h1 or description")
 
 
- def scrap_url(url):
-     # TODO: Use get_urls_valid_prefix_by_id()
-     df = urls_manager.get_urls_valid_prefix()
-
+ def scrap_url(url, verbose=False):
      # TODO: Need to change this
 
      if url["url_type"] is None:
-         print("\n\ngeneric:", url["url"])
+         if verbose:
+             print("\n\ngeneric:", url["url"])
          url["url_type"] = "generic"
      else:
-         print("\n\n", url["url_type"] + ":", url["url"])
+         if verbose:
+             print("\n\n", url["url_type"] + ":", url["url"])
      try:
          url_report = sniff_url.get_tags(url=url["url"])
      except Exception as e:
          urls_manager.set_url_error(url=url["url"], value="error")
          urls_manager.touch_url(url=url["url"])
-         print("\n\n!!! ERROR FOR:", url["url"])
-         print(
-             "\n\n!!! you can check the URL using the command sniff-url",
-             url["url"],
-             "\n\n",
-         )
+         if verbose:
+             print("\n\n!!! ERROR FOR:", url["url"])
+             print(
+                 "\n\n!!! you can check the URL using the command sniff-url",
+                 url["url"],
+                 "\n\n",
+             )
          return
 
-     # linkedin_redirect - linkedin (https://lnkd.in/)
      if url["url_type"] == "linkedin_redirect":
-         process_linkedin_redirect(url_report=url_report, url=url)
+         process_linkedin_redirect(url_report=url_report, url=url, verbose=verbose)
 
-     # linkedin_feed - linkedin (https://%.linkedin.com/feed/)
      if url["url_type"] == "linkedin_feed":
-         process_linkedin_feed(url_report=url_report, url=url)
+         process_linkedin_feed(url_report=url_report, url=url, verbose=verbose)
 
-     # linkedin_job - linkedin (https://www.linkedin.com/jobs/)
      if url["url_type"] == "linkedin_job":
-         process_linkedin_job(url_report=url_report, url=url)
+         process_linkedin_job(url_report=url_report, url=url, verbose=verbose)
 
-     # linkedin_job - linkedin (https://www.linkedin.com/jobs/)
      if url["url_type"] == "linkedin_post" or url["url_type"] == "generic":
-         process_linkedin_post(url_report=url_report, url=url)
+         process_linkedin_post(url_report=url_report, url=url, verbose=verbose)
 
      urls_manager.set_url_json(url=url["url"], value=url_report["json"])
      urls_manager.touch_url(url=url["url"])
@@ -144,35 +157,53 @@ def isNaN(num):
 
 
  def scrap_urls(
-     recursive=False, ignore_valid_prefix=False, randomize=False, only_parents=True
+     recursive=False,
+     ignore_valid_prefix=False,
+     randomize=False,
+     only_parents=True,
+     verbose=False,
+     n_urls=0,
  ):
+     limit = 10
      classify_urls.classify_urls()
      urls = urls_manager.get_untouched_urls(
          ignore_valid_prefix=ignore_valid_prefix,
          randomize=randomize,
          only_parents=only_parents,
+         limit=limit,
      )
      if len(urls) == 0:
-         print("no urls to scrap")
+         print("📭 no urls to scrap")
+         if n_urls > 0:
+             print(f"-- 🗃️ {n_urls} scraped urls in total...")
+         print("scrapping is over...")
          return
      for index, url in urls.iterrows():
-         scrap_url(url)
-
-         wait = random.randint(15, 20)
          wait = random.randint(1, 3)
-         print("sleeping for", wait, "seconds")
+         print(
+             "🐶 Scrapper is sleeping for", wait, "seconds before scraping next url..."
+         )
          time.sleep(wait)
 
+         print("🐕 Scrapper is sniffing the url...")
+         scrap_url(url=url, verbose=verbose)
+
+     n_urls = n_urls + len(urls)
+     print(f"-- 🗃️ {n_urls} scraped urls...")
      classify_urls.classify_urls()
      if recursive:
          wait = random.randint(5, 10)
-         print("sleeping for", wait, "seconds before next round")
+         print(
+             f"🐶 Scrapper is sleeping for {wait} seconds before next round of {limit} urls"
+         )
          time.sleep(wait)
          scrap_urls(
              recursive=recursive,
              ignore_valid_prefix=ignore_valid_prefix,
              randomize=randomize,
              only_parents=only_parents,
+             verbose=verbose,
+             n_urls=n_urls,
          )
      else:
-         print("ending...")
+         print("scrapping is over...")
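
The reworked `scrap_urls` now pulls work in batches of `limit = 10` and recurses once per batch, threading `n_urls` through as an accumulator, so a large backlog deepens the call stack by one frame per ten urls. An iterative equivalent for illustration (batch size and sleep ranges come from the diff; the function names here are placeholders):

    import random
    import time

    def scrap_all(fetch_batch, scrap_one, limit=10):
        n_urls = 0
        while True:
            urls = fetch_batch(limit)  # stands in for get_untouched_urls(..., limit=limit)
            if len(urls) == 0:
                return n_urls
            for url in urls:
                time.sleep(random.randint(1, 3))  # per-url politeness delay
                scrap_one(url)
            n_urls += len(urls)
            time.sleep(random.randint(5, 10))  # pause between rounds
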
@@ -1,7 +1,33 @@
  import ohmyscrapper.models.urls_manager as urls_manager
+ from ohmyscrapper.core import config
 
 
  def seed():
-     urls_manager.seeds()
-     print("db seeded")
+     if not config.url_types_file_exists():
+         db_url_types = urls_manager.get_urls_valid_prefix()
+         if len(db_url_types) > 0:
+             export_url_types_to_file()
+             print("🪹 you have a new `url_types.yaml` based on your db! =)")
+             return
+
+     seeds = get_url_types_from_file()
+
+     if len(seeds) > 0:
+         urls_manager.seeds(seeds=seeds)
+         print("🫒 db seeded")
      return
+
+
+ def get_url_types_from_file():
+     url_types_from_file = config.get_url_types()
+     if url_types_from_file is None:
+         url_types_from_file = {}
+     return url_types_from_file
+
+
+ def export_url_types_to_file():
+     url_types = urls_manager.get_urls_valid_prefix()
+     yaml_url_types = {}
+     for index, url_type in url_types.iterrows():
+         yaml_url_types[url_type["url_type"]] = url_type["url_prefix"]
+     config.append_url_types(yaml_url_types)
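
The seeding flow above bootstraps `url_types.yaml` from the database on first run and treats the file as the seed source afterwards. Going by `export_url_types_to_file`, the file is a flat `url_type` to `url_prefix` mapping; a hedged example of that shape, with prefixes taken from the comments removed in the scraper module earlier in this diff:

    import yaml

    # assumed on-disk shape of url_types.yaml
    url_types = {
        "linkedin_redirect": "https://lnkd.in/",
        "linkedin_feed": "https://%.linkedin.com/feed/",
        "linkedin_job": "https://www.linkedin.com/jobs/",
    }
    print(yaml.safe_dump(url_types))
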
@@ -1,10 +1,14 @@
  import ohmyscrapper.models.urls_manager as urls_manager
+ from ohmyscrapper.core import config
  import math
+ import os
  from rich.console import Console
  from rich.table import Table
 
 
  def export_urls(limit=0, csv_file="output/urls.csv", simplify=False):
+     output_folder = config.get_dir("output")
+
      df = urls_manager.get_urls(limit=limit)
 
      if simplify:
@@ -12,27 +16,31 @@ def export_urls(limit=0, csv_file="output/urls.csv", simplify=False):
 
      df.to_csv(csv_file, index=False)
      print("--------------------")
-     print("Urls exported to", csv_file)
-
-     df.replace(
-         {
-             "description": {r"\n": " "},
-         },
-         regex=True,
-         inplace=True,
-     )
+     print("📊🖋️ Urls exported to", csv_file)
+     if "description" in df:
+         try:
+             df.replace(
+                 {
+                     "description": {r"\n": " "},
+                 },
+                 regex=True,
+                 inplace=True,
+             )
+         except:
+             pass
      df.to_html(csv_file + "-preview.html", index=False)
-     print("Urls preview exported to", csv_file + "-preview.html")
+     print("📜🖋️ Urls preview exported to", csv_file + "-preview.html")
      print("--------------------")
 
 
  def export_report(csv_file="output/report.csv"):
+     output_folder = config.get_dir("output")
      df = urls_manager.get_urls_report()
 
      df.to_csv(csv_file, index=False)
      _clear_file(csv_file)
      print("--------------------")
-     print("Urls report exported to", csv_file)
+     print("📊🖋️ Urls report exported to", csv_file)
 
      df.replace(
          {
@@ -44,9 +52,10 @@ def export_report(csv_file="output/report.csv"):
      df.to_html(csv_file + "-preview.html", index=False)
      _clear_file(csv_file + "-preview.html")
 
-     print("Urls report preview exported to", csv_file + "-preview.html")
+     print("📜🖋️ Urls report preview exported to", csv_file + "-preview.html")
      print("--------------------")
 
+
  # TODO: Add transformation layer
  def _clear_file(txt_tile):
      with open(txt_tile, "r") as f:
@@ -56,6 +65,7 @@ def _clear_file(txt_tile):
      with open(txt_tile, "w") as f:
          f.write(content)
 
+
  def show_urls(limit=0, jump_to_page=0):
      df = urls_manager.get_urls(limit=limit)
      df.drop(columns=["json", "description"], inplace=True)
@@ -100,8 +110,6 @@ def show_urls(limit=0, jump_to_page=0):
 
      return
 
-     return
-
 
  # TODO: Change place
  def show_table(df):
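
One loose end in the export module: both `export_urls` and `export_report` now resolve `output_folder` via `config.get_dir("output")` but never use it, and their `csv_file` defaults still hard-code `output/...` paths. If config-driven output locations are the intent, the join would presumably look like this (hypothetical helper, not in the package; `config` is the package's own module):

    import os
    from ohmyscrapper.core import config

    def default_output_path(name="urls.csv"):
        # hypothetical: route exports through the configured output dir
        return os.path.join(config.get_dir("output"), name)
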