ohmyscrapper 0.8.1__tar.gz → 0.8.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/PKG-INFO +2 -2
  2. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/README.md +1 -1
  3. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/pyproject.toml +1 -1
  4. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/__init__.py +20 -2
  5. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/default_files/config.yaml +2 -1
  6. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/scrap_urls.py +4 -1
  7. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/__main__.py +0 -0
  8. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/config.py +0 -0
  9. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/config_files.py +0 -0
  10. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/default_files/url_sniffing.yaml +0 -0
  11. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/default_files/url_types.yaml +0 -0
  12. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/models/urls_manager.py +0 -0
  13. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/browser.py +0 -0
  14. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/classify_urls.py +0 -0
  15. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/load_txt.py +0 -0
  16. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/merge_dbs.py +0 -0
  17. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/process_with_ai.py +0 -0
  18. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/seed.py +0 -0
  19. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/show.py +0 -0
  20. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/sniff_url.py +0 -0
  21. {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/untouch_all.py +0 -0
{ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ohmyscrapper
-Version: 0.8.1
+Version: 0.8.2
 Summary: OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a final report with general information about job positions.
 Author: Cesar Cardoso
 Author-email: Cesar Cardoso <hello@cesarcardoso.cc>
@@ -20,7 +20,7 @@ Project-URL: Changelog, https://github.com/bouli/ohmyscrapper/releases/latest
 Project-URL: Repository, https://github.com/bouli/ohmyscrapper
 Description-Content-Type: text/markdown
 
-# 🐶 OhMyScrapper - v0.8.1
+# 🐶 OhMyScrapper - v0.8.2
 
 OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a
 final report with general information about job positions.
{ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/README.md

@@ -1,4 +1,4 @@
-# 🐶 OhMyScrapper - v0.8.1
+# 🐶 OhMyScrapper - v0.8.2
 
 OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a
 final report with general information about job positions.
{ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "ohmyscrapper"
-version = "0.8.1"
+version = "0.8.2"
 license = "MIT"
 description = "OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a final report with general information about job positions."
 readme = "README.md"
{ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/__init__.py

@@ -20,7 +20,7 @@ from ohmyscrapper.core.config import update
 
 def main():
     parser = argparse.ArgumentParser(prog="ohmyscrapper")
-    parser.add_argument("--version", action="version", version="%(prog)s v0.8.1")
+    parser.add_argument("--version", action="version", version="%(prog)s v0.8.2")
 
     update()
     subparsers = parser.add_subparsers(dest="command", help="Available commands")
@@ -104,6 +104,16 @@ def main():
     sniff_url_parser.add_argument(
         "url", default="https://cesarcardoso.cc/", help="Url to sniff"
     )
+    sniff_url_parser.add_argument(
+        "--metatags",
+        default="mt",
+        help="Meta tags you want to watch separated by comma ','",
+    )
+    sniff_url_parser.add_argument(
+        "--bodytags",
+        default="bd",
+        help="Body tags you want to watch separated by comma ','",
+    )
 
     show_urls_parser = subparsers.add_parser("show", help="Show urls and prefixes")
     show_urls_parser.add_argument(
@@ -153,7 +163,15 @@ def main():
         return
 
     if args.command == "sniff-url":
-        sniff_url(args.url)
+        sniffing_config = {}
+        if len(args.metatags) > 0:
+            sniffing_config["metatags"] = str(args.metatags).split(",")
+
+        if len(args.bodytags) > 0:
+            sniffing_config["bodytags"] = str(args.bodytags).split(",")
+
+        sniff_url(args.url, sniffing_config=sniffing_config)
+
         return
 
     if args.command == "scrap-urls":
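The new sniff-url flags are split on commas into a sniffing_config dict before being handed to sniff_url. A self-contained sketch of the same wiring as in the hunks above (the URL and tag names here are placeholder examples, not values from the package):

import argparse

# Mirrors the sniff-url options added in 0.8.2.
parser = argparse.ArgumentParser(prog="ohmyscrapper")
subparsers = parser.add_subparsers(dest="command")
sniff = subparsers.add_parser("sniff-url")
sniff.add_argument("url")
sniff.add_argument("--metatags", default="mt")
sniff.add_argument("--bodytags", default="bd")

args = parser.parse_args(
    ["sniff-url", "https://example.com", "--metatags", "og:title,og:description"]
)

# Same splitting logic as in main(): each comma-separated flag becomes a list.
sniffing_config = {}
if len(args.metatags) > 0:
    sniffing_config["metatags"] = str(args.metatags).split(",")
if len(args.bodytags) > 0:
    sniffing_config["bodytags"] = str(args.bodytags).split(",")

print(sniffing_config)
# {'metatags': ['og:title', 'og:description'], 'bodytags': ['bd']}

Note that because both flags default to non-empty strings ("mt" and "bd"), the len(...) > 0 guards only skip a key when an empty string is passed explicitly, e.g. --metatags ''.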
{ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/default_files/config.yaml

@@ -17,5 +17,6 @@ ai:
 
 sniffing:
   timeout: 10
-  use-browser: chrome
+  use-browser: false
   browser-waiting-time: 5
+  round-sleeping: 10
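Two defaults change here: use-browser switches from the string chrome to the boolean false, and a new round-sleeping key is introduced, consumed by the scrap_urls.py hunk below. A minimal sketch of what a plain PyYAML load of the new section yields (the project's own loader in ohmyscrapper.core.config is not shown in this diff):

import yaml  # PyYAML; assumed available for this sketch

raw = """
sniffing:
  timeout: 10
  use-browser: false
  browser-waiting-time: 5
  round-sleeping: 10
"""

sniffing = yaml.safe_load(raw)["sniffing"]

# use-browser now parses as the boolean False rather than the string "chrome".
assert sniffing["use-browser"] is False
# round-sleeping (seconds) feeds the randomized wait between scraping rounds.
assert sniffing["round-sleeping"] == 10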
{ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/scrap_urls.py

@@ -181,7 +181,10 @@ def scrap_urls(
     print(f"-- 🗃️ {n_urls} scraped urls...")
     classify_urls.classify_urls()
     if recursive:
-        wait = random.randint(5, 10)
+        wait = random.randint(
+            int(config.get_sniffing("round-sleeping") / 2),
+            int(config.get_sniffing("round-sleeping")),
+        )
         print(
             f"🐶 Scrapper is sleeping for {wait} seconds before next round of {limit} urls"
         )
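The hardcoded randint(5, 10) becomes configurable: the wait is drawn between half of round-sleeping and round-sleeping itself. A quick check of the bounds arithmetic, with config.get_sniffing stubbed by the 0.8.2 default:

import random

# Stand-in for config.get_sniffing("round-sleeping"); 10 is the 0.8.2 default.
ROUND_SLEEPING = 10

wait = random.randint(int(ROUND_SLEEPING / 2), int(ROUND_SLEEPING))
# With the default of 10 this is randint(5, 10), identical to the old
# hardcoded range; setting round-sleeping: 30 would give randint(15, 30).
print(f"sleeping {wait}s before the next round")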