ohmyscrapper 0.8.1__tar.gz → 0.8.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/PKG-INFO +2 -2
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/README.md +1 -1
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/pyproject.toml +1 -1
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/__init__.py +20 -2
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/default_files/config.yaml +2 -1
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/scrap_urls.py +4 -1
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/__main__.py +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/config.py +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/config_files.py +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/default_files/url_sniffing.yaml +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/default_files/url_types.yaml +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/models/urls_manager.py +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/browser.py +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/classify_urls.py +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/load_txt.py +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/merge_dbs.py +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/process_with_ai.py +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/seed.py +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/show.py +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/sniff_url.py +0 -0
- {ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/modules/untouch_all.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ohmyscrapper
|
|
3
|
-
Version: 0.8.1
|
|
3
|
+
Version: 0.8.2
|
|
4
4
|
Summary: OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a final report with general information about job positions.
|
|
5
5
|
Author: Cesar Cardoso
|
|
6
6
|
Author-email: Cesar Cardoso <hello@cesarcardoso.cc>
|
|
@@ -20,7 +20,7 @@ Project-URL: Changelog, https://github.com/bouli/ohmyscrapper/releases/latest
|
|
|
20
20
|
Project-URL: Repository, https://github.com/bouli/ohmyscrapper
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
|
|
23
|
-
# 🐶 OhMyScrapper - v0.8.1
|
|
23
|
+
# 🐶 OhMyScrapper - v0.8.2
|
|
24
24
|
|
|
25
25
|
OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a
|
|
26
26
|
final report with general information about job positions.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "ohmyscrapper"
|
|
3
|
-
version = "0.8.1"
|
|
3
|
+
version = "0.8.2"
|
|
4
4
|
license = "MIT"
|
|
5
5
|
description = "OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a final report with general information about job positions."
|
|
6
6
|
readme = "README.md"
|
|
@@ -20,7 +20,7 @@ from ohmyscrapper.core.config import update
|
|
|
20
20
|
|
|
21
21
|
def main():
|
|
22
22
|
parser = argparse.ArgumentParser(prog="ohmyscrapper")
|
|
23
|
-
parser.add_argument("--version", action="version", version="%(prog)s v0.8.1")
|
|
23
|
+
parser.add_argument("--version", action="version", version="%(prog)s v0.8.2")
|
|
24
24
|
|
|
25
25
|
update()
|
|
26
26
|
subparsers = parser.add_subparsers(dest="command", help="Available commands")
|
|
@@ -104,6 +104,16 @@ def main():
|
|
|
104
104
|
sniff_url_parser.add_argument(
|
|
105
105
|
"url", default="https://cesarcardoso.cc/", help="Url to sniff"
|
|
106
106
|
)
|
|
107
|
+
sniff_url_parser.add_argument(
|
|
108
|
+
"--metatags",
|
|
109
|
+
default="mt",
|
|
110
|
+
help="Meta tags you want to watch separated by comma ','",
|
|
111
|
+
)
|
|
112
|
+
sniff_url_parser.add_argument(
|
|
113
|
+
"--bodytags",
|
|
114
|
+
default="bd",
|
|
115
|
+
help="Body tags you want to watch separated by comma ','",
|
|
116
|
+
)
|
|
107
117
|
|
|
108
118
|
show_urls_parser = subparsers.add_parser("show", help="Show urls and prefixes")
|
|
109
119
|
show_urls_parser.add_argument(
|
|
@@ -153,7 +163,15 @@ def main():
|
|
|
153
163
|
return
|
|
154
164
|
|
|
155
165
|
if args.command == "sniff-url":
|
|
156
|
-
|
|
166
|
+
sniffing_config = {}
|
|
167
|
+
if len(args.metatags) > 0:
|
|
168
|
+
sniffing_config["metatags"] = str(args.metatags).split(",")
|
|
169
|
+
|
|
170
|
+
if len(args.bodytags) > 0:
|
|
171
|
+
sniffing_config["bodytags"] = str(args.bodytags).split(",")
|
|
172
|
+
|
|
173
|
+
sniff_url(args.url, sniffing_config=sniffing_config)
|
|
174
|
+
|
|
157
175
|
return
|
|
158
176
|
|
|
159
177
|
if args.command == "scrap-urls":
|
|
@@ -181,7 +181,10 @@ def scrap_urls(
|
|
|
181
181
|
print(f"-- 🗃️ {n_urls} scraped urls...")
|
|
182
182
|
classify_urls.classify_urls()
|
|
183
183
|
if recursive:
|
|
184
|
-
wait = random.randint(
|
|
184
|
+
wait = random.randint(
|
|
185
|
+
int(config.get_sniffing("round-sleeping") / 2),
|
|
186
|
+
int(config.get_sniffing("round-sleeping")),
|
|
187
|
+
)
|
|
185
188
|
print(
|
|
186
189
|
f"🐶 Scrapper is sleeping for {wait} seconds before next round of {limit} urls"
|
|
187
190
|
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/default_files/url_sniffing.yaml
RENAMED
|
File without changes
|
{ohmyscrapper-0.8.1 → ohmyscrapper-0.8.2}/src/ohmyscrapper/core/default_files/url_types.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|