ohmyscrapper 0.7.4__py3-none-any.whl → 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ohmyscrapper/__init__.py +20 -2
- ohmyscrapper/core/config.py +2 -2
- ohmyscrapper/core/config_files.py +13 -10
- ohmyscrapper/core/default_files/config.yaml +3 -0
- ohmyscrapper/core/default_files/url_sniffing.yaml +1 -1
- ohmyscrapper/models/urls_manager.py +1 -1
- ohmyscrapper/modules/browser.py +27 -0
- ohmyscrapper/modules/scrap_urls.py +13 -4
- ohmyscrapper/modules/sniff_url.py +50 -11
- {ohmyscrapper-0.7.4.dist-info → ohmyscrapper-0.8.2.dist-info}/METADATA +3 -2
- ohmyscrapper-0.8.2.dist-info/RECORD +22 -0
- ohmyscrapper-0.7.4.dist-info/RECORD +0 -21
- {ohmyscrapper-0.7.4.dist-info → ohmyscrapper-0.8.2.dist-info}/WHEEL +0 -0
- {ohmyscrapper-0.7.4.dist-info → ohmyscrapper-0.8.2.dist-info}/entry_points.txt +0 -0
ohmyscrapper/__init__.py
CHANGED
|
@@ -20,7 +20,7 @@ from ohmyscrapper.core.config import update
|
|
|
20
20
|
|
|
21
21
|
def main():
|
|
22
22
|
parser = argparse.ArgumentParser(prog="ohmyscrapper")
|
|
23
|
-
parser.add_argument("--version", action="version", version="%(prog)s v0.7.4")
|
|
23
|
+
parser.add_argument("--version", action="version", version="%(prog)s v0.8.2")
|
|
24
24
|
|
|
25
25
|
update()
|
|
26
26
|
subparsers = parser.add_subparsers(dest="command", help="Available commands")
|
|
@@ -104,6 +104,16 @@ def main():
|
|
|
104
104
|
sniff_url_parser.add_argument(
|
|
105
105
|
"url", default="https://cesarcardoso.cc/", help="Url to sniff"
|
|
106
106
|
)
|
|
107
|
+
sniff_url_parser.add_argument(
|
|
108
|
+
"--metatags",
|
|
109
|
+
default="mt",
|
|
110
|
+
help="Meta tags you want to watch separated by comma ','",
|
|
111
|
+
)
|
|
112
|
+
sniff_url_parser.add_argument(
|
|
113
|
+
"--bodytags",
|
|
114
|
+
default="bd",
|
|
115
|
+
help="Body tags you want to watch separated by comma ','",
|
|
116
|
+
)
|
|
107
117
|
|
|
108
118
|
show_urls_parser = subparsers.add_parser("show", help="Show urls and prefixes")
|
|
109
119
|
show_urls_parser.add_argument(
|
|
@@ -153,7 +163,15 @@ def main():
|
|
|
153
163
|
return
|
|
154
164
|
|
|
155
165
|
if args.command == "sniff-url":
|
|
156
|
-
|
|
166
|
+
sniffing_config = {}
|
|
167
|
+
if len(args.metatags) > 0:
|
|
168
|
+
sniffing_config["metatags"] = str(args.metatags).split(",")
|
|
169
|
+
|
|
170
|
+
if len(args.bodytags) > 0:
|
|
171
|
+
sniffing_config["bodytags"] = str(args.bodytags).split(",")
|
|
172
|
+
|
|
173
|
+
sniff_url(args.url, sniffing_config=sniffing_config)
|
|
174
|
+
|
|
157
175
|
return
|
|
158
176
|
|
|
159
177
|
if args.command == "scrap-urls":
|
ohmyscrapper/core/config.py
CHANGED
|
@@ -69,14 +69,14 @@ def url_types_file_exists():
|
|
|
69
69
|
def get_url_types():
|
|
70
70
|
url_types_file = get_files("url_types")
|
|
71
71
|
return config_files.create_and_read_config_file(
|
|
72
|
-
url_types_file, default_app_dir=default_app_dir
|
|
72
|
+
url_types_file, default_app_dir=default_app_dir, complete_file=False
|
|
73
73
|
)
|
|
74
74
|
|
|
75
75
|
|
|
76
76
|
def get_url_sniffing():
|
|
77
77
|
file = get_files("url_sniffing")
|
|
78
78
|
return config_files.create_and_read_config_file(
|
|
79
|
-
file, default_app_dir=default_app_dir
|
|
79
|
+
file, default_app_dir=default_app_dir, complete_file=False
|
|
80
80
|
)
|
|
81
81
|
|
|
82
82
|
|
|
@@ -2,7 +2,9 @@ import os
|
|
|
2
2
|
import yaml
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
def create_and_read_config_file(file_name, default_app_dir, force_default=False):
|
|
5
|
+
def create_and_read_config_file(
|
|
6
|
+
file_name, default_app_dir, force_default=False, complete_file=True
|
|
7
|
+
):
|
|
6
8
|
config_file = config_file_path(file_name, default_app_dir)
|
|
7
9
|
default_config_params = _get_default_file(default_file=file_name)
|
|
8
10
|
if force_default or not os.path.exists(config_file):
|
|
@@ -15,17 +17,18 @@ def create_and_read_config_file(file_name, default_app_dir, force_default=False)
|
|
|
15
17
|
else:
|
|
16
18
|
with open(config_file, "r") as f:
|
|
17
19
|
config_params = yaml.safe_load(f.read())
|
|
18
|
-
if
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
default_app_dir=default_app_dir,
|
|
23
|
-
):
|
|
24
|
-
config_params = create_and_read_config_file(
|
|
20
|
+
if complete_file:
|
|
21
|
+
if complete_config_file(
|
|
22
|
+
config_params=config_params,
|
|
23
|
+
default_config_params=default_config_params,
|
|
25
24
|
file_name=file_name,
|
|
26
25
|
default_app_dir=default_app_dir,
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
):
|
|
27
|
+
config_params = create_and_read_config_file(
|
|
28
|
+
file_name=file_name,
|
|
29
|
+
default_app_dir=default_app_dir,
|
|
30
|
+
force_default=force_default,
|
|
31
|
+
)
|
|
29
32
|
|
|
30
33
|
if config_params is None:
|
|
31
34
|
config_params = create_and_read_config_file(
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from selenium import webdriver
|
|
2
|
+
from ohmyscrapper.core.config import get_sniffing
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def get_driver():
|
|
6
|
+
if get_sniffing("use-browser") == "safari":
|
|
7
|
+
from selenium.webdriver.safari.options import Options
|
|
8
|
+
|
|
9
|
+
options = Options()
|
|
10
|
+
driver = webdriver.Safari(options=options)
|
|
11
|
+
elif get_sniffing("use-browser") == "firefox":
|
|
12
|
+
from selenium.webdriver.firefox.options import Options
|
|
13
|
+
|
|
14
|
+
options = Options()
|
|
15
|
+
driver = webdriver.Firefox(options=options)
|
|
16
|
+
elif get_sniffing("use-browser") == "ie":
|
|
17
|
+
from selenium.webdriver.ie.options import Options
|
|
18
|
+
|
|
19
|
+
options = Options()
|
|
20
|
+
driver = webdriver.Ie(options=options)
|
|
21
|
+
else: # default: chrome
|
|
22
|
+
from selenium.webdriver.chrome.options import Options
|
|
23
|
+
|
|
24
|
+
options = Options()
|
|
25
|
+
driver = webdriver.Chrome(options=options)
|
|
26
|
+
|
|
27
|
+
return driver
|
|
@@ -2,13 +2,14 @@ import ohmyscrapper.models.urls_manager as urls_manager
|
|
|
2
2
|
import ohmyscrapper.modules.sniff_url as sniff_url
|
|
3
3
|
import ohmyscrapper.modules.load_txt as load_txt
|
|
4
4
|
import ohmyscrapper.modules.classify_urls as classify_urls
|
|
5
|
+
import ohmyscrapper.modules.browser as browser
|
|
5
6
|
from ohmyscrapper.core import config
|
|
6
7
|
|
|
7
8
|
import time
|
|
8
9
|
import random
|
|
9
10
|
|
|
10
11
|
|
|
11
|
-
def scrap_url(url, verbose=False):
|
|
12
|
+
def scrap_url(url, verbose=False, driver=None):
|
|
12
13
|
if url["url_type"] is None:
|
|
13
14
|
url["url_type"] = "generic"
|
|
14
15
|
|
|
@@ -32,7 +33,7 @@ def scrap_url(url, verbose=False):
|
|
|
32
33
|
sniffing_config = config.get_url_sniffing()
|
|
33
34
|
|
|
34
35
|
url_report = sniff_url.get_tags(
|
|
35
|
-
url=url["url"], sniffing_config=sniffing_config[url_type]
|
|
36
|
+
url=url["url"], sniffing_config=sniffing_config[url_type], driver=driver
|
|
36
37
|
)
|
|
37
38
|
except Exception as e:
|
|
38
39
|
urls_manager.set_url_error(url=url["url"], value="error on scrapping")
|
|
@@ -147,6 +148,7 @@ def scrap_urls(
|
|
|
147
148
|
only_parents=True,
|
|
148
149
|
verbose=False,
|
|
149
150
|
n_urls=0,
|
|
151
|
+
driver=None,
|
|
150
152
|
):
|
|
151
153
|
limit = 10
|
|
152
154
|
classify_urls.classify_urls()
|
|
@@ -170,13 +172,19 @@ def scrap_urls(
|
|
|
170
172
|
time.sleep(wait)
|
|
171
173
|
|
|
172
174
|
print("🐕 Scrapper is sniffing the url...")
|
|
173
|
-
|
|
175
|
+
|
|
176
|
+
if driver is None and config.get_sniffing("use-browser"):
|
|
177
|
+
driver = browser.get_driver()
|
|
178
|
+
scrap_url(url=url, verbose=verbose, driver=driver)
|
|
174
179
|
|
|
175
180
|
n_urls = n_urls + len(urls)
|
|
176
181
|
print(f"-- 🗃️ {n_urls} scraped urls...")
|
|
177
182
|
classify_urls.classify_urls()
|
|
178
183
|
if recursive:
|
|
179
|
-
wait = random.randint(
|
|
184
|
+
wait = random.randint(
|
|
185
|
+
int(config.get_sniffing("round-sleeping") / 2),
|
|
186
|
+
int(config.get_sniffing("round-sleeping")),
|
|
187
|
+
)
|
|
180
188
|
print(
|
|
181
189
|
f"🐶 Scrapper is sleeping for {wait} seconds before next round of {limit} urls"
|
|
182
190
|
)
|
|
@@ -188,6 +196,7 @@ def scrap_urls(
|
|
|
188
196
|
only_parents=only_parents,
|
|
189
197
|
verbose=verbose,
|
|
190
198
|
n_urls=n_urls,
|
|
199
|
+
driver=driver,
|
|
191
200
|
)
|
|
192
201
|
else:
|
|
193
202
|
print("scrapping is over...")
|
|
@@ -2,15 +2,17 @@ import requests
|
|
|
2
2
|
from bs4 import BeautifulSoup
|
|
3
3
|
import json
|
|
4
4
|
from ohmyscrapper.core import config
|
|
5
|
+
import ohmyscrapper.modules.browser as browser
|
|
6
|
+
import time
|
|
5
7
|
|
|
6
8
|
|
|
7
9
|
def sniff_url(
|
|
8
10
|
url="https://www.linkedin.com/in/cesardesouzacardoso/",
|
|
9
11
|
silent=False,
|
|
10
12
|
sniffing_config={},
|
|
13
|
+
driver=None,
|
|
11
14
|
):
|
|
12
15
|
final_report = {}
|
|
13
|
-
final_report["error"] = None
|
|
14
16
|
if "metatags" in sniffing_config:
|
|
15
17
|
metatags_to_search = sniffing_config["metatags"]
|
|
16
18
|
else:
|
|
@@ -45,13 +47,14 @@ def sniff_url(
|
|
|
45
47
|
print("checking url:", url)
|
|
46
48
|
|
|
47
49
|
try:
|
|
48
|
-
r =
|
|
49
|
-
soup = BeautifulSoup(r
|
|
50
|
+
r = get_url(url=url, driver=driver)
|
|
51
|
+
soup = BeautifulSoup(r, "html.parser")
|
|
50
52
|
except requests.exceptions.ReadTimeout:
|
|
51
53
|
url_domain = url.split("/")[2]
|
|
52
54
|
final_report["error"] = (
|
|
53
55
|
f"!!! timeout (10 seconds) while checking the url with domain: `{url_domain}` !!!"
|
|
54
56
|
)
|
|
57
|
+
|
|
55
58
|
print(f"\n\n{final_report['error']}\n\n")
|
|
56
59
|
soup = BeautifulSoup("", "html.parser")
|
|
57
60
|
|
|
@@ -69,14 +72,14 @@ def sniff_url(
|
|
|
69
72
|
soup=soup, silent=silent, body_tags_to_search=body_tags_to_search
|
|
70
73
|
)
|
|
71
74
|
)
|
|
72
|
-
final_report["a_links"] = _extract_a_tags(soup=soup, silent=silent)
|
|
75
|
+
final_report["a_links"] = _extract_a_tags(soup=soup, silent=silent, url=url)
|
|
73
76
|
final_report = _complementary_report(final_report, soup, silent).copy()
|
|
74
77
|
final_report["json"] = json.dumps(final_report)
|
|
75
78
|
|
|
76
79
|
return final_report
|
|
77
80
|
|
|
78
81
|
|
|
79
|
-
def _extract_a_tags(soup, silent):
|
|
82
|
+
def _extract_a_tags(soup, silent, url=None):
|
|
80
83
|
a_links = []
|
|
81
84
|
if not silent:
|
|
82
85
|
print("\n\n\n\n---- all <a> links ---")
|
|
@@ -84,12 +87,18 @@ def _extract_a_tags(soup, silent):
|
|
|
84
87
|
i = 0
|
|
85
88
|
for a_tag in soup.find_all("a"):
|
|
86
89
|
i = i + 1
|
|
87
|
-
|
|
90
|
+
|
|
91
|
+
href = a_tag.get("href")
|
|
92
|
+
if url is not None and href[:1] == "/":
|
|
93
|
+
domain = url.split("//")[0] + "//" + url.split("//")[1].split("/")[0]
|
|
94
|
+
href = domain + href
|
|
95
|
+
|
|
96
|
+
a_links.append({"text": a_tag.text, "href": href})
|
|
88
97
|
if not silent:
|
|
89
98
|
print("\n-- <a> link", i, "-- ")
|
|
90
99
|
print("target:", a_tag.get("target"))
|
|
91
100
|
print("text:", str(a_tag.text).strip())
|
|
92
|
-
print("href:",
|
|
101
|
+
print("href:", href)
|
|
93
102
|
print("-------------- ")
|
|
94
103
|
return a_links
|
|
95
104
|
|
|
@@ -124,9 +133,21 @@ def _extract_text_tags(soup, silent, body_tags_to_search):
|
|
|
124
133
|
print("\n\n\n\n---- all <text> tags ---\n")
|
|
125
134
|
i = 0
|
|
126
135
|
for text_tag, separator in body_tags_to_search.items():
|
|
127
|
-
|
|
136
|
+
tag = text_tag
|
|
137
|
+
tag_class = None
|
|
138
|
+
tag_id = None
|
|
139
|
+
|
|
140
|
+
if len(text_tag.split(".")) > 1:
|
|
141
|
+
tag = text_tag.split(".")[0]
|
|
142
|
+
tag_class = text_tag.split(".")[1]
|
|
143
|
+
|
|
144
|
+
if len(text_tag.split("#")) > 1:
|
|
145
|
+
tag = text_tag.split("#")[0]
|
|
146
|
+
tag_id = text_tag.split("#")[1]
|
|
147
|
+
|
|
148
|
+
if len(soup.find_all(tag, class_=tag_class, id=tag_id)) > 0:
|
|
128
149
|
valid_text_tags[text_tag] = []
|
|
129
|
-
for obj_tag in soup.find_all(
|
|
150
|
+
for obj_tag in soup.find_all(tag, class_=tag_class, id=tag_id):
|
|
130
151
|
valid_text_tags[text_tag].append(obj_tag.text.strip())
|
|
131
152
|
valid_text_tags[text_tag] = separator.join(valid_text_tags[text_tag])
|
|
132
153
|
i = i + 1
|
|
@@ -161,5 +182,23 @@ def _complementary_report(final_report, soup, silent):
|
|
|
161
182
|
return final_report
|
|
162
183
|
|
|
163
184
|
|
|
164
|
-
def get_tags(url, sniffing_config={}):
|
|
165
|
-
return sniff_url(
|
|
185
|
+
def get_tags(url, sniffing_config={}, driver=None):
|
|
186
|
+
return sniff_url(
|
|
187
|
+
url=url, silent=True, sniffing_config=sniffing_config, driver=driver
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def get_url(url, driver=None):
|
|
192
|
+
if driver is None and config.get_sniffing("use-browser"):
|
|
193
|
+
driver = browser.get_driver()
|
|
194
|
+
|
|
195
|
+
if driver is not None:
|
|
196
|
+
try:
|
|
197
|
+
driver.get(url)
|
|
198
|
+
time.sleep(config.get_sniffing("browser-waiting-time"))
|
|
199
|
+
driver.implicitly_wait(config.get_sniffing("browser-waiting-time"))
|
|
200
|
+
return driver.page_source
|
|
201
|
+
except:
|
|
202
|
+
print("error")
|
|
203
|
+
pass
|
|
204
|
+
return requests.get(url=url, timeout=config.get_sniffing("timeout")).text
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ohmyscrapper
|
|
3
|
-
Version: 0.7.4
|
|
3
|
+
Version: 0.8.2
|
|
4
4
|
Summary: OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a final report with general information about job positions.
|
|
5
5
|
Author: Cesar Cardoso
|
|
6
6
|
Author-email: Cesar Cardoso <hello@cesarcardoso.cc>
|
|
@@ -13,13 +13,14 @@ Requires-Dist: python-dotenv>=1.2.1
|
|
|
13
13
|
Requires-Dist: pyyaml>=6.0.3
|
|
14
14
|
Requires-Dist: requests>=2.32.5
|
|
15
15
|
Requires-Dist: rich>=14.2.0
|
|
16
|
+
Requires-Dist: selenium>=4.39.0
|
|
16
17
|
Requires-Dist: urlextract>=1.9.0
|
|
17
18
|
Requires-Python: >=3.11
|
|
18
19
|
Project-URL: Changelog, https://github.com/bouli/ohmyscrapper/releases/latest
|
|
19
20
|
Project-URL: Repository, https://github.com/bouli/ohmyscrapper
|
|
20
21
|
Description-Content-Type: text/markdown
|
|
21
22
|
|
|
22
|
-
# 🐶 OhMyScrapper - v0.7.4
|
|
23
|
+
# 🐶 OhMyScrapper - v0.8.2
|
|
23
24
|
|
|
24
25
|
OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a
|
|
25
26
|
final report with general information about job positions.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
ohmyscrapper/__init__.py,sha256=WzXXhhlVkyAPbqeo7NgezLb6TbEJcuFf8JYAdcf3zBE,7678
|
|
2
|
+
ohmyscrapper/__main__.py,sha256=5BjNuyet8AY-POwoF5rGt722rHQ7tJ0Vf0UFUfzzi-I,58
|
|
3
|
+
ohmyscrapper/core/config.py,sha256=bfmoTr1j4SnIhKFZC_F9fh4Y90VqSPFf_g6Rm-aNui4,3184
|
|
4
|
+
ohmyscrapper/core/config_files.py,sha256=3mIXVxurmyXCpKueyyGsZ6lUnV8VJ2gnLU2QaqhWhhI,3410
|
|
5
|
+
ohmyscrapper/core/default_files/config.yaml,sha256=y54QAjOnogpl8LEzhmn89tAfRzle4ZWWtIbYRjxX8Rk,341
|
|
6
|
+
ohmyscrapper/core/default_files/url_sniffing.yaml,sha256=HUwmGUwuJy7t97bJHgNiZOl1thvD9bLaelPgbEr5bMY,465
|
|
7
|
+
ohmyscrapper/core/default_files/url_types.yaml,sha256=20kvv8_iWRT-pLa014RXYpAmPSonn6tDnG302rx7l-o,228
|
|
8
|
+
ohmyscrapper/models/urls_manager.py,sha256=sP2T4k1HOj8ccaVGWbuhfw1BDfOUSVL4_WR9vRyHjOA,12115
|
|
9
|
+
ohmyscrapper/modules/browser.py,sha256=6AaNFQ7jV91DvHqbsBT6It_-tNbVN2qJC_c1vXTweJY,856
|
|
10
|
+
ohmyscrapper/modules/classify_urls.py,sha256=GhiosAQUITy1DQe_PksYV9QRKVTgpkSE28dkutzbWVA,1038
|
|
11
|
+
ohmyscrapper/modules/load_txt.py,sha256=pkWBIdh6vORPfENDZ6wGM89vswnOnc1flqKfkLs9RD8,4138
|
|
12
|
+
ohmyscrapper/modules/merge_dbs.py,sha256=0pK3PPUGSbnaDkdpQUGCHemOVaKO37bfHwnsy_EVpWQ,115
|
|
13
|
+
ohmyscrapper/modules/process_with_ai.py,sha256=kl39Jzl-PUwh6AfmTZ9SLFUYs9Sk4biqgt8rNz3X1FA,7255
|
|
14
|
+
ohmyscrapper/modules/scrap_urls.py,sha256=_e4jT7eBWGP6cqI6RaD0xzNX1vCFBx96JIBPGW3mAV4,6627
|
|
15
|
+
ohmyscrapper/modules/seed.py,sha256=hHEGSoPXsmclTaRPeIcK2oC1Xpg3_JqBv_YFMD0m5Jw,1044
|
|
16
|
+
ohmyscrapper/modules/show.py,sha256=jsAs4g8ouA9wymkBfkDCbpVWKD-m_20uKG-m1cZAUGA,3877
|
|
17
|
+
ohmyscrapper/modules/sniff_url.py,sha256=BZphbr2V7MDyFPW7APlh7_CLTtc_u3kcB7DY2QjVVQo,6579
|
|
18
|
+
ohmyscrapper/modules/untouch_all.py,sha256=DAwWYfqMFifHPtFCxSamu0AxHCgk6aJbTnBy6wLucXM,167
|
|
19
|
+
ohmyscrapper-0.8.2.dist-info/WHEEL,sha256=xDCZ-UyfvkGuEHPeI7BcJzYKIZzdqN8A8o1M5Om8IyA,79
|
|
20
|
+
ohmyscrapper-0.8.2.dist-info/entry_points.txt,sha256=BZud6D16XkfjelDa4Z33mji-KJbbZXgq2FoLrzjru5I,52
|
|
21
|
+
ohmyscrapper-0.8.2.dist-info/METADATA,sha256=2OnXXefFcRT_ChEVdL6LKZoe6iNKyFqjoN10LBUBB34,4293
|
|
22
|
+
ohmyscrapper-0.8.2.dist-info/RECORD,,
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
ohmyscrapper/__init__.py,sha256=x3wLMhIU744W9DRtXoTrPpWghb7UdC3UJSYZh_gpzlw,7095
|
|
2
|
-
ohmyscrapper/__main__.py,sha256=5BjNuyet8AY-POwoF5rGt722rHQ7tJ0Vf0UFUfzzi-I,58
|
|
3
|
-
ohmyscrapper/core/config.py,sha256=aaSLxk6Fuzp88EMax6MAOX3WszH4OfYLz_dJoXlu0ME,3142
|
|
4
|
-
ohmyscrapper/core/config_files.py,sha256=C79-Vgz1E5_jUWtob-yrCyBxsqWEXxqPI_r6TL7D1_Q,3314
|
|
5
|
-
ohmyscrapper/core/default_files/config.yaml,sha256=gi8tqhSumQYJIl8QDisJ6eaib2tdcBNT-GFU-e6Dtns,273
|
|
6
|
-
ohmyscrapper/core/default_files/url_sniffing.yaml,sha256=RU5GYWmC1PdBl4nn7HUfRBwuXz8Rlap75d4W3zWDzPM,465
|
|
7
|
-
ohmyscrapper/core/default_files/url_types.yaml,sha256=20kvv8_iWRT-pLa014RXYpAmPSonn6tDnG302rx7l-o,228
|
|
8
|
-
ohmyscrapper/models/urls_manager.py,sha256=k0N1If4YoRUWHX80OyBNEeJNIzDROc2ur6j8q2OBlqo,12103
|
|
9
|
-
ohmyscrapper/modules/classify_urls.py,sha256=GhiosAQUITy1DQe_PksYV9QRKVTgpkSE28dkutzbWVA,1038
|
|
10
|
-
ohmyscrapper/modules/load_txt.py,sha256=pkWBIdh6vORPfENDZ6wGM89vswnOnc1flqKfkLs9RD8,4138
|
|
11
|
-
ohmyscrapper/modules/merge_dbs.py,sha256=0pK3PPUGSbnaDkdpQUGCHemOVaKO37bfHwnsy_EVpWQ,115
|
|
12
|
-
ohmyscrapper/modules/process_with_ai.py,sha256=kl39Jzl-PUwh6AfmTZ9SLFUYs9Sk4biqgt8rNz3X1FA,7255
|
|
13
|
-
ohmyscrapper/modules/scrap_urls.py,sha256=uN5j0dychVMGu7n1rcpYdba4sqc47ssyCn0tVaiz-Ic,6264
|
|
14
|
-
ohmyscrapper/modules/seed.py,sha256=hHEGSoPXsmclTaRPeIcK2oC1Xpg3_JqBv_YFMD0m5Jw,1044
|
|
15
|
-
ohmyscrapper/modules/show.py,sha256=jsAs4g8ouA9wymkBfkDCbpVWKD-m_20uKG-m1cZAUGA,3877
|
|
16
|
-
ohmyscrapper/modules/sniff_url.py,sha256=1QnxEdCWLjLh0uM72dlPzst64qglqg2MHA_xYlNcLSA,5435
|
|
17
|
-
ohmyscrapper/modules/untouch_all.py,sha256=DAwWYfqMFifHPtFCxSamu0AxHCgk6aJbTnBy6wLucXM,167
|
|
18
|
-
ohmyscrapper-0.7.4.dist-info/WHEEL,sha256=xDCZ-UyfvkGuEHPeI7BcJzYKIZzdqN8A8o1M5Om8IyA,79
|
|
19
|
-
ohmyscrapper-0.7.4.dist-info/entry_points.txt,sha256=BZud6D16XkfjelDa4Z33mji-KJbbZXgq2FoLrzjru5I,52
|
|
20
|
-
ohmyscrapper-0.7.4.dist-info/METADATA,sha256=CVE8WUcraUtONy9UVIU0y8Y7wjsk4zEmMVfpA_al1CU,4261
|
|
21
|
-
ohmyscrapper-0.7.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|