ohmyscrapper-0.7.0-py3-none-any.whl → ohmyscrapper-0.7.4-py3-none-any.whl
- ohmyscrapper/__init__.py +25 -3
- ohmyscrapper/core/config.py +6 -0
- ohmyscrapper/core/config_files.py +29 -2
- ohmyscrapper/core/default_files/config.yaml +3 -0
- ohmyscrapper/core/default_files/url_sniffing.yaml +4 -0
- ohmyscrapper/models/urls_manager.py +16 -2
- ohmyscrapper/modules/load_txt.py +7 -3
- ohmyscrapper/modules/scrap_urls.py +12 -6
- ohmyscrapper/modules/seed.py +4 -1
- ohmyscrapper/modules/sniff_url.py +20 -10
- {ohmyscrapper-0.7.0.dist-info → ohmyscrapper-0.7.4.dist-info}/METADATA +6 -3
- ohmyscrapper-0.7.4.dist-info/RECORD +21 -0
- ohmyscrapper-0.7.0.dist-info/RECORD +0 -21
- {ohmyscrapper-0.7.0.dist-info → ohmyscrapper-0.7.4.dist-info}/WHEEL +0 -0
- {ohmyscrapper-0.7.0.dist-info → ohmyscrapper-0.7.4.dist-info}/entry_points.txt +0 -0
ohmyscrapper/__init__.py
CHANGED

@@ -20,7 +20,7 @@ from ohmyscrapper.core.config import update
 
 def main():
     parser = argparse.ArgumentParser(prog="ohmyscrapper")
-    parser.add_argument("--version", action="version", version="%(prog)s v0.7.0")
+    parser.add_argument("--version", action="version", version="%(prog)s v0.7.4")
 
     update()
     subparsers = parser.add_subparsers(dest="command", help="Available commands")
@@ -28,6 +28,9 @@ def main():
         "start",
         help="Make the entire process of 📦 loading, 🐶 scraping and 📜🖋️ exporting with the default configuration.",
     )
+    start_parser.add_argument(
+        "-input", default=None, help="File/Folder path or url for pre-loading."
+    )
 
     start_parser.add_argument(
         "--ai",
@@ -50,6 +53,14 @@ def main():
         help="Add all `url_types` from the bank to the `/ohmyscrapper/url_types.yaml` file.",
         action="store_true",
     )
+
+    seed_parser.add_argument(
+        "--reset",
+        default=False,
+        help="Reset all `url_types`.",
+        action="store_true",
+    )
+
     untouch_parser = subparsers.add_parser(
         "untouch-all", help="Untouch all urls. That resets classification"
     )
@@ -85,6 +96,9 @@ def main():
     scrap_urls_parser.add_argument(
         "--verbose", default=False, help="Run in verbose mode", action="store_true"
     )
+    scrap_urls_parser.add_argument(
+        "-input", default=None, help="File/Folder path or url for pre-loading."
+    )
 
     sniff_url_parser = subparsers.add_parser("sniff-url", help="🐕 Sniff/Check url")
     sniff_url_parser.add_argument(
@@ -131,7 +145,7 @@ def main():
         if args.export:
             export_url_types_to_file()
         else:
-            seed()
+            seed(args.reset)
         return
 
     if args.command == "untouch-all":
@@ -143,6 +157,9 @@ def main():
         return
 
     if args.command == "scrap-urls":
+        if args.input != None:
+            load_txt(file_name=args.input, verbose=args.verbose)
+
         scrap_urls(
             recursive=args.recursive,
             ignore_valid_prefix=args.ignore_type,
@@ -182,7 +199,12 @@ def main():
         return
 
     if args.command == "start":
-
+        seed()
+        if args.input != None:
+            load_txt(file_name=args.input)
+        else:
+            load_txt()
+
         scrap_urls(
             recursive=True,
             ignore_valid_prefix=True,
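The `-input` option added to `start` and `scrap-urls` is declared with a single dash, so argparse still exposes it as `args.input`. A minimal sketch of the new wiring, showing only the flags this release adds (the subparser help strings and the rest of `main()` are elided, and the bare `add_parser` calls here are simplified assumptions):

```python
import argparse

parser = argparse.ArgumentParser(prog="ohmyscrapper")
subparsers = parser.add_subparsers(dest="command", help="Available commands")

# `start` (and, identically, `scrap-urls`) gains an optional pre-loading input.
start_parser = subparsers.add_parser("start")
start_parser.add_argument(
    "-input", default=None, help="File/Folder path or url for pre-loading."
)

# `seed` gains a --reset flag, forwarded to seed(args.reset).
seed_parser = subparsers.add_parser("seed")
seed_parser.add_argument(
    "--reset", default=False, help="Reset all `url_types`.", action="store_true"
)

print(parser.parse_args(["start", "-input", "jobs.txt"]))  # Namespace(command='start', input='jobs.txt')
print(parser.parse_args(["seed", "--reset"]))              # Namespace(command='seed', reset=True)
```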
ohmyscrapper/core/config.py
CHANGED

@@ -39,6 +39,12 @@ def get_ai(param):
     )
 
 
+def get_sniffing(param):
+    return config_files.get_param(
+        parent_param="sniffing", param=param, default_app_dir=default_app_dir
+    )
+
+
 def load_config(force_default=False):
     config_file_name = "config.yaml"
     config_params = config_files.create_and_read_config_file(
ohmyscrapper/core/config_files.py
CHANGED

@@ -4,14 +4,29 @@ import yaml
 
 def create_and_read_config_file(file_name, default_app_dir, force_default=False):
     config_file = config_file_path(file_name, default_app_dir)
+    default_config_params = _get_default_file(default_file=file_name)
     if force_default or not os.path.exists(config_file):
-        config_params = _get_default_file(default_file=file_name)
         overwrite_config_file(
-            data=…
+            data=default_config_params,
+            file_name=file_name,
+            default_app_dir=default_app_dir,
         )
+        config_params = default_config_params
     else:
         with open(config_file, "r") as f:
             config_params = yaml.safe_load(f.read())
+        if complete_config_file(
+            config_params=config_params,
+            default_config_params=default_config_params,
+            file_name=file_name,
+            default_app_dir=default_app_dir,
+        ):
+            config_params = create_and_read_config_file(
+                file_name=file_name,
+                default_app_dir=default_app_dir,
+                force_default=force_default,
+            )
+
     if config_params is None:
         config_params = create_and_read_config_file(
             file_name=file_name, default_app_dir=default_app_dir, force_default=True
@@ -19,6 +34,18 @@ def create_and_read_config_file(file_name, default_app_dir, force_default=False)
     return config_params
 
 
+def complete_config_file(
+    config_params, default_config_params, file_name, default_app_dir
+):
+    has_updated = False
+    for key, values in default_config_params.items():
+        if key not in config_params.keys():
+            has_updated = True
+            data = {key: values}
+            append_config_file(data, file_name, default_app_dir)
+    return has_updated
+
+
 def overwrite_config_file(data, file_name, default_app_dir):
     config_file = config_file_path(file_name, default_app_dir)
     with open(config_file, "+w") as f:
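The effect of the new `complete_config_file` pass: top-level keys present in the packaged defaults but missing from a user's existing YAML (such as the new `sniffing` block) are appended, and the file is then re-read. A toy reduction of that merge with hypothetical config values; the real code writes each key through `append_config_file` rather than mutating a dict:

```python
def complete_config(config_params, default_config_params):
    # Append any top-level default key the user's config lacks.
    has_updated = False
    for key, values in default_config_params.items():
        if key not in config_params:
            config_params[key] = values  # real code: append_config_file(...)
            has_updated = True
    return has_updated

defaults = {"ai": {"model": "gemini"}, "sniffing": {"timeout": 10}}  # hypothetical values
user = {"ai": {"model": "gemini"}}  # config written before 0.7.4: no `sniffing` block
assert complete_config(user, defaults)
assert user["sniffing"]["timeout"] == 10
```

Note the merge is shallow: a top-level key that already exists keeps its value as-is, even if the defaults later gain nested entries under it.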
ohmyscrapper/models/urls_manager.py
CHANGED

@@ -69,6 +69,14 @@ def seeds(seeds={}):
     return True
 
 
+@use_connection
+def reset_seeds():
+    sql = "DELETE FROM urls_valid_prefix"
+    c = conn.cursor()
+    c.execute(sql)
+    conn.commit()
+
+
 @use_connection
 def add_urls_valid_prefix(url_prefix, url_type):
 
@@ -198,6 +206,8 @@ def get_url_like_unclassified(like_condition):
 
 @use_connection
 def add_url(url, title=None, parent_url=None):
+    if url[:1] == "/":
+        return
     url = clean_url(url)
     c = conn.cursor()
 
@@ -340,7 +350,9 @@ def set_url_error(url, value):
 @use_connection
 def set_url_type_by_id(url_id, url_type):
     c = conn.cursor()
-    c.execute(…
+    c.execute(
+        f"UPDATE urls SET url_type = '{url_type}', last_touch = NULL WHERE id = {url_id}"
+    )
     conn.commit()
 
 
@@ -392,8 +404,10 @@ def touch_url(url):
 @use_connection
 def untouch_url(url):
     url = clean_url(url)
+    url = str(url.strip())
+
     c = conn.cursor()
-    c.execute("UPDATE urls SET last_touch = NULL WHERE url =…
+    c.execute(f"UPDATE urls SET last_touch = NULL, url_type = NULL WHERE url = '{url}'")
    conn.commit()
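One observation on the rewritten UPDATE statements: they interpolate `url_type`, `url_id`, and `url` into the SQL via f-strings, so a URL containing a single quote would break the query. A hedged alternative sketch using placeholders, assuming the `@use_connection` decorator supplies a standard `sqlite3` connection:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
c = conn.cursor()
c.execute("CREATE TABLE urls (id INTEGER PRIMARY KEY, url TEXT, url_type TEXT, last_touch TEXT)")
# A URL with an embedded quote ('' is SQL escaping inside the literal):
c.execute("INSERT INTO urls (url, url_type, last_touch) VALUES ('https://x.test/a''b', 'post', 'now')")

# Same updates as the diff, with bound parameters instead of f-strings:
c.execute("UPDATE urls SET url_type = ?, last_touch = NULL WHERE id = ?", ("job", 1))
c.execute(
    "UPDATE urls SET last_touch = NULL, url_type = NULL WHERE url = ?",
    ("https://x.test/a'b",),
)
conn.commit()
```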
ohmyscrapper/modules/load_txt.py
CHANGED

@@ -19,14 +19,16 @@ def load_txt(file_name="input", verbose=False):
 
     text_file_content = ""
     if file_name is not None and not os.path.isdir(file_name):
-        print(f"📖 reading file `{file_name}`... ")
         if not os.path.exists(file_name):
             if file_name.startswith("https://") or file_name.startswith("http://"):
+                print(f"📖 reading url `{file_name}`... ")
                 text_file_content = " " + file_name + " "
+                urls_manager.untouch_url(url=file_name)
             else:
                 print(f"\n file `{file_name}` not found.")
                 return
         else:
+            print(f"📖 reading file `{file_name}`... ")
             text_file_content = _increment_file_name(
                 text_file_content=text_file_content, file_name=file_name
             )
@@ -51,13 +53,15 @@ def load_txt(file_name="input", verbose=False):
                 file_name=os.path.join(dir_files, text_files[0]),
             )
         else:
-            print("\…
+            print("\nFiles list:")
             for index, file in enumerate(text_files):
                 print(f"[{index}]:", os.path.join(dir_files, file))
 
             text_file_option = -1
             while text_file_option < 0 or text_file_option >= len(text_files):
-                text_file_option = input(…
+                text_file_option = input(
+                    "Choose a text file. Use `*` for process all and `q` to quit. Enter the file number: "
+                )
                 if text_file_option == "*":
                     for file in text_files:
                         text_file_content = _increment_file_name(
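With this change, an argument that is not an existing local path but starts with `http://` or `https://` is treated as a URL to pre-load: the URL itself becomes the input text, and `untouch_url` clears its `last_touch`/`url_type` so the scraper picks it up again even if it was already classified. A toy reduction of just that branch, with the `urls_manager` call stubbed and file reading simplified:

```python
import os

def load_input(file_name):
    if file_name is not None and not os.path.isdir(file_name):
        if not os.path.exists(file_name):
            if file_name.startswith("https://") or file_name.startswith("http://"):
                print(f"📖 reading url `{file_name}`... ")
                # urls_manager.untouch_url(url=file_name) would reset its state here
                return " " + file_name + " "
            print(f"\n file `{file_name}` not found.")
            return None
        print(f"📖 reading file `{file_name}`... ")
        with open(file_name) as f:  # real code goes through _increment_file_name
            return f.read()
    return None

print(load_input("https://example.test/job/123"))
```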
ohmyscrapper/modules/scrap_urls.py
CHANGED

@@ -21,12 +21,12 @@ def scrap_url(url, verbose=False):
 
     if url_type not in sniffing_config:
         default_type_sniffing = {
-            "bodytags":…
-            "metatags":…
-…
-…
-…
-…
+            "bodytags": {"h1": "title"},
+            "metatags": {
+                "og:title": "title",
+                "og:description": "description",
+                "description": "description",
+            },
         }
         config.append_url_sniffing({url_type: default_type_sniffing})
         sniffing_config = config.get_url_sniffing()
@@ -104,6 +104,12 @@ def process_sniffed_url(url_report, url, sniffing_config, verbose=False):
     ):
         if "first-a-link" in url_report.keys():
             db_fields["url_destiny"] = url_report["first-a-link"]
+        if (
+            "atags" in sniffing_config.keys()
+            and "load_links" in sniffing_config["atags"].keys()
+        ):
+            for a_link in url_report["a_links"]:
+                urls_manager.add_url(url=a_link["href"], parent_url=url["url"])
 
     if db_fields["title"] is not None:
         urls_manager.set_url_title(url=url["url"], value=db_fields["title"])
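Two behaviours worth calling out: an unknown `url_type` now gets a concrete default sniffing entry (grab `<h1>` as the title, plus the usual `og:` metatags) appended to the sniffing config, and a sniffing entry carrying an `atags`/`load_links` key makes every harvested `<a>` link get queued as a child of the page being scraped. A runnable reduction of the link-loading branch, with hypothetical report data and the config shape assumed from the diff:

```python
url = {"url": "https://example.test/listing"}
url_report = {"a_links": [{"href": "https://example.test/job/1"},
                          {"href": "https://example.test/job/2"}]}
sniffing_config = {"atags": {"load_links": True}}  # shape assumed from the diff

if "atags" in sniffing_config.keys() and "load_links" in sniffing_config["atags"].keys():
    for a_link in url_report["a_links"]:
        # real code: urls_manager.add_url(url=a_link["href"], parent_url=url["url"])
        print("queued:", a_link["href"], "<-", url["url"])
```

This pairs with the new guard in `add_url` above, which silently drops hrefs beginning with `/`, so relative links harvested here never enter the database.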
ohmyscrapper/modules/seed.py
CHANGED

@@ -2,7 +2,10 @@ import ohmyscrapper.models.urls_manager as urls_manager
 from ohmyscrapper.core import config
 
 
-def seed():
+def seed(reset=False):
+    if reset:
+        urls_manager.reset_seeds()
+
     if not config.url_types_file_exists():
         db_url_types = urls_manager.get_urls_valid_prefix()
         if len(db_url_types) > 0:
ohmyscrapper/modules/sniff_url.py
CHANGED

@@ -1,6 +1,7 @@
 import requests
 from bs4 import BeautifulSoup
 import json
+from ohmyscrapper.core import config
 
 
 def sniff_url(
@@ -8,6 +9,8 @@ def sniff_url(
     silent=False,
     sniffing_config={},
 ):
+    final_report = {}
+    final_report["error"] = None
     if "metatags" in sniffing_config:
         metatags_to_search = sniffing_config["metatags"]
     else:
@@ -41,10 +44,17 @@ def sniff_url(
     if not silent:
         print("checking url:", url)
 
-…
-…
+    try:
+        r = requests.get(url=url, timeout=config.get_sniffing("timeout"))
+        soup = BeautifulSoup(r.text, "html.parser")
+    except requests.exceptions.ReadTimeout:
+        url_domain = url.split("/")[2]
+        final_report["error"] = (
+            f"!!! timeout (10 seconds) while checking the url with domain: `{url_domain}` !!!"
+        )
+        print(f"\n\n{final_report['error']}\n\n")
+        soup = BeautifulSoup("", "html.parser")
 
-    final_report = {}
     final_report["scrapped-url"] = url
     if len(metatags_to_search) > 0:
         final_report.update(
@@ -119,13 +129,13 @@ def _extract_text_tags(soup, silent, body_tags_to_search):
         for obj_tag in soup.find_all(text_tag):
             valid_text_tags[text_tag].append(obj_tag.text.strip())
         valid_text_tags[text_tag] = separator.join(valid_text_tags[text_tag])
-…
-…
-…
-…
-…
-…
-…
+        i = i + 1
+        if not silent:
+            print("-- text tag", i, "--")
+            print("name:", text_tag)
+            print("separator:", separator)
+            print("texts:", valid_text_tags[text_tag])
+            print("---------------- \n")
     return valid_text_tags
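The fetch is now bounded by a timeout taken from the new `sniffing` config block via `config.get_sniffing("timeout")`, but the error message hard-codes "10 seconds" regardless of the configured value, and only `ReadTimeout` is caught, so a connect timeout would still propagate. A hedged sketch that echoes the configured value and catches both, using stdlib URL parsing instead of `url.split("/")[2]` (which assumes a well-formed absolute URL); `fetch_soup` and its `timeout` parameter are names of this sketch, not of the package:

```python
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup

def fetch_soup(url, timeout):
    # requests.exceptions.Timeout covers both ConnectTimeout and ReadTimeout.
    try:
        r = requests.get(url=url, timeout=timeout)
        return BeautifulSoup(r.text, "html.parser"), None
    except requests.exceptions.Timeout:
        error = (
            f"!!! timeout ({timeout} seconds) while checking the url "
            f"with domain: `{urlparse(url).netloc}` !!!"
        )
        print(f"\n\n{error}\n\n")
        return BeautifulSoup("", "html.parser"), error
```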
{ohmyscrapper-0.7.0.dist-info → ohmyscrapper-0.7.4.dist-info}/METADATA
CHANGED

@@ -1,9 +1,10 @@
-Metadata-Version: 2.…
+Metadata-Version: 2.4
 Name: ohmyscrapper
-Version: 0.7.0
+Version: 0.7.4
 Summary: OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a final report with general information about job positions.
 Author: Cesar Cardoso
 Author-email: Cesar Cardoso <hello@cesarcardoso.cc>
+License-Expression: MIT
 Requires-Dist: beautifulsoup4>=4.14.3
 Requires-Dist: google-genai>=1.55.0
 Requires-Dist: markdown>=3.10
@@ -14,9 +15,11 @@ Requires-Dist: requests>=2.32.5
 Requires-Dist: rich>=14.2.0
 Requires-Dist: urlextract>=1.9.0
 Requires-Python: >=3.11
+Project-URL: Changelog, https://github.com/bouli/ohmyscrapper/releases/latest
+Project-URL: Repository, https://github.com/bouli/ohmyscrapper
 Description-Content-Type: text/markdown
 
-# 🐶 OhMyScrapper - v0.7.0
+# 🐶 OhMyScrapper - v0.7.4
 
 OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a
 final report with general information about job positions.
ohmyscrapper-0.7.4.dist-info/RECORD
ADDED

@@ -0,0 +1,21 @@
+ohmyscrapper/__init__.py,sha256=x3wLMhIU744W9DRtXoTrPpWghb7UdC3UJSYZh_gpzlw,7095
+ohmyscrapper/__main__.py,sha256=5BjNuyet8AY-POwoF5rGt722rHQ7tJ0Vf0UFUfzzi-I,58
+ohmyscrapper/core/config.py,sha256=aaSLxk6Fuzp88EMax6MAOX3WszH4OfYLz_dJoXlu0ME,3142
+ohmyscrapper/core/config_files.py,sha256=C79-Vgz1E5_jUWtob-yrCyBxsqWEXxqPI_r6TL7D1_Q,3314
+ohmyscrapper/core/default_files/config.yaml,sha256=gi8tqhSumQYJIl8QDisJ6eaib2tdcBNT-GFU-e6Dtns,273
+ohmyscrapper/core/default_files/url_sniffing.yaml,sha256=RU5GYWmC1PdBl4nn7HUfRBwuXz8Rlap75d4W3zWDzPM,465
+ohmyscrapper/core/default_files/url_types.yaml,sha256=20kvv8_iWRT-pLa014RXYpAmPSonn6tDnG302rx7l-o,228
+ohmyscrapper/models/urls_manager.py,sha256=k0N1If4YoRUWHX80OyBNEeJNIzDROc2ur6j8q2OBlqo,12103
+ohmyscrapper/modules/classify_urls.py,sha256=GhiosAQUITy1DQe_PksYV9QRKVTgpkSE28dkutzbWVA,1038
+ohmyscrapper/modules/load_txt.py,sha256=pkWBIdh6vORPfENDZ6wGM89vswnOnc1flqKfkLs9RD8,4138
+ohmyscrapper/modules/merge_dbs.py,sha256=0pK3PPUGSbnaDkdpQUGCHemOVaKO37bfHwnsy_EVpWQ,115
+ohmyscrapper/modules/process_with_ai.py,sha256=kl39Jzl-PUwh6AfmTZ9SLFUYs9Sk4biqgt8rNz3X1FA,7255
+ohmyscrapper/modules/scrap_urls.py,sha256=uN5j0dychVMGu7n1rcpYdba4sqc47ssyCn0tVaiz-Ic,6264
+ohmyscrapper/modules/seed.py,sha256=hHEGSoPXsmclTaRPeIcK2oC1Xpg3_JqBv_YFMD0m5Jw,1044
+ohmyscrapper/modules/show.py,sha256=jsAs4g8ouA9wymkBfkDCbpVWKD-m_20uKG-m1cZAUGA,3877
+ohmyscrapper/modules/sniff_url.py,sha256=1QnxEdCWLjLh0uM72dlPzst64qglqg2MHA_xYlNcLSA,5435
+ohmyscrapper/modules/untouch_all.py,sha256=DAwWYfqMFifHPtFCxSamu0AxHCgk6aJbTnBy6wLucXM,167
+ohmyscrapper-0.7.4.dist-info/WHEEL,sha256=xDCZ-UyfvkGuEHPeI7BcJzYKIZzdqN8A8o1M5Om8IyA,79
+ohmyscrapper-0.7.4.dist-info/entry_points.txt,sha256=BZud6D16XkfjelDa4Z33mji-KJbbZXgq2FoLrzjru5I,52
+ohmyscrapper-0.7.4.dist-info/METADATA,sha256=CVE8WUcraUtONy9UVIU0y8Y7wjsk4zEmMVfpA_al1CU,4261
+ohmyscrapper-0.7.4.dist-info/RECORD,,
ohmyscrapper-0.7.0.dist-info/RECORD
DELETED

@@ -1,21 +0,0 @@
-ohmyscrapper/__init__.py,sha256=w5Ty9eszf8tEv72IQrFov0YbZWMqsraq448xhX3YGQs,6493
-ohmyscrapper/__main__.py,sha256=5BjNuyet8AY-POwoF5rGt722rHQ7tJ0Vf0UFUfzzi-I,58
-ohmyscrapper/core/config.py,sha256=i_RA-zReNQIWWmsFar85qzRUqdqvTFMPeCP7Hya7ltU,2996
-ohmyscrapper/core/config_files.py,sha256=KC3yChTnlclclU9EKTqFBoAu9p6XdOKuegub5NPYDDY,2434
-ohmyscrapper/core/default_files/config.yaml,sha256=bgPBVlze2tOCbyrA47h_5BJ35UsXnqsjQszzy0vn-Pw,248
-ohmyscrapper/core/default_files/url_sniffing.yaml,sha256=MKdVR5HQ1i2yTRw2ijzxPSmIyhUno_R4L2k17r3EBBc,417
-ohmyscrapper/core/default_files/url_types.yaml,sha256=20kvv8_iWRT-pLa014RXYpAmPSonn6tDnG302rx7l-o,228
-ohmyscrapper/models/urls_manager.py,sha256=FC1j72M1gzNwC_PzPqnew986b-BI6s7zUv8Z7HiM1M0,11849
-ohmyscrapper/modules/classify_urls.py,sha256=GhiosAQUITy1DQe_PksYV9QRKVTgpkSE28dkutzbWVA,1038
-ohmyscrapper/modules/load_txt.py,sha256=dNkUZ2ehBiPx-q4fPczRiHFvnpzCrjeycFtexhWGmEE,3967
-ohmyscrapper/modules/merge_dbs.py,sha256=0pK3PPUGSbnaDkdpQUGCHemOVaKO37bfHwnsy_EVpWQ,115
-ohmyscrapper/modules/process_with_ai.py,sha256=kl39Jzl-PUwh6AfmTZ9SLFUYs9Sk4biqgt8rNz3X1FA,7255
-ohmyscrapper/modules/scrap_urls.py,sha256=CNoEC-d1r-u4qxnEVimm4ctP6MJGdU8y8VI2Nx0bBdM,6033
-ohmyscrapper/modules/seed.py,sha256=qDUE7TWx9iNQEzqThK4p7g8pTZjdpkmoqI8kOo_zdtk,983
-ohmyscrapper/modules/show.py,sha256=jsAs4g8ouA9wymkBfkDCbpVWKD-m_20uKG-m1cZAUGA,3877
-ohmyscrapper/modules/sniff_url.py,sha256=zJ2Uox2aUdQibL4UFLxg3t7GqJ7WwWEl0q3QSUbMEbc,4960
-ohmyscrapper/modules/untouch_all.py,sha256=DAwWYfqMFifHPtFCxSamu0AxHCgk6aJbTnBy6wLucXM,167
-ohmyscrapper-0.7.0.dist-info/WHEEL,sha256=xDCZ-UyfvkGuEHPeI7BcJzYKIZzdqN8A8o1M5Om8IyA,79
-ohmyscrapper-0.7.0.dist-info/entry_points.txt,sha256=BZud6D16XkfjelDa4Z33mji-KJbbZXgq2FoLrzjru5I,52
-ohmyscrapper-0.7.0.dist-info/METADATA,sha256=Doakf4oDT6oskPGdSlEoRJHBxUmm9FhWaHfDlNIfNuM,4096
-ohmyscrapper-0.7.0.dist-info/RECORD,,
{ohmyscrapper-0.7.0.dist-info → ohmyscrapper-0.7.4.dist-info}/WHEEL
File without changes

{ohmyscrapper-0.7.0.dist-info → ohmyscrapper-0.7.4.dist-info}/entry_points.txt
File without changes