ohmyscrapper 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ohmyscrapper/__init__.py +13 -8
- ohmyscrapper/core/config.py +1 -0
- ohmyscrapper/core/config_files.py +1 -0
- ohmyscrapper/core/default_files/config.yaml +6 -5
- ohmyscrapper/models/urls_manager.py +4 -2
- ohmyscrapper/modules/browser.py +1 -0
- ohmyscrapper/modules/cache.py +100 -0
- ohmyscrapper/modules/classify_urls.py +4 -2
- ohmyscrapper/modules/load_txt.py +2 -0
- ohmyscrapper/modules/process_with_ai.py +10 -11
- ohmyscrapper/modules/scrap_urls.py +6 -6
- ohmyscrapper/modules/show.py +4 -2
- ohmyscrapper/modules/sniff_url.py +24 -8
- {ohmyscrapper-0.8.2.dist-info → ohmyscrapper-0.8.4.dist-info}/METADATA +3 -3
- ohmyscrapper-0.8.4.dist-info/RECORD +23 -0
- {ohmyscrapper-0.8.2.dist-info → ohmyscrapper-0.8.4.dist-info}/WHEEL +2 -2
- ohmyscrapper-0.8.2.dist-info/RECORD +0 -22
- {ohmyscrapper-0.8.2.dist-info → ohmyscrapper-0.8.4.dist-info}/entry_points.txt +0 -0
ohmyscrapper/__init__.py
CHANGED
|
@@ -1,26 +1,27 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
|
|
3
|
+
from ohmyscrapper.core.config import update
|
|
4
|
+
from ohmyscrapper.modules import cache
|
|
3
5
|
from ohmyscrapper.modules.classify_urls import classify_urls
|
|
4
|
-
from ohmyscrapper.modules.sniff_url import sniff_url
|
|
5
6
|
from ohmyscrapper.modules.load_txt import load_txt
|
|
6
|
-
from ohmyscrapper.modules.
|
|
7
|
+
from ohmyscrapper.modules.merge_dbs import merge_dbs
|
|
8
|
+
from ohmyscrapper.modules.process_with_ai import process_with_ai, reprocess_ai_history
|
|
7
9
|
from ohmyscrapper.modules.scrap_urls import scrap_urls
|
|
10
|
+
from ohmyscrapper.modules.seed import export_url_types_to_file, seed
|
|
8
11
|
from ohmyscrapper.modules.show import (
|
|
12
|
+
export_report,
|
|
13
|
+
export_urls,
|
|
9
14
|
show_url,
|
|
10
15
|
show_urls,
|
|
11
16
|
show_urls_valid_prefix,
|
|
12
|
-
export_urls,
|
|
13
|
-
export_report,
|
|
14
17
|
)
|
|
18
|
+
from ohmyscrapper.modules.sniff_url import sniff_url
|
|
15
19
|
from ohmyscrapper.modules.untouch_all import untouch_all
|
|
16
|
-
from ohmyscrapper.modules.process_with_ai import process_with_ai, reprocess_ai_history
|
|
17
|
-
from ohmyscrapper.modules.merge_dbs import merge_dbs
|
|
18
|
-
from ohmyscrapper.core.config import update
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
def main():
|
|
22
23
|
parser = argparse.ArgumentParser(prog="ohmyscrapper")
|
|
23
|
-
parser.add_argument("--version", action="version", version="%(prog)s v0.8.2")
|
|
24
|
+
parser.add_argument("--version", action="version", version="%(prog)s v0.8.4")
|
|
24
25
|
|
|
25
26
|
update()
|
|
26
27
|
subparsers = parser.add_subparsers(dest="command", help="Available commands")
|
|
@@ -141,6 +142,7 @@ def main():
|
|
|
141
142
|
)
|
|
142
143
|
merge_parser = subparsers.add_parser("merge_dbs", help="Merge databases.")
|
|
143
144
|
|
|
145
|
+
clean_cache_parser = subparsers.add_parser("cleancache", help="Clean cache.")
|
|
144
146
|
args = parser.parse_args()
|
|
145
147
|
|
|
146
148
|
if args.command == "classify-urls":
|
|
@@ -236,6 +238,9 @@ def main():
|
|
|
236
238
|
export_report()
|
|
237
239
|
return
|
|
238
240
|
|
|
241
|
+
if args.command == "cleancache":
|
|
242
|
+
cache.clean()
|
|
243
|
+
|
|
239
244
|
|
|
240
245
|
if __name__ == "__main__":
|
|
241
246
|
main()
|
ohmyscrapper/core/config.py
CHANGED
|
ohmyscrapper/core/default_files/config.yaml
CHANGED
|
@@ -2,11 +2,12 @@ db:
|
|
|
2
2
|
db_file: local.db
|
|
3
3
|
|
|
4
4
|
default_dirs:
|
|
5
|
-
db: ./
|
|
6
|
-
input: ./
|
|
7
|
-
output: ./
|
|
8
|
-
prompts: ./
|
|
9
|
-
templates: ./
|
|
5
|
+
db: ./ohmyscrapper_db
|
|
6
|
+
input: ./ohmyscrapper_input
|
|
7
|
+
output: ./ohmyscrapper_output
|
|
8
|
+
prompts: ./ohmyscrapper_prompts
|
|
9
|
+
templates: ./ohmyscrapper_templates
|
|
10
|
+
cache: ./ohmyscrapper_cache
|
|
10
11
|
|
|
11
12
|
default_files:
|
|
12
13
|
url_types: url_types.yaml
|
ohmyscrapper/modules/browser.py
CHANGED
|
ohmyscrapper/modules/cache.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from ohmyscrapper.core import config
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def safe_cache_id(func):
|
|
7
|
+
def _filter_cache_id(*args, **kwargs):
|
|
8
|
+
if "cache_id" in args:
|
|
9
|
+
args["cache_id"] = filter_cache_id(args["cache_id"])
|
|
10
|
+
|
|
11
|
+
if "cache_id" in kwargs:
|
|
12
|
+
kwargs["cache_id"] = filter_cache_id(kwargs["cache_id"])
|
|
13
|
+
|
|
14
|
+
return func(*args, **kwargs)
|
|
15
|
+
|
|
16
|
+
return _filter_cache_id
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def filter_cache_id(cache_id):
|
|
20
|
+
cache_id = cache_id.replace('"', "").replace("\\", "")
|
|
21
|
+
cache_id = f'"{cache_id}"'
|
|
22
|
+
return cache_id
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
cache_files_extension = "html"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@safe_cache_id
|
|
29
|
+
def set(text: str, cache_id: str):
|
|
30
|
+
cache_folder = config.get_dir("cache")
|
|
31
|
+
cache_index_file_path = get_cache_index_path()
|
|
32
|
+
|
|
33
|
+
cache_folder_files = os.listdir(cache_folder)
|
|
34
|
+
cached_file_index = get_index_from_file_index(_safe_cache_id=cache_id)
|
|
35
|
+
if cached_file_index is not None:
|
|
36
|
+
new_file_index = cached_file_index
|
|
37
|
+
else:
|
|
38
|
+
new_file_index = len(cache_folder_files)
|
|
39
|
+
with open(cache_index_file_path, "a") as cache_index_file_writer:
|
|
40
|
+
cache_index_file_writer.write(f"\n{new_file_index}: {cache_id}")
|
|
41
|
+
|
|
42
|
+
new_file_name = f"{new_file_index}.{cache_files_extension}"
|
|
43
|
+
new_file_path = os.path.join(cache_folder, new_file_name)
|
|
44
|
+
with open(new_file_path, "w+") as new_file_writer:
|
|
45
|
+
new_file_writer.write(text)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@safe_cache_id
|
|
49
|
+
def get(cache_id: str) -> str:
|
|
50
|
+
cached_file_index = get_index_from_file_index(_safe_cache_id=cache_id)
|
|
51
|
+
code = get_cached_file_by_index(cached_file_index=cached_file_index)
|
|
52
|
+
return code
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_index_from_file_index(_safe_cache_id):
|
|
56
|
+
cache_index_file = get_cache_index_file()
|
|
57
|
+
if cache_index_file.find(_safe_cache_id) < 1:
|
|
58
|
+
return None
|
|
59
|
+
cache_index_file = cache_index_file[: cache_index_file.find(_safe_cache_id) - 2]
|
|
60
|
+
cached_file_index = int(cache_index_file.split("\n")[-1].strip())
|
|
61
|
+
return cached_file_index
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_cache_index_path() -> str:
|
|
65
|
+
cache_index_file_name = "cache_index.yaml"
|
|
66
|
+
cache_folder = config.get_dir("cache")
|
|
67
|
+
cache_index_file_path = os.path.join(cache_folder, cache_index_file_name)
|
|
68
|
+
if not os.path.exists(cache_index_file_path):
|
|
69
|
+
with open(cache_index_file_path, "w+") as cache_index_file_writer:
|
|
70
|
+
cache_index_file_writer.write(f"0: {cache_index_file_name}")
|
|
71
|
+
|
|
72
|
+
return cache_index_file_path
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def get_cache_index_file() -> str:
|
|
76
|
+
with open(get_cache_index_path(), "r") as f:
|
|
77
|
+
cache_index_file_content = f.read()
|
|
78
|
+
|
|
79
|
+
return cache_index_file_content
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def get_cached_file_by_index(cached_file_index: int) -> str:
|
|
83
|
+
code = None
|
|
84
|
+
cache_folder = config.get_dir("cache")
|
|
85
|
+
cached_file_name = f"{cached_file_index}.{cache_files_extension}"
|
|
86
|
+
cached_file_path = os.path.join(cache_folder, cached_file_name)
|
|
87
|
+
if not os.path.exists(cached_file_path):
|
|
88
|
+
return None
|
|
89
|
+
with open(cached_file_path, "r") as cached_file_reader:
|
|
90
|
+
code = cached_file_reader.read()
|
|
91
|
+
return code
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def clean():
|
|
95
|
+
cache_folder = config.get_dir("cache")
|
|
96
|
+
cache_folder_files = os.listdir(cache_folder)
|
|
97
|
+
for file in cache_folder_files:
|
|
98
|
+
file_to_clean = os.path.join(cache_folder, file)
|
|
99
|
+
if os.path.exists(file_to_clean):
|
|
100
|
+
os.remove(file_to_clean)
|
ohmyscrapper/modules/load_txt.py
CHANGED
|
ohmyscrapper/modules/process_with_ai.py
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
from bs4 import BeautifulSoup
|
|
4
|
-
from google import genai
|
|
5
|
-
from dotenv import load_dotenv
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
6
3
|
import random
|
|
7
4
|
import time
|
|
8
|
-
|
|
5
|
+
|
|
9
6
|
import yaml
|
|
10
|
-
import
|
|
7
|
+
from bs4 import BeautifulSoup
|
|
8
|
+
from dotenv import load_dotenv
|
|
9
|
+
from google import genai
|
|
10
|
+
|
|
11
|
+
import ohmyscrapper.models.urls_manager as urls_manager
|
|
12
|
+
from ohmyscrapper.core import config
|
|
11
13
|
|
|
12
14
|
# TODO: !!! REFACTOR !!!
|
|
13
15
|
load_dotenv()
|
|
@@ -85,15 +87,12 @@ def process_with_ai(recursive=True, triggered_times=0):
|
|
|
85
87
|
|
|
86
88
|
texts = ""
|
|
87
89
|
for index, row in df.iterrows():
|
|
88
|
-
texts = (
|
|
89
|
-
texts
|
|
90
|
-
+ f"""
|
|
90
|
+
texts = texts + f"""
|
|
91
91
|
<text>
|
|
92
92
|
<id>{str(row['id'])}</id>
|
|
93
93
|
{row['description']}
|
|
94
94
|
</text>
|
|
95
95
|
"""
|
|
96
|
-
)
|
|
97
96
|
if texts == "":
|
|
98
97
|
print("no urls to process")
|
|
99
98
|
return
|
|
ohmyscrapper/modules/scrap_urls.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import time
|
|
3
|
+
|
|
1
4
|
import ohmyscrapper.models.urls_manager as urls_manager
|
|
2
|
-
import ohmyscrapper.modules.sniff_url as sniff_url
|
|
3
|
-
import ohmyscrapper.modules.load_txt as load_txt
|
|
4
|
-
import ohmyscrapper.modules.classify_urls as classify_urls
|
|
5
5
|
import ohmyscrapper.modules.browser as browser
|
|
6
|
+
import ohmyscrapper.modules.classify_urls as classify_urls
|
|
7
|
+
import ohmyscrapper.modules.load_txt as load_txt
|
|
8
|
+
import ohmyscrapper.modules.sniff_url as sniff_url
|
|
6
9
|
from ohmyscrapper.core import config
|
|
7
10
|
|
|
8
|
-
import time
|
|
9
|
-
import random
|
|
10
|
-
|
|
11
11
|
|
|
12
12
|
def scrap_url(url, verbose=False, driver=None):
|
|
13
13
|
if url["url_type"] is None:
|
ohmyscrapper/modules/show.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
|
-
import ohmyscrapper.models.urls_manager as urls_manager
|
|
2
|
-
from ohmyscrapper.core import config
|
|
3
1
|
import math
|
|
4
2
|
import os
|
|
3
|
+
|
|
5
4
|
from rich.console import Console
|
|
6
5
|
from rich.table import Table
|
|
7
6
|
|
|
7
|
+
import ohmyscrapper.models.urls_manager as urls_manager
|
|
8
|
+
from ohmyscrapper.core import config
|
|
9
|
+
|
|
8
10
|
|
|
9
11
|
def export_urls(limit=0, csv_file="output/urls.csv", simplify=False):
|
|
10
12
|
output_folder = config.get_dir("output")
|
|
ohmyscrapper/modules/sniff_url.py
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import time
|
|
4
|
+
|
|
1
5
|
import requests
|
|
2
6
|
from bs4 import BeautifulSoup
|
|
3
|
-
|
|
4
|
-
from ohmyscrapper.core import config
|
|
7
|
+
|
|
5
8
|
import ohmyscrapper.modules.browser as browser
|
|
6
|
-
import
|
|
9
|
+
from ohmyscrapper.core import config
|
|
10
|
+
from ohmyscrapper.modules import cache
|
|
7
11
|
|
|
8
12
|
|
|
9
13
|
def sniff_url(
|
|
@@ -89,9 +93,10 @@ def _extract_a_tags(soup, silent, url=None):
|
|
|
89
93
|
i = i + 1
|
|
90
94
|
|
|
91
95
|
href = a_tag.get("href")
|
|
92
|
-
if
|
|
93
|
-
|
|
94
|
-
|
|
96
|
+
if href is not None:
|
|
97
|
+
if url is not None and href[:1] == "/":
|
|
98
|
+
domain = url.split("//")[0] + "//" + url.split("//")[1].split("/")[0]
|
|
99
|
+
href = domain + href
|
|
95
100
|
|
|
96
101
|
a_links.append({"text": a_tag.text, "href": href})
|
|
97
102
|
if not silent:
|
|
@@ -189,6 +194,13 @@ def get_tags(url, sniffing_config={}, driver=None):
|
|
|
189
194
|
|
|
190
195
|
|
|
191
196
|
def get_url(url, driver=None):
|
|
197
|
+
cache_prefix = "sniff-urf:"
|
|
198
|
+
cached_code = cache.get(cache_id=cache_prefix + url)
|
|
199
|
+
|
|
200
|
+
if cached_code is not None:
|
|
201
|
+
print("You used the cache for this URL.")
|
|
202
|
+
return cached_code
|
|
203
|
+
|
|
192
204
|
if driver is None and config.get_sniffing("use-browser"):
|
|
193
205
|
driver = browser.get_driver()
|
|
194
206
|
|
|
@@ -197,8 +209,12 @@ def get_url(url, driver=None):
|
|
|
197
209
|
driver.get(url)
|
|
198
210
|
time.sleep(config.get_sniffing("browser-waiting-time"))
|
|
199
211
|
driver.implicitly_wait(config.get_sniffing("browser-waiting-time"))
|
|
200
|
-
|
|
212
|
+
code = driver.page_source
|
|
213
|
+
cache.set(text=code, cache_id=cache_prefix + url)
|
|
214
|
+
return code
|
|
201
215
|
except:
|
|
202
216
|
print("error")
|
|
203
217
|
pass
|
|
204
|
-
|
|
218
|
+
code = requests.get(url=url, timeout=config.get_sniffing("timeout")).text
|
|
219
|
+
cache.set(text=code, cache_id=cache_prefix + url)
|
|
220
|
+
return code
|
|
{ohmyscrapper-0.8.2.dist-info → ohmyscrapper-0.8.4.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ohmyscrapper
|
|
3
|
-
Version: 0.8.2
|
|
3
|
+
Version: 0.8.4
|
|
4
4
|
Summary: OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a final report with general information about job positions.
|
|
5
5
|
Author: Cesar Cardoso
|
|
6
6
|
Author-email: Cesar Cardoso <hello@cesarcardoso.cc>
|
|
@@ -16,11 +16,11 @@ Requires-Dist: rich>=14.2.0
|
|
|
16
16
|
Requires-Dist: selenium>=4.39.0
|
|
17
17
|
Requires-Dist: urlextract>=1.9.0
|
|
18
18
|
Requires-Python: >=3.11
|
|
19
|
-
Project-URL: Changelog, https://github.com/bouli/ohmyscrapper/releases/latest
|
|
20
19
|
Project-URL: Repository, https://github.com/bouli/ohmyscrapper
|
|
20
|
+
Project-URL: Changelog, https://github.com/bouli/ohmyscrapper/releases/latest
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
|
|
23
|
-
# 🐶 OhMyScrapper - v0.8.2
|
|
23
|
+
# 🐶 OhMyScrapper - v0.8.4
|
|
24
24
|
|
|
25
25
|
OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a
|
|
26
26
|
final report with general information about job positions.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
ohmyscrapper/__init__.py,sha256=C_nhLCKrLogCLQVVHlTJNMEOSFiLDTR0bBCtW8T8kXE,7859
|
|
2
|
+
ohmyscrapper/__main__.py,sha256=5BjNuyet8AY-POwoF5rGt722rHQ7tJ0Vf0UFUfzzi-I,58
|
|
3
|
+
ohmyscrapper/core/config.py,sha256=2S8iYMdN5-fCX4EW1cmSd4-XabzqxIgbupBuymV3yjY,3185
|
|
4
|
+
ohmyscrapper/core/config_files.py,sha256=5FyPFpN7WQrlgQWr85s5NF-UbnzbyFsWEVVMOs8iyaw,3411
|
|
5
|
+
ohmyscrapper/core/default_files/config.yaml,sha256=ETuTyFM1fedjehM9cZRoKxRKcYDH0LlPFAZ3vLj0uxU,436
|
|
6
|
+
ohmyscrapper/core/default_files/url_sniffing.yaml,sha256=HUwmGUwuJy7t97bJHgNiZOl1thvD9bLaelPgbEr5bMY,465
|
|
7
|
+
ohmyscrapper/core/default_files/url_types.yaml,sha256=20kvv8_iWRT-pLa014RXYpAmPSonn6tDnG302rx7l-o,228
|
|
8
|
+
ohmyscrapper/models/urls_manager.py,sha256=XC8HODdsCEo_nn1j7nH_jy9AUTb4PpmkGlaFWV048TM,12117
|
|
9
|
+
ohmyscrapper/modules/browser.py,sha256=pH41NVqYgay_zEIZfncJbtwz_13REX5HVH8uk581sM4,857
|
|
10
|
+
ohmyscrapper/modules/cache.py,sha256=3EQnv9VYJWrE5fdLwkGEUOAHV16nprhyid6MlBpa9Gg,3228
|
|
11
|
+
ohmyscrapper/modules/classify_urls.py,sha256=oK_UhQPF976cexlarqi14pSw8tWLGYfaIMCXzbAhnpI,1040
|
|
12
|
+
ohmyscrapper/modules/load_txt.py,sha256=Gpob1W_LLfkBnNbtqxgCRNGeyufmHECreDqTlj9O_Mk,4140
|
|
13
|
+
ohmyscrapper/modules/merge_dbs.py,sha256=0pK3PPUGSbnaDkdpQUGCHemOVaKO37bfHwnsy_EVpWQ,115
|
|
14
|
+
ohmyscrapper/modules/process_with_ai.py,sha256=TqebqC3_rCx6cbvq3oQhaXLZxGUYpKvhyH3I3zjsA94,7221
|
|
15
|
+
ohmyscrapper/modules/scrap_urls.py,sha256=affq5Vx5BKrl7uL2mpcThDBOXznq0d5fz1if5xAttOA,6627
|
|
16
|
+
ohmyscrapper/modules/seed.py,sha256=hHEGSoPXsmclTaRPeIcK2oC1Xpg3_JqBv_YFMD0m5Jw,1044
|
|
17
|
+
ohmyscrapper/modules/show.py,sha256=i5l8_Zooj6vg1JLqWtvGPWHv7wL53aHZ43-SKS1sF9Y,3879
|
|
18
|
+
ohmyscrapper/modules/sniff_url.py,sha256=NpIMJxNEUzmDkFGVqDJXgVtTWEGKRE_dSiJHNz-vXoE,7027
|
|
19
|
+
ohmyscrapper/modules/untouch_all.py,sha256=DAwWYfqMFifHPtFCxSamu0AxHCgk6aJbTnBy6wLucXM,167
|
|
20
|
+
ohmyscrapper-0.8.4.dist-info/WHEEL,sha256=fAguSjoiATBe7TNBkJwOjyL1Tt4wwiaQGtNtjRPNMQA,80
|
|
21
|
+
ohmyscrapper-0.8.4.dist-info/entry_points.txt,sha256=BZud6D16XkfjelDa4Z33mji-KJbbZXgq2FoLrzjru5I,52
|
|
22
|
+
ohmyscrapper-0.8.4.dist-info/METADATA,sha256=h2Agb2KCKiBkX-HEj_8f9EuV3NOq6AJ9h1WrnPQ79iU,4293
|
|
23
|
+
ohmyscrapper-0.8.4.dist-info/RECORD,,
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
ohmyscrapper/__init__.py,sha256=WzXXhhlVkyAPbqeo7NgezLb6TbEJcuFf8JYAdcf3zBE,7678
|
|
2
|
-
ohmyscrapper/__main__.py,sha256=5BjNuyet8AY-POwoF5rGt722rHQ7tJ0Vf0UFUfzzi-I,58
|
|
3
|
-
ohmyscrapper/core/config.py,sha256=bfmoTr1j4SnIhKFZC_F9fh4Y90VqSPFf_g6Rm-aNui4,3184
|
|
4
|
-
ohmyscrapper/core/config_files.py,sha256=3mIXVxurmyXCpKueyyGsZ6lUnV8VJ2gnLU2QaqhWhhI,3410
|
|
5
|
-
ohmyscrapper/core/default_files/config.yaml,sha256=y54QAjOnogpl8LEzhmn89tAfRzle4ZWWtIbYRjxX8Rk,341
|
|
6
|
-
ohmyscrapper/core/default_files/url_sniffing.yaml,sha256=HUwmGUwuJy7t97bJHgNiZOl1thvD9bLaelPgbEr5bMY,465
|
|
7
|
-
ohmyscrapper/core/default_files/url_types.yaml,sha256=20kvv8_iWRT-pLa014RXYpAmPSonn6tDnG302rx7l-o,228
|
|
8
|
-
ohmyscrapper/models/urls_manager.py,sha256=sP2T4k1HOj8ccaVGWbuhfw1BDfOUSVL4_WR9vRyHjOA,12115
|
|
9
|
-
ohmyscrapper/modules/browser.py,sha256=6AaNFQ7jV91DvHqbsBT6It_-tNbVN2qJC_c1vXTweJY,856
|
|
10
|
-
ohmyscrapper/modules/classify_urls.py,sha256=GhiosAQUITy1DQe_PksYV9QRKVTgpkSE28dkutzbWVA,1038
|
|
11
|
-
ohmyscrapper/modules/load_txt.py,sha256=pkWBIdh6vORPfENDZ6wGM89vswnOnc1flqKfkLs9RD8,4138
|
|
12
|
-
ohmyscrapper/modules/merge_dbs.py,sha256=0pK3PPUGSbnaDkdpQUGCHemOVaKO37bfHwnsy_EVpWQ,115
|
|
13
|
-
ohmyscrapper/modules/process_with_ai.py,sha256=kl39Jzl-PUwh6AfmTZ9SLFUYs9Sk4biqgt8rNz3X1FA,7255
|
|
14
|
-
ohmyscrapper/modules/scrap_urls.py,sha256=_e4jT7eBWGP6cqI6RaD0xzNX1vCFBx96JIBPGW3mAV4,6627
|
|
15
|
-
ohmyscrapper/modules/seed.py,sha256=hHEGSoPXsmclTaRPeIcK2oC1Xpg3_JqBv_YFMD0m5Jw,1044
|
|
16
|
-
ohmyscrapper/modules/show.py,sha256=jsAs4g8ouA9wymkBfkDCbpVWKD-m_20uKG-m1cZAUGA,3877
|
|
17
|
-
ohmyscrapper/modules/sniff_url.py,sha256=BZphbr2V7MDyFPW7APlh7_CLTtc_u3kcB7DY2QjVVQo,6579
|
|
18
|
-
ohmyscrapper/modules/untouch_all.py,sha256=DAwWYfqMFifHPtFCxSamu0AxHCgk6aJbTnBy6wLucXM,167
|
|
19
|
-
ohmyscrapper-0.8.2.dist-info/WHEEL,sha256=xDCZ-UyfvkGuEHPeI7BcJzYKIZzdqN8A8o1M5Om8IyA,79
|
|
20
|
-
ohmyscrapper-0.8.2.dist-info/entry_points.txt,sha256=BZud6D16XkfjelDa4Z33mji-KJbbZXgq2FoLrzjru5I,52
|
|
21
|
-
ohmyscrapper-0.8.2.dist-info/METADATA,sha256=2OnXXefFcRT_ChEVdL6LKZoe6iNKyFqjoN10LBUBB34,4293
|
|
22
|
-
ohmyscrapper-0.8.2.dist-info/RECORD,,
|
|
File without changes
|