ohmyscrapper 0.7.0__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ohmyscrapper/__init__.py CHANGED
@@ -20,7 +20,7 @@ from ohmyscrapper.core.config import update
 
 def main():
     parser = argparse.ArgumentParser(prog="ohmyscrapper")
-    parser.add_argument("--version", action="version", version="%(prog)s v0.7.0")
+    parser.add_argument("--version", action="version", version="%(prog)s v0.8.2")
 
     update()
     subparsers = parser.add_subparsers(dest="command", help="Available commands")
@@ -28,6 +28,9 @@ def main():
         "start",
         help="Make the entire process of 📦 loading, 🐶 scraping and 📜🖋️ exporting with the default configuration.",
     )
+    start_parser.add_argument(
+        "-input", default=None, help="File/Folder path or url for pre-loading."
+    )
 
     start_parser.add_argument(
         "--ai",
@@ -50,6 +53,14 @@ def main():
         help="Add all `url_types` from the bank to the `/ohmyscrapper/url_types.yaml` file.",
         action="store_true",
     )
+
+    seed_parser.add_argument(
+        "--reset",
+        default=False,
+        help="Reset all `url_types`.",
+        action="store_true",
+    )
+
     untouch_parser = subparsers.add_parser(
         "untouch-all", help="Untouch all urls. That resets classification"
     )
@@ -85,11 +96,24 @@ def main():
     scrap_urls_parser.add_argument(
         "--verbose", default=False, help="Run in verbose mode", action="store_true"
     )
+    scrap_urls_parser.add_argument(
+        "-input", default=None, help="File/Folder path or url for pre-loading."
+    )
 
     sniff_url_parser = subparsers.add_parser("sniff-url", help="🐕 Sniff/Check url")
     sniff_url_parser.add_argument(
         "url", default="https://cesarcardoso.cc/", help="Url to sniff"
     )
+    sniff_url_parser.add_argument(
+        "--metatags",
+        default="mt",
+        help="Meta tags you want to watch separated by comma ','",
+    )
+    sniff_url_parser.add_argument(
+        "--bodytags",
+        default="bd",
+        help="Body tags you want to watch separated by comma ','",
+    )
 
     show_urls_parser = subparsers.add_parser("show", help="Show urls and prefixes")
     show_urls_parser.add_argument(
@@ -131,7 +155,7 @@ def main():
         if args.export:
             export_url_types_to_file()
         else:
-            seed()
+            seed(args.reset)
         return
 
     if args.command == "untouch-all":
@@ -139,10 +163,21 @@ def main():
         return
 
     if args.command == "sniff-url":
-        sniff_url(args.url)
+        sniffing_config = {}
+        if len(args.metatags) > 0:
+            sniffing_config["metatags"] = str(args.metatags).split(",")
+
+        if len(args.bodytags) > 0:
+            sniffing_config["bodytags"] = str(args.bodytags).split(",")
+
+        sniff_url(args.url, sniffing_config=sniffing_config)
+
         return
 
     if args.command == "scrap-urls":
+        if args.input != None:
+            load_txt(file_name=args.input, verbose=args.verbose)
+
         scrap_urls(
             recursive=args.recursive,
             ignore_valid_prefix=args.ignore_type,
@@ -182,7 +217,12 @@ def main():
         return
 
     if args.command == "start":
-        load_txt()
+        seed()
+        if args.input != None:
+            load_txt(file_name=args.input)
+        else:
+            load_txt()
+
         scrap_urls(
             recursive=True,
             ignore_valid_prefix=True,
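For context on the new `sniff-url` flags, here is a minimal, self-contained sketch of the comma-splitting wired above; the example URL and flag values are invented for illustration:

```python
# Minimal sketch of the new sniff-url flag parsing; example values are made up.
import argparse

parser = argparse.ArgumentParser(prog="ohmyscrapper")
subparsers = parser.add_subparsers(dest="command")
sniff = subparsers.add_parser("sniff-url")
sniff.add_argument("url")
sniff.add_argument("--metatags", default="mt")
sniff.add_argument("--bodytags", default="bd")

args = parser.parse_args(
    ["sniff-url", "https://example.com/", "--metatags", "og:title,og:description"]
)
sniffing_config = {}
if len(args.metatags) > 0:
    sniffing_config["metatags"] = str(args.metatags).split(",")
print(sniffing_config)  # {'metatags': ['og:title', 'og:description']}
```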
ohmyscrapper/core/config.py CHANGED
@@ -39,6 +39,12 @@ def get_ai(param):
     )
 
 
+def get_sniffing(param):
+    return config_files.get_param(
+        parent_param="sniffing", param=param, default_app_dir=default_app_dir
+    )
+
+
 def load_config(force_default=False):
     config_file_name = "config.yaml"
     config_params = config_files.create_and_read_config_file(
@@ -63,14 +69,14 @@ def url_types_file_exists():
 def get_url_types():
     url_types_file = get_files("url_types")
     return config_files.create_and_read_config_file(
-        url_types_file, default_app_dir=default_app_dir
+        url_types_file, default_app_dir=default_app_dir, complete_file=False
     )
 
 
 def get_url_sniffing():
     file = get_files("url_sniffing")
     return config_files.create_and_read_config_file(
-        file, default_app_dir=default_app_dir
+        file, default_app_dir=default_app_dir, complete_file=False
    )
 
 
ohmyscrapper/core/config_files.py CHANGED
@@ -2,16 +2,34 @@ import os
 import yaml
 
 
-def create_and_read_config_file(file_name, default_app_dir, force_default=False):
+def create_and_read_config_file(
+    file_name, default_app_dir, force_default=False, complete_file=True
+):
     config_file = config_file_path(file_name, default_app_dir)
+    default_config_params = _get_default_file(default_file=file_name)
     if force_default or not os.path.exists(config_file):
-        config_params = _get_default_file(default_file=file_name)
         overwrite_config_file(
-            data=config_params, file_name=file_name, default_app_dir=default_app_dir
+            data=default_config_params,
+            file_name=file_name,
+            default_app_dir=default_app_dir,
         )
+        config_params = default_config_params
     else:
         with open(config_file, "r") as f:
             config_params = yaml.safe_load(f.read())
+        if complete_file:
+            if complete_config_file(
+                config_params=config_params,
+                default_config_params=default_config_params,
+                file_name=file_name,
+                default_app_dir=default_app_dir,
+            ):
+                config_params = create_and_read_config_file(
+                    file_name=file_name,
+                    default_app_dir=default_app_dir,
+                    force_default=force_default,
+                )
+
 
     if config_params is None:
         config_params = create_and_read_config_file(
             file_name=file_name, default_app_dir=default_app_dir, force_default=True
@@ -19,6 +37,18 @@ def create_and_read_config_file(file_name, default_app_dir, force_default=False)
     return config_params
 
 
+def complete_config_file(
+    config_params, default_config_params, file_name, default_app_dir
+):
+    has_updated = False
+    for key, values in default_config_params.items():
+        if key not in config_params.keys():
+            has_updated = True
+            data = {key: values}
+            append_config_file(data, file_name, default_app_dir)
+    return has_updated
+
+
 def overwrite_config_file(data, file_name, default_app_dir):
     config_file = config_file_path(file_name, default_app_dir)
     with open(config_file, "+w") as f:
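The effect of the new `complete_file` path is to top up an existing user config with any top-level keys the bundled defaults have gained since it was written. A rough dict-level sketch of that merge (file I/O replaced with plain dicts; the real code appends to the YAML file via `append_config_file` and then re-reads it):

```python
# Dict-level sketch of the complete_config_file merge; illustration only.
def complete_config(config_params, default_config_params):
    added = {}
    for key, values in default_config_params.items():
        if key not in config_params:
            added[key] = values  # stands in for append_config_file(...)
    return added

user_cfg = {"ai": {"default_prompt_file": "prompt.md"}}
defaults = {
    "ai": {"default_prompt_file": "prompt.md"},
    "sniffing": {"timeout": 10, "use-browser": False},
}
print(complete_config(user_cfg, defaults))
# {'sniffing': {'timeout': 10, 'use-browser': False}}
```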
ohmyscrapper/core/default_files/config.yaml CHANGED
@@ -14,3 +14,9 @@ default_files:
 
 ai:
   default_prompt_file: prompt.md
+
+sniffing:
+  timeout: 10
+  use-browser: false
+  browser-waiting-time: 5
+  round-sleeping: 10
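A quick way to see how these defaults resolve, assuming `get_sniffing(param)` ultimately reads the `sniffing:` block of this file (the exact lookup lives in `config_files.get_param`, which is not part of this diff):

```python
# Hedged sketch only: reading the new sniffing defaults with plain PyYAML.
import yaml

config_params = yaml.safe_load("""
sniffing:
  timeout: 10
  use-browser: false
  browser-waiting-time: 5
  round-sleeping: 10
""")
print(config_params["sniffing"]["timeout"])      # 10
print(config_params["sniffing"]["use-browser"])  # False
```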
ohmyscrapper/core/default_files/url_sniffing.yaml CHANGED
@@ -23,3 +23,7 @@ linkedin_redirect:
     og:url: url_destiny
   atags:
     first-tag-as-url_destiny: 5
+
+read_all_a_tags:
+  atags:
+    load_links: True
ohmyscrapper/models/urls_manager.py CHANGED
@@ -69,6 +69,14 @@ def seeds(seeds={}):
     return True
 
 
+@use_connection
+def reset_seeds():
+    sql = "DELETE FROM urls_valid_prefix WHERE 1 = 1"
+    c = conn.cursor()
+    c.execute(sql)
+    conn.commit()
+
+
 @use_connection
 def add_urls_valid_prefix(url_prefix, url_type):
 
@@ -198,6 +206,8 @@ def get_url_like_unclassified(like_condition):
 
 @use_connection
 def add_url(url, title=None, parent_url=None):
+    if url[:1] == "/":
+        return
     url = clean_url(url)
     c = conn.cursor()
 
@@ -340,7 +350,9 @@ def set_url_error(url, value):
 @use_connection
 def set_url_type_by_id(url_id, url_type):
     c = conn.cursor()
-    c.execute(f"UPDATE urls SET url_type = '{url_type}' WHERE id = {url_id}")
+    c.execute(
+        f"UPDATE urls SET url_type = '{url_type}', last_touch = NULL WHERE id = {url_id}"
+    )
     conn.commit()
 
 
@@ -392,8 +404,10 @@ def touch_url(url):
 @use_connection
 def untouch_url(url):
     url = clean_url(url)
+    url = str(url.strip())
+
     c = conn.cursor()
-    c.execute("UPDATE urls SET last_touch = NULL WHERE url = ?", (url))
+    c.execute(f"UPDATE urls SET last_touch = NULL, url_type = NULL WHERE url = '{url}'")
     conn.commit()
 
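To illustrate what the reworked `untouch_url` now does to a row (it clears `url_type` as well as `last_touch`), here is a standalone sketch against an in-memory table; the column set is an assumption for illustration, the real schema lives elsewhere in urls_manager.py:

```python
# Standalone sketch of the new untouch_url UPDATE; table layout is assumed.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE urls (url TEXT, url_type TEXT, last_touch TEXT)")
conn.execute("INSERT INTO urls VALUES ('https://example.com/', 'generic', '2024-01-01')")

url = "https://example.com/".strip()
conn.execute(f"UPDATE urls SET last_touch = NULL, url_type = NULL WHERE url = '{url}'")
conn.commit()
print(conn.execute("SELECT * FROM urls").fetchone())  # ('https://example.com/', None, None)
```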
 
ohmyscrapper/modules/browser.py ADDED
@@ -0,0 +1,27 @@
+from selenium import webdriver
+from ohmyscrapper.core.config import get_sniffing
+
+
+def get_driver():
+    if get_sniffing("use-browser") == "safari":
+        from selenium.webdriver.safari.options import Options
+
+        options = Options()
+        driver = webdriver.Safari(options=options)
+    elif get_sniffing("use-browser") == "firefox":
+        from selenium.webdriver.firefox.options import Options
+
+        options = Options()
+        driver = webdriver.Firefox(options=options)
+    elif get_sniffing("use-browser") == "ie":
+        from selenium.webdriver.ie.options import Options
+
+        options = Options()
+        driver = webdriver.Ie(options=options)
+    else: # default: chrome
+        from selenium.webdriver.chrome.options import Options
+
+        options = Options()
+        driver = webdriver.Chrome(options=options)
+
+    return driver
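A hedged usage sketch of the new module: with `use-browser` set in config.yaml and a matching local browser plus driver installed, `get_driver()` hands back a Selenium driver that the sniffer can reuse (the URL below is an example, not from the package):

```python
# Usage sketch; assumes a working local browser/driver and `use-browser` configured.
from ohmyscrapper.modules import browser

driver = browser.get_driver()        # falls through to Chrome unless safari/firefox/ie is configured
driver.get("https://example.com/")
html = driver.page_source
driver.quit()
print(len(html))
```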
ohmyscrapper/modules/load_txt.py CHANGED
@@ -19,14 +19,16 @@ def load_txt(file_name="input", verbose=False):
 
     text_file_content = ""
     if file_name is not None and not os.path.isdir(file_name):
-        print(f"📖 reading file `{file_name}`... ")
         if not os.path.exists(file_name):
             if file_name.startswith("https://") or file_name.startswith("http://"):
+                print(f"📖 reading url `{file_name}`... ")
                 text_file_content = " " + file_name + " "
+                urls_manager.untouch_url(url=file_name)
             else:
                 print(f"\n file `{file_name}` not found.")
                 return
         else:
+            print(f"📖 reading file `{file_name}`... ")
             text_file_content = _increment_file_name(
                 text_file_content=text_file_content, file_name=file_name
             )
@@ -51,13 +53,15 @@ def load_txt(file_name="input", verbose=False):
                 file_name=os.path.join(dir_files, text_files[0]),
             )
         else:
-            print("\nChoose a text file. Use `*` for process all and `q` to quit:")
+            print("\nFiles list:")
             for index, file in enumerate(text_files):
                 print(f"[{index}]:", os.path.join(dir_files, file))
 
             text_file_option = -1
             while text_file_option < 0 or text_file_option >= len(text_files):
-                text_file_option = input("Enter the file number: ")
+                text_file_option = input(
+                    "Choose a text file. Use `*` for process all and `q` to quit. Enter the file number: "
+                )
                 if text_file_option == "*":
                     for file in text_files:
                         text_file_content = _increment_file_name(
ohmyscrapper/modules/scrap_urls.py CHANGED
@@ -2,13 +2,14 @@ import ohmyscrapper.models.urls_manager as urls_manager
 import ohmyscrapper.modules.sniff_url as sniff_url
 import ohmyscrapper.modules.load_txt as load_txt
 import ohmyscrapper.modules.classify_urls as classify_urls
+import ohmyscrapper.modules.browser as browser
 from ohmyscrapper.core import config
 
 import time
 import random
 
 
-def scrap_url(url, verbose=False):
+def scrap_url(url, verbose=False, driver=None):
     if url["url_type"] is None:
         url["url_type"] = "generic"
 
@@ -21,18 +22,18 @@ def scrap_url(url, verbose=False):
 
         if url_type not in sniffing_config:
             default_type_sniffing = {
-                "bodytags": [{"h1": "title"}],
-                "metatags": [
-                    {"og:title": "title"},
-                    {"og:description": "description"},
-                    {"description": "description"},
-                ],
+                "bodytags": {"h1": "title"},
+                "metatags": {
+                    "og:title": "title",
+                    "og:description": "description",
+                    "description": "description",
+                },
             }
             config.append_url_sniffing({url_type: default_type_sniffing})
             sniffing_config = config.get_url_sniffing()
 
         url_report = sniff_url.get_tags(
-            url=url["url"], sniffing_config=sniffing_config[url_type]
+            url=url["url"], sniffing_config=sniffing_config[url_type], driver=driver
         )
     except Exception as e:
         urls_manager.set_url_error(url=url["url"], value="error on scrapping")
@@ -104,6 +105,12 @@ def process_sniffed_url(url_report, url, sniffing_config, verbose=False):
     ):
         if "first-a-link" in url_report.keys():
             db_fields["url_destiny"] = url_report["first-a-link"]
+    if (
+        "atags" in sniffing_config.keys()
+        and "load_links" in sniffing_config["atags"].keys()
+    ):
+        for a_link in url_report["a_links"]:
+            urls_manager.add_url(url=a_link["href"], parent_url=url["url"])
 
     if db_fields["title"] is not None:
         urls_manager.set_url_title(url=url["url"], value=db_fields["title"])
@@ -141,6 +148,7 @@ def scrap_urls(
     only_parents=True,
     verbose=False,
     n_urls=0,
+    driver=None,
 ):
     limit = 10
     classify_urls.classify_urls()
@@ -164,13 +172,19 @@ def scrap_urls(
             time.sleep(wait)
 
             print("🐕 Scrapper is sniffing the url...")
-            scrap_url(url=url, verbose=verbose)
+
+            if driver is None and config.get_sniffing("use-browser"):
+                driver = browser.get_driver()
+            scrap_url(url=url, verbose=verbose, driver=driver)
 
         n_urls = n_urls + len(urls)
         print(f"-- 🗃️ {n_urls} scraped urls...")
         classify_urls.classify_urls()
         if recursive:
-            wait = random.randint(5, 10)
+            wait = random.randint(
+                int(config.get_sniffing("round-sleeping") / 2),
+                int(config.get_sniffing("round-sleeping")),
+            )
             print(
                 f"🐶 Scrapper is sleeping for {wait} seconds before next round of {limit} urls"
            )
@@ -182,6 +196,7 @@ def scrap_urls(
             only_parents=only_parents,
             verbose=verbose,
             n_urls=n_urls,
+            driver=driver,
         )
     else:
         print("scrapping is over...")
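The recursive round delay is no longer hard-coded to 5–10 seconds; it is derived from the `round-sleeping` value added to config.yaml above. A small sketch of the same computation using that default:

```python
# Sketch of the new round delay, with the default round-sleeping value of 10.
import random

round_sleeping = 10  # config.get_sniffing("round-sleeping") in the real code
wait = random.randint(int(round_sleeping / 2), int(round_sleeping))
print(f"sleeping {wait} seconds")  # somewhere between 5 and 10
```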
ohmyscrapper/modules/seed.py CHANGED
@@ -2,7 +2,10 @@ import ohmyscrapper.models.urls_manager as urls_manager
 from ohmyscrapper.core import config
 
 
-def seed():
+def seed(reset=False):
+    if reset:
+        urls_manager.reset_seeds()
+
     if not config.url_types_file_exists():
         db_url_types = urls_manager.get_urls_valid_prefix()
         if len(db_url_types) > 0:
ohmyscrapper/modules/sniff_url.py CHANGED
@@ -1,13 +1,18 @@
 import requests
 from bs4 import BeautifulSoup
 import json
+from ohmyscrapper.core import config
+import ohmyscrapper.modules.browser as browser
+import time
 
 
 def sniff_url(
     url="https://www.linkedin.com/in/cesardesouzacardoso/",
     silent=False,
     sniffing_config={},
+    driver=None,
 ):
+    final_report = {}
     if "metatags" in sniffing_config:
         metatags_to_search = sniffing_config["metatags"]
     else:
@@ -41,10 +46,18 @@ def sniff_url(
     if not silent:
         print("checking url:", url)
 
-    r = requests.get(url=url)
-    soup = BeautifulSoup(r.text, "html.parser")
+    try:
+        r = get_url(url=url, driver=driver)
+        soup = BeautifulSoup(r, "html.parser")
+    except requests.exceptions.ReadTimeout:
+        url_domain = url.split("/")[2]
+        final_report["error"] = (
+            f"!!! timeout (10 seconds) while checking the url with domain: `{url_domain}` !!!"
+        )
+
+        print(f"\n\n{final_report['error']}\n\n")
+        soup = BeautifulSoup("", "html.parser")
 
-    final_report = {}
     final_report["scrapped-url"] = url
     if len(metatags_to_search) > 0:
         final_report.update(
@@ -59,14 +72,14 @@ def sniff_url(
                 soup=soup, silent=silent, body_tags_to_search=body_tags_to_search
             )
         )
-    final_report["a_links"] = _extract_a_tags(soup=soup, silent=silent)
+    final_report["a_links"] = _extract_a_tags(soup=soup, silent=silent, url=url)
     final_report = _complementary_report(final_report, soup, silent).copy()
     final_report["json"] = json.dumps(final_report)
 
     return final_report
 
 
-def _extract_a_tags(soup, silent):
+def _extract_a_tags(soup, silent, url=None):
     a_links = []
     if not silent:
         print("\n\n\n\n---- all <a> links ---")
@@ -74,12 +87,18 @@ def _extract_a_tags(soup, silent):
     i = 0
     for a_tag in soup.find_all("a"):
         i = i + 1
-        a_links.append({"text": a_tag.text, "href": a_tag.get("href")})
+
+        href = a_tag.get("href")
+        if url is not None and href[:1] == "/":
+            domain = url.split("//")[0] + "//" + url.split("//")[1].split("/")[0]
+            href = domain + href
+
+        a_links.append({"text": a_tag.text, "href": href})
         if not silent:
             print("\n-- <a> link", i, "-- ")
             print("target:", a_tag.get("target"))
             print("text:", str(a_tag.text).strip())
-            print("href:", a_tag.get("href"))
+            print("href:", href)
             print("-------------- ")
     return a_links
 
@@ -114,18 +133,30 @@ def _extract_text_tags(soup, silent, body_tags_to_search):
         print("\n\n\n\n---- all <text> tags ---\n")
     i = 0
     for text_tag, separator in body_tags_to_search.items():
-        if len(soup.find_all(text_tag)) > 0:
+        tag = text_tag
+        tag_class = None
+        tag_id = None
+
+        if len(text_tag.split(".")) > 1:
+            tag = text_tag.split(".")[0]
+            tag_class = text_tag.split(".")[1]
+
+        if len(text_tag.split("#")) > 1:
+            tag = text_tag.split("#")[0]
+            tag_id = text_tag.split("#")[1]
+
+        if len(soup.find_all(tag, class_=tag_class, id=tag_id)) > 0:
             valid_text_tags[text_tag] = []
-            for obj_tag in soup.find_all(text_tag):
+            for obj_tag in soup.find_all(tag, class_=tag_class, id=tag_id):
                 valid_text_tags[text_tag].append(obj_tag.text.strip())
             valid_text_tags[text_tag] = separator.join(valid_text_tags[text_tag])
-        i = i + 1
-        if not silent:
-            print("-- text tag", i, "--")
-            print("name:", text_tag)
-            print("separator:", separator)
-            print("texts:", valid_text_tags[text_tag])
-            print("---------------- \n")
+            i = i + 1
+            if not silent:
+                print("-- text tag", i, "--")
+                print("name:", text_tag)
+                print("separator:", separator)
+                print("texts:", valid_text_tags[text_tag])
+                print("---------------- \n")
     return valid_text_tags
 
 
@@ -151,5 +182,23 @@ def _complementary_report(final_report, soup, silent):
     return final_report
 
 
-def get_tags(url, sniffing_config={}):
-    return sniff_url(url=url, silent=True, sniffing_config=sniffing_config)
+def get_tags(url, sniffing_config={}, driver=None):
+    return sniff_url(
+        url=url, silent=True, sniffing_config=sniffing_config, driver=driver
+    )
+
+
+def get_url(url, driver=None):
+    if driver is None and config.get_sniffing("use-browser"):
+        driver = browser.get_driver()
+
+    if driver is not None:
+        try:
+            driver.get(url)
+            time.sleep(config.get_sniffing("browser-waiting-time"))
+            driver.implicitly_wait(config.get_sniffing("browser-waiting-time"))
+            return driver.page_source
+        except:
+            print("error")
+            pass
+    return requests.get(url=url, timeout=config.get_sniffing("timeout")).text
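`_extract_text_tags` now accepts `tag.class` and `tag#id` style selectors on top of bare tag names. A simplified, standalone sketch of that lookup (it filters with a plain `attrs` dict rather than the `class_`/`id` keywords used in the module, and the HTML below is invented):

```python
# Simplified sketch of the new "tag.class" / "tag#id" body-tag selectors.
from bs4 import BeautifulSoup

html = '<h1 class="job-title">Data Engineer</h1><div id="location">Berlin</div>'
soup = BeautifulSoup(html, "html.parser")

def find_texts(soup, selector):
    tag, attrs = selector, {}
    if "." in selector:
        tag, attrs["class"] = selector.split(".", 1)
    elif "#" in selector:
        tag, attrs["id"] = selector.split("#", 1)
    return [t.get_text(strip=True) for t in soup.find_all(tag, attrs=attrs)]

print(find_texts(soup, "h1.job-title"))  # ['Data Engineer']
print(find_texts(soup, "div#location"))  # ['Berlin']
```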
@@ -1,9 +1,10 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: ohmyscrapper
-Version: 0.7.0
+Version: 0.8.2
 Summary: OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a final report with general information about job positions.
 Author: Cesar Cardoso
 Author-email: Cesar Cardoso <hello@cesarcardoso.cc>
+License-Expression: MIT
 Requires-Dist: beautifulsoup4>=4.14.3
 Requires-Dist: google-genai>=1.55.0
 Requires-Dist: markdown>=3.10
@@ -12,11 +13,14 @@ Requires-Dist: python-dotenv>=1.2.1
 Requires-Dist: pyyaml>=6.0.3
 Requires-Dist: requests>=2.32.5
 Requires-Dist: rich>=14.2.0
+Requires-Dist: selenium>=4.39.0
 Requires-Dist: urlextract>=1.9.0
 Requires-Python: >=3.11
+Project-URL: Changelog, https://github.com/bouli/ohmyscrapper/releases/latest
+Project-URL: Repository, https://github.com/bouli/ohmyscrapper
 Description-Content-Type: text/markdown
 
-# 🐶 OhMyScrapper - v0.7.0
+# 🐶 OhMyScrapper - v0.8.2
 
 OhMyScrapper scrapes texts and urls looking for links and jobs-data to create a
 final report with general information about job positions.
@@ -0,0 +1,22 @@
+ohmyscrapper/__init__.py,sha256=WzXXhhlVkyAPbqeo7NgezLb6TbEJcuFf8JYAdcf3zBE,7678
+ohmyscrapper/__main__.py,sha256=5BjNuyet8AY-POwoF5rGt722rHQ7tJ0Vf0UFUfzzi-I,58
+ohmyscrapper/core/config.py,sha256=bfmoTr1j4SnIhKFZC_F9fh4Y90VqSPFf_g6Rm-aNui4,3184
+ohmyscrapper/core/config_files.py,sha256=3mIXVxurmyXCpKueyyGsZ6lUnV8VJ2gnLU2QaqhWhhI,3410
+ohmyscrapper/core/default_files/config.yaml,sha256=y54QAjOnogpl8LEzhmn89tAfRzle4ZWWtIbYRjxX8Rk,341
+ohmyscrapper/core/default_files/url_sniffing.yaml,sha256=HUwmGUwuJy7t97bJHgNiZOl1thvD9bLaelPgbEr5bMY,465
+ohmyscrapper/core/default_files/url_types.yaml,sha256=20kvv8_iWRT-pLa014RXYpAmPSonn6tDnG302rx7l-o,228
+ohmyscrapper/models/urls_manager.py,sha256=sP2T4k1HOj8ccaVGWbuhfw1BDfOUSVL4_WR9vRyHjOA,12115
+ohmyscrapper/modules/browser.py,sha256=6AaNFQ7jV91DvHqbsBT6It_-tNbVN2qJC_c1vXTweJY,856
+ohmyscrapper/modules/classify_urls.py,sha256=GhiosAQUITy1DQe_PksYV9QRKVTgpkSE28dkutzbWVA,1038
+ohmyscrapper/modules/load_txt.py,sha256=pkWBIdh6vORPfENDZ6wGM89vswnOnc1flqKfkLs9RD8,4138
+ohmyscrapper/modules/merge_dbs.py,sha256=0pK3PPUGSbnaDkdpQUGCHemOVaKO37bfHwnsy_EVpWQ,115
+ohmyscrapper/modules/process_with_ai.py,sha256=kl39Jzl-PUwh6AfmTZ9SLFUYs9Sk4biqgt8rNz3X1FA,7255
+ohmyscrapper/modules/scrap_urls.py,sha256=_e4jT7eBWGP6cqI6RaD0xzNX1vCFBx96JIBPGW3mAV4,6627
+ohmyscrapper/modules/seed.py,sha256=hHEGSoPXsmclTaRPeIcK2oC1Xpg3_JqBv_YFMD0m5Jw,1044
+ohmyscrapper/modules/show.py,sha256=jsAs4g8ouA9wymkBfkDCbpVWKD-m_20uKG-m1cZAUGA,3877
+ohmyscrapper/modules/sniff_url.py,sha256=BZphbr2V7MDyFPW7APlh7_CLTtc_u3kcB7DY2QjVVQo,6579
+ohmyscrapper/modules/untouch_all.py,sha256=DAwWYfqMFifHPtFCxSamu0AxHCgk6aJbTnBy6wLucXM,167
+ohmyscrapper-0.8.2.dist-info/WHEEL,sha256=xDCZ-UyfvkGuEHPeI7BcJzYKIZzdqN8A8o1M5Om8IyA,79
+ohmyscrapper-0.8.2.dist-info/entry_points.txt,sha256=BZud6D16XkfjelDa4Z33mji-KJbbZXgq2FoLrzjru5I,52
+ohmyscrapper-0.8.2.dist-info/METADATA,sha256=2OnXXefFcRT_ChEVdL6LKZoe6iNKyFqjoN10LBUBB34,4293
+ohmyscrapper-0.8.2.dist-info/RECORD,,
@@ -1,21 +0,0 @@
-ohmyscrapper/__init__.py,sha256=w5Ty9eszf8tEv72IQrFov0YbZWMqsraq448xhX3YGQs,6493
-ohmyscrapper/__main__.py,sha256=5BjNuyet8AY-POwoF5rGt722rHQ7tJ0Vf0UFUfzzi-I,58
-ohmyscrapper/core/config.py,sha256=i_RA-zReNQIWWmsFar85qzRUqdqvTFMPeCP7Hya7ltU,2996
-ohmyscrapper/core/config_files.py,sha256=KC3yChTnlclclU9EKTqFBoAu9p6XdOKuegub5NPYDDY,2434
-ohmyscrapper/core/default_files/config.yaml,sha256=bgPBVlze2tOCbyrA47h_5BJ35UsXnqsjQszzy0vn-Pw,248
-ohmyscrapper/core/default_files/url_sniffing.yaml,sha256=MKdVR5HQ1i2yTRw2ijzxPSmIyhUno_R4L2k17r3EBBc,417
-ohmyscrapper/core/default_files/url_types.yaml,sha256=20kvv8_iWRT-pLa014RXYpAmPSonn6tDnG302rx7l-o,228
-ohmyscrapper/models/urls_manager.py,sha256=FC1j72M1gzNwC_PzPqnew986b-BI6s7zUv8Z7HiM1M0,11849
-ohmyscrapper/modules/classify_urls.py,sha256=GhiosAQUITy1DQe_PksYV9QRKVTgpkSE28dkutzbWVA,1038
-ohmyscrapper/modules/load_txt.py,sha256=dNkUZ2ehBiPx-q4fPczRiHFvnpzCrjeycFtexhWGmEE,3967
-ohmyscrapper/modules/merge_dbs.py,sha256=0pK3PPUGSbnaDkdpQUGCHemOVaKO37bfHwnsy_EVpWQ,115
-ohmyscrapper/modules/process_with_ai.py,sha256=kl39Jzl-PUwh6AfmTZ9SLFUYs9Sk4biqgt8rNz3X1FA,7255
-ohmyscrapper/modules/scrap_urls.py,sha256=CNoEC-d1r-u4qxnEVimm4ctP6MJGdU8y8VI2Nx0bBdM,6033
-ohmyscrapper/modules/seed.py,sha256=qDUE7TWx9iNQEzqThK4p7g8pTZjdpkmoqI8kOo_zdtk,983
-ohmyscrapper/modules/show.py,sha256=jsAs4g8ouA9wymkBfkDCbpVWKD-m_20uKG-m1cZAUGA,3877
-ohmyscrapper/modules/sniff_url.py,sha256=zJ2Uox2aUdQibL4UFLxg3t7GqJ7WwWEl0q3QSUbMEbc,4960
-ohmyscrapper/modules/untouch_all.py,sha256=DAwWYfqMFifHPtFCxSamu0AxHCgk6aJbTnBy6wLucXM,167
-ohmyscrapper-0.7.0.dist-info/WHEEL,sha256=xDCZ-UyfvkGuEHPeI7BcJzYKIZzdqN8A8o1M5Om8IyA,79
-ohmyscrapper-0.7.0.dist-info/entry_points.txt,sha256=BZud6D16XkfjelDa4Z33mji-KJbbZXgq2FoLrzjru5I,52
-ohmyscrapper-0.7.0.dist-info/METADATA,sha256=Doakf4oDT6oskPGdSlEoRJHBxUmm9FhWaHfDlNIfNuM,4096
-ohmyscrapper-0.7.0.dist-info/RECORD,,