blue-assistant 4.283.1__py3-none-any.whl → 4.288.1__py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ ICON = "🧠"
4
4
 
5
5
  DESCRIPTION = f"{ICON} An AI Assistant."
6
6
 
7
- VERSION = "4.283.1"
7
+ VERSION = "4.288.1"
8
8
 
9
9
  REPO_NAME = "blue-assistant"
10
10
 
@@ -36,14 +36,14 @@ def web_crawl(
36
36
  seed_urls = script.vars[seed_url_var_name]
37
37
  log_list(logger, "using", seed_urls, "seed url(s)")
38
38
 
39
- visited_urls = crawl_list_of_urls(
39
+ crawl_cache = crawl_list_of_urls(
40
40
  seed_urls=seed_urls,
41
41
  object_name=script.object_name,
42
42
  max_iterations=script.nodes[node_name]["max_iterations"],
43
43
  use_cache=use_cache,
44
+ cache_prefix=node_name,
44
45
  )
45
46
 
46
- script.nodes[node_name]["visited_urls"] = visited_urls
47
- script.nodes[node_name]["output"] = "TBA"
47
+ script.nodes[node_name]["output"] = crawl_cache
48
48
 
49
49
  return True
@@ -20,6 +20,7 @@ def crawl_list_of_urls(
20
20
  max_iterations: int = 10,
21
21
  use_cache: bool = False,
22
22
  verbose: bool = False,
23
+ cache_prefix: str = "",
23
24
  ) -> Dict[str, str]:
24
25
  logger.info(
25
26
  "{}.crawl_list_of_urls({}): {} -{}> {}".format(
@@ -56,8 +57,10 @@ def crawl_list_of_urls(
56
57
  queue = queue[1:]
57
58
 
58
59
  logger.info(
59
- "{} {} ...".format(
60
+ "{} [#{:,}/{:,}]: {} ".format(
60
61
  "✅ " if url in crawl_cache else "🔗 ",
62
+ iteration,
63
+ len(queue),
61
64
  url,
62
65
  )
63
66
  )
@@ -74,23 +77,26 @@ def crawl_list_of_urls(
74
77
  file.save_yaml(
75
78
  filename=objects.path_of(
76
79
  object_name=object_name,
77
- filename="crawl_summary_cache/{}.yaml".format(url_to_filename(url)),
80
+ filename="{}-crawl_cache/{}.yaml".format(
81
+ cache_prefix,
82
+ url_to_filename(url),
83
+ ),
78
84
  ),
79
85
  data=url_summary,
80
86
  )
81
87
 
82
88
  crawl_cache[url] = content_type
83
- if "list_of_urls" in url_summary:
84
- queue = list(
85
- set(
86
- queue
87
- + [
88
- url
89
- for url in url_summary["list_of_urls"]
90
- if url not in crawl_cache.keys()
91
- ]
92
- )
93
- )
89
+
90
+ queue = (
91
+ queue
92
+ + url_summary.get("list_of_urls", [])
93
+ + [
94
+ url
95
+ for url in url_summary.get("list_of_ignored_urls", [])
96
+ if any(url.startswith(url_prefix) for url_prefix in seed_urls)
97
+ ]
98
+ )
99
+ queue = list({url for url in queue if url not in crawl_cache.keys()})
94
100
 
95
101
  iteration += 1
96
102
  if max_iterations != -1 and iteration >= max_iterations:
@@ -49,6 +49,9 @@ def fetch_links_and_text(
49
49
  for a_tag in soup.find_all("a", href=True):
50
50
  a_url = urljoin(url, a_tag["href"])
51
51
 
52
+ if "#" in a_url:
53
+ a_url = a_url.split("#", 1)[0]
54
+
52
55
  if a_url.startswith(url):
53
56
  if url not in list_of_urls:
54
57
  logger.info(f"+= {a_url}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: blue_assistant
3
- Version: 4.283.1
3
+ Version: 4.288.1
4
4
  Summary: 🧠 An AI Assistant.
5
5
  Home-page: https://github.com/kamangir/blue-assistant
6
6
  Author: Arash Abadpour (Kamangir)
@@ -124,4 +124,4 @@ Also home to [`@web`](https://raw.githubusercontent.com/kamangir/blue-assistant/
124
124
 
125
125
  [![pylint](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml) [![pytest](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml) [![bashtest](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml) [![PyPI version](https://img.shields.io/pypi/v/blue-assistant.svg)](https://pypi.org/project/blue-assistant/) [![PyPI - Downloads](https://img.shields.io/pypi/dd/blue-assistant)](https://pypistats.org/packages/blue-assistant)
126
126
 
127
- built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.283.1`](https://github.com/kamangir/blue-assistant).
127
+ built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.288.1`](https://github.com/kamangir/blue-assistant).
@@ -1,5 +1,5 @@
1
1
  blue_assistant/README.py,sha256=EJORj3I5pucJplI86lrFaZBN5C9IYNgKoG_V7h27NHw,2586
2
- blue_assistant/__init__.py,sha256=sPtxnf1X-ZphpOLGOeSuDHB2HL_PYWxMF_E6-7ERMEA,311
2
+ blue_assistant/__init__.py,sha256=-afJ8TPucaY6ohmlQR93hpbM12zWRKiLRL0i_9jeUH4,311
3
3
  blue_assistant/__main__.py,sha256=URtal70XZc0--3FDTYWcLtnGOqBYjMX9gt-L1k8hDXI,361
4
4
  blue_assistant/config.env,sha256=npodyuuhkZUHUv9FnEiQQZkKxFbg8nQb1YpOCURqV3Y,301
5
5
  blue_assistant/env.py,sha256=FTSdJ8-J4jAyI0-h3MBgOweQBWd3YEFIibBHSXpClrY,760
@@ -43,7 +43,7 @@ blue_assistant/script/actions/__init__.py,sha256=GJJCaXSrfhwW9K5A2PHmrS7iGl-Ss9U
43
43
  blue_assistant/script/actions/generate_image.py,sha256=brg3u6e-cZvBLK8B7UAh-CXgmAwyvSN-jE00EXMSF3A,1357
44
44
  blue_assistant/script/actions/generate_text.py,sha256=GQ7sF1J-vBNgr-h01RZHMDQJOGDXx8PzQWEVNQwW_TU,1910
45
45
  blue_assistant/script/actions/generic.py,sha256=EjJkDj82ZFFQbjn-Uib-2Qs-nZG8kR2NzhsEVuOpgWQ,376
46
- blue_assistant/script/actions/web_crawl.py,sha256=LoZXEKOlSEVVVJaSobMqnOjbpumtScaCta3I8TQbV-A,1532
46
+ blue_assistant/script/actions/web_crawl.py,sha256=UlieFTnXJSgnKm0ZWwPdGsdmYuFOYMcuGJ1BzsnKXAw,1510
47
47
  blue_assistant/script/repository/__init__.py,sha256=zVI3cubRqM9H6WgF0EUP9idILVLCumPFmJgKPM7iVlM,604
48
48
  blue_assistant/script/repository/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
49
  blue_assistant/script/repository/base/classes.py,sha256=gM8OB1iKRKLa_dk7esAogAR9WaNZFgLhXkpd1iTtsuQ,5376
@@ -63,11 +63,11 @@ blue_assistant/script/repository/orbital_data_explorer/__init__.py,sha256=yy5FtC
63
63
  blue_assistant/script/repository/orbital_data_explorer/classes.py,sha256=ixYd_FHWYtp8Sfd6AiZkIqePjoUlT9iLg7TvuxHIDzA,204
64
64
  blue_assistant/web/__init__.py,sha256=70_JSpnfX1mLm8Xv3xHIujfr2FfGeHPRs6HraWDP1XA,114
65
65
  blue_assistant/web/__main__.py,sha256=4s2LNikNiT4UTbzOVQzV4j2jUWfmVIbE36WS1BxTWJY,1576
66
- blue_assistant/web/crawl.py,sha256=yeo_HJhX8Pp5E1BC7ZGBzNs_c6pvMYSC3olvr5K27hU,3118
67
- blue_assistant/web/fetch.py,sha256=0hbT246VzpYVCfJ8eflIZWGFMJoxml9vj-sYRCedCH4,2469
66
+ blue_assistant/web/crawl.py,sha256=5RkAyUUU6QVRatJhar0TuvG9u8s3qS-wGu7Dp6MeFX0,3326
67
+ blue_assistant/web/fetch.py,sha256=meso5ssN6OEk2xcPo3VMmFsXLqPlBVZ2FxureWoIYag,2546
68
68
  blue_assistant/web/functions.py,sha256=uJAC_kGOn2wA9AwOB_FB2f1dFYyNaEPPC42lN3klnFc,618
69
- blue_assistant-4.283.1.dist-info/LICENSE,sha256=ogEPNDSH0_dhiv_lT3ifVIdgIzHAqNA_SemnxUfPBJk,7048
70
- blue_assistant-4.283.1.dist-info/METADATA,sha256=SF7LXKUsui8jj3MOhUDTRA54gt2AT5HQ4y_dIsRuA0Q,5380
71
- blue_assistant-4.283.1.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
72
- blue_assistant-4.283.1.dist-info/top_level.txt,sha256=ud0BkBbdOVze13bNqHuhZj1rwCztaBtDf5ChEYzASOs,15
73
- blue_assistant-4.283.1.dist-info/RECORD,,
69
+ blue_assistant-4.288.1.dist-info/LICENSE,sha256=ogEPNDSH0_dhiv_lT3ifVIdgIzHAqNA_SemnxUfPBJk,7048
70
+ blue_assistant-4.288.1.dist-info/METADATA,sha256=yK58Vl9ASNwaBc1Bac4MGYyRtwc6R07SxMlM09DVJJQ,5380
71
+ blue_assistant-4.288.1.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
72
+ blue_assistant-4.288.1.dist-info/top_level.txt,sha256=ud0BkBbdOVze13bNqHuhZj1rwCztaBtDf5ChEYzASOs,15
73
+ blue_assistant-4.288.1.dist-info/RECORD,,