blue-assistant 4.283.1__py3-none-any.whl → 4.288.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- blue_assistant/__init__.py +1 -1
- blue_assistant/script/actions/web_crawl.py +3 -3
- blue_assistant/web/crawl.py +19 -13
- blue_assistant/web/fetch.py +3 -0
- {blue_assistant-4.283.1.dist-info → blue_assistant-4.288.1.dist-info}/METADATA +2 -2
- {blue_assistant-4.283.1.dist-info → blue_assistant-4.288.1.dist-info}/RECORD +9 -9
- {blue_assistant-4.283.1.dist-info → blue_assistant-4.288.1.dist-info}/LICENSE +0 -0
- {blue_assistant-4.283.1.dist-info → blue_assistant-4.288.1.dist-info}/WHEEL +0 -0
- {blue_assistant-4.283.1.dist-info → blue_assistant-4.288.1.dist-info}/top_level.txt +0 -0
blue_assistant/__init__.py
CHANGED
blue_assistant/script/actions/web_crawl.py
CHANGED
|
@@ -36,14 +36,14 @@ def web_crawl(
|
|
|
36
36
|
seed_urls = script.vars[seed_url_var_name]
|
|
37
37
|
log_list(logger, "using", seed_urls, "seed url(s)")
|
|
38
38
|
|
|
39
|
-
|
|
39
|
+
crawl_cache = crawl_list_of_urls(
|
|
40
40
|
seed_urls=seed_urls,
|
|
41
41
|
object_name=script.object_name,
|
|
42
42
|
max_iterations=script.nodes[node_name]["max_iterations"],
|
|
43
43
|
use_cache=use_cache,
|
|
44
|
+
cache_prefix=node_name,
|
|
44
45
|
)
|
|
45
46
|
|
|
46
|
-
script.nodes[node_name]["
|
|
47
|
-
script.nodes[node_name]["output"] = "TBA"
|
|
47
|
+
script.nodes[node_name]["output"] = crawl_cache
|
|
48
48
|
|
|
49
49
|
return True
|
blue_assistant/web/crawl.py
CHANGED
|
@@ -20,6 +20,7 @@ def crawl_list_of_urls(
|
|
|
20
20
|
max_iterations: int = 10,
|
|
21
21
|
use_cache: bool = False,
|
|
22
22
|
verbose: bool = False,
|
|
23
|
+
cache_prefix: str = "",
|
|
23
24
|
) -> Dict[str, str]:
|
|
24
25
|
logger.info(
|
|
25
26
|
"{}.crawl_list_of_urls({}): {} -{}> {}".format(
|
|
@@ -56,8 +57,10 @@ def crawl_list_of_urls(
|
|
|
56
57
|
queue = queue[1:]
|
|
57
58
|
|
|
58
59
|
logger.info(
|
|
59
|
-
"{} {}
|
|
60
|
+
"{} [#{:,}/{:,}]: {} ".format(
|
|
60
61
|
"✅ " if url in crawl_cache else "🔗 ",
|
|
62
|
+
iteration,
|
|
63
|
+
len(queue),
|
|
61
64
|
url,
|
|
62
65
|
)
|
|
63
66
|
)
|
|
@@ -74,23 +77,26 @@ def crawl_list_of_urls(
|
|
|
74
77
|
file.save_yaml(
|
|
75
78
|
filename=objects.path_of(
|
|
76
79
|
object_name=object_name,
|
|
77
|
-
filename="
|
|
80
|
+
filename="{}-crawl_cache/{}.yaml".format(
|
|
81
|
+
cache_prefix,
|
|
82
|
+
url_to_filename(url),
|
|
83
|
+
),
|
|
78
84
|
),
|
|
79
85
|
data=url_summary,
|
|
80
86
|
)
|
|
81
87
|
|
|
82
88
|
crawl_cache[url] = content_type
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
89
|
+
|
|
90
|
+
queue = (
|
|
91
|
+
queue
|
|
92
|
+
+ url_summary.get("list_of_urls", [])
|
|
93
|
+
+ [
|
|
94
|
+
url
|
|
95
|
+
for url in url_summary.get("list_of_ignored_urls", [])
|
|
96
|
+
if any(url.startswith(url_prefix) for url_prefix in seed_urls)
|
|
97
|
+
]
|
|
98
|
+
)
|
|
99
|
+
queue = list({url for url in queue if url not in crawl_cache.keys()})
|
|
94
100
|
|
|
95
101
|
iteration += 1
|
|
96
102
|
if max_iterations != -1 and iteration >= max_iterations:
|
blue_assistant/web/fetch.py
CHANGED
|
@@ -49,6 +49,9 @@ def fetch_links_and_text(
|
|
|
49
49
|
for a_tag in soup.find_all("a", href=True):
|
|
50
50
|
a_url = urljoin(url, a_tag["href"])
|
|
51
51
|
|
|
52
|
+
if "#" in a_url:
|
|
53
|
+
a_url = a_url.split("#", 1)[0]
|
|
54
|
+
|
|
52
55
|
if a_url.startswith(url):
|
|
53
56
|
if url not in list_of_urls:
|
|
54
57
|
logger.info(f"+= {a_url}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: blue_assistant
|
|
3
|
-
Version: 4.283.1
|
|
3
|
+
Version: 4.288.1
|
|
4
4
|
Summary: 🧠 An AI Assistant.
|
|
5
5
|
Home-page: https://github.com/kamangir/blue-assistant
|
|
6
6
|
Author: Arash Abadpour (Kamangir)
|
|
@@ -124,4 +124,4 @@ Also home to [`@web`](https://raw.githubusercontent.com/kamangir/blue-assistant/
|
|
|
124
124
|
|
|
125
125
|
[](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml) [](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml) [](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml) [](https://pypi.org/project/blue-assistant/) [](https://pypistats.org/packages/blue-assistant)
|
|
126
126
|
|
|
127
|
-
built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.283.1`](https://github.com/kamangir/blue-assistant).
|
|
127
|
+
built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.288.1`](https://github.com/kamangir/blue-assistant).
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
blue_assistant/README.py,sha256=EJORj3I5pucJplI86lrFaZBN5C9IYNgKoG_V7h27NHw,2586
|
|
2
|
-
blue_assistant/__init__.py,sha256
|
|
2
|
+
blue_assistant/__init__.py,sha256=-afJ8TPucaY6ohmlQR93hpbM12zWRKiLRL0i_9jeUH4,311
|
|
3
3
|
blue_assistant/__main__.py,sha256=URtal70XZc0--3FDTYWcLtnGOqBYjMX9gt-L1k8hDXI,361
|
|
4
4
|
blue_assistant/config.env,sha256=npodyuuhkZUHUv9FnEiQQZkKxFbg8nQb1YpOCURqV3Y,301
|
|
5
5
|
blue_assistant/env.py,sha256=FTSdJ8-J4jAyI0-h3MBgOweQBWd3YEFIibBHSXpClrY,760
|
|
@@ -43,7 +43,7 @@ blue_assistant/script/actions/__init__.py,sha256=GJJCaXSrfhwW9K5A2PHmrS7iGl-Ss9U
|
|
|
43
43
|
blue_assistant/script/actions/generate_image.py,sha256=brg3u6e-cZvBLK8B7UAh-CXgmAwyvSN-jE00EXMSF3A,1357
|
|
44
44
|
blue_assistant/script/actions/generate_text.py,sha256=GQ7sF1J-vBNgr-h01RZHMDQJOGDXx8PzQWEVNQwW_TU,1910
|
|
45
45
|
blue_assistant/script/actions/generic.py,sha256=EjJkDj82ZFFQbjn-Uib-2Qs-nZG8kR2NzhsEVuOpgWQ,376
|
|
46
|
-
blue_assistant/script/actions/web_crawl.py,sha256=
|
|
46
|
+
blue_assistant/script/actions/web_crawl.py,sha256=UlieFTnXJSgnKm0ZWwPdGsdmYuFOYMcuGJ1BzsnKXAw,1510
|
|
47
47
|
blue_assistant/script/repository/__init__.py,sha256=zVI3cubRqM9H6WgF0EUP9idILVLCumPFmJgKPM7iVlM,604
|
|
48
48
|
blue_assistant/script/repository/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
49
|
blue_assistant/script/repository/base/classes.py,sha256=gM8OB1iKRKLa_dk7esAogAR9WaNZFgLhXkpd1iTtsuQ,5376
|
|
@@ -63,11 +63,11 @@ blue_assistant/script/repository/orbital_data_explorer/__init__.py,sha256=yy5FtC
|
|
|
63
63
|
blue_assistant/script/repository/orbital_data_explorer/classes.py,sha256=ixYd_FHWYtp8Sfd6AiZkIqePjoUlT9iLg7TvuxHIDzA,204
|
|
64
64
|
blue_assistant/web/__init__.py,sha256=70_JSpnfX1mLm8Xv3xHIujfr2FfGeHPRs6HraWDP1XA,114
|
|
65
65
|
blue_assistant/web/__main__.py,sha256=4s2LNikNiT4UTbzOVQzV4j2jUWfmVIbE36WS1BxTWJY,1576
|
|
66
|
-
blue_assistant/web/crawl.py,sha256=
|
|
67
|
-
blue_assistant/web/fetch.py,sha256=
|
|
66
|
+
blue_assistant/web/crawl.py,sha256=5RkAyUUU6QVRatJhar0TuvG9u8s3qS-wGu7Dp6MeFX0,3326
|
|
67
|
+
blue_assistant/web/fetch.py,sha256=meso5ssN6OEk2xcPo3VMmFsXLqPlBVZ2FxureWoIYag,2546
|
|
68
68
|
blue_assistant/web/functions.py,sha256=uJAC_kGOn2wA9AwOB_FB2f1dFYyNaEPPC42lN3klnFc,618
|
|
69
|
-
blue_assistant-4.283.1.dist-info/LICENSE,sha256=ogEPNDSH0_dhiv_lT3ifVIdgIzHAqNA_SemnxUfPBJk,7048
|
|
70
|
-
blue_assistant-4.
|
|
71
|
-
blue_assistant-4.283.1.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
|
72
|
-
blue_assistant-4.283.1.dist-info/top_level.txt,sha256=ud0BkBbdOVze13bNqHuhZj1rwCztaBtDf5ChEYzASOs,15
|
|
73
|
-
blue_assistant-4.283.1.dist-info/RECORD,,
|
|
69
|
+
blue_assistant-4.288.1.dist-info/LICENSE,sha256=ogEPNDSH0_dhiv_lT3ifVIdgIzHAqNA_SemnxUfPBJk,7048
|
|
70
|
+
blue_assistant-4.288.1.dist-info/METADATA,sha256=yK58Vl9ASNwaBc1Bac4MGYyRtwc6R07SxMlM09DVJJQ,5380
|
|
71
|
+
blue_assistant-4.288.1.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
|
72
|
+
blue_assistant-4.288.1.dist-info/top_level.txt,sha256=ud0BkBbdOVze13bNqHuhZj1rwCztaBtDf5ChEYzASOs,15
|
|
73
|
+
blue_assistant-4.288.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|