blue-assistant 4.307.1__py3-none-any.whl → 4.319.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ ICON = "🧠"
4
4
 
5
5
  DESCRIPTION = f"{ICON} An AI Assistant."
6
6
 
7
- VERSION = "4.307.1"
7
+ VERSION = "4.319.1"
8
8
 
9
9
  REPO_NAME = "blue-assistant"
10
10
 
@@ -37,6 +37,7 @@ def help_run(
37
37
  args = [
38
38
  "[--test_mode 1]",
39
39
  "[--verbose 1]",
40
+ "[--runnable <~node_1,~node_2>]",
40
41
  ]
41
42
 
42
43
  return show_usage(
@@ -48,6 +48,13 @@ parser.add_argument(
48
48
  default=1,
49
49
  help="0 | 1",
50
50
  )
51
+ parser.add_argument(
52
+ "--runnable",
53
+ type=str,
54
+ default="",
55
+ help="~node_1,~node_2",
56
+ )
57
+
51
58
  args = parser.parse_args()
52
59
 
53
60
  delim = " " if args.delim == "space" else args.delim
@@ -70,7 +77,9 @@ elif args.task == "run":
70
77
  )
71
78
 
72
79
  if success:
73
- success = script.run()
80
+ success = script.run(
81
+ runnable=args.runnable,
82
+ )
74
83
  else:
75
84
  success = None
76
85
 
@@ -4,6 +4,7 @@ from blue_options.logger import log_list
4
4
 
5
5
  from blue_assistant import NAME
6
6
  from blue_assistant.web.crawl import crawl_list_of_urls
7
+ from blue_assistant.web.functions import normalize_url
7
8
  from blue_assistant.script.repository.base.root import RootScript
8
9
  from blue_assistant.logger import logger
9
10
 
@@ -32,16 +33,16 @@ def web_crawl(
32
33
  if seed_url_var_name not in script.vars:
33
34
  logger.error(f"{node_name}: {seed_url_var_name}: seed_urls not found in vars.")
34
35
  return False
35
- seed_urls = script.vars[seed_url_var_name]
36
+ seed_urls = list({normalize_url(url) for url in script.vars[seed_url_var_name]})
36
37
  log_list(logger, "using", seed_urls, "seed url(s)")
37
38
 
38
- success, _ = crawl_list_of_urls(
39
+ success, crawl_cache = crawl_list_of_urls(
39
40
  seed_urls=seed_urls,
40
41
  object_name=script.object_name,
41
42
  max_iterations=script.nodes[node_name]["max_iterations"],
42
43
  cache_prefix=node_name,
43
44
  )
44
45
 
45
- script.nodes[node_name]["output"] = success
46
+ script.nodes[node_name]["output"] = crawl_cache
46
47
 
47
48
  return success
@@ -1,14 +1,7 @@
1
- from typing import Dict, List, Callable
2
- import os
3
- import networkx as nx
4
- from functools import reduce
5
- from tqdm import tqdm
6
1
  import copy
7
2
 
8
3
  from blueness import module
9
- from blue_objects import file, objects, path
10
- from blue_objects.metadata import post_to_object
11
- from blueflow.workflow import dot_file
4
+ from blue_objects import file, path
12
5
 
13
6
  from blue_assistant import NAME
14
7
  from blue_assistant.script.repository.base.root import RootScript
@@ -22,230 +15,7 @@ NAME = module.name(__file__, NAME)
22
15
  class BaseScript(RootScript):
23
16
  name = path.name(file.path(__file__))
24
17
 
25
- def __init__(
26
- self,
27
- object_name: str,
28
- test_mode: bool = False,
29
- verbose: bool = False,
30
- ):
31
- self.object_name = object_name
32
-
33
- self.test_mode = test_mode
34
-
35
- self.verbose = verbose
18
+ def __init__(self, **kwargs):
19
+ super().__init__(**kwargs)
36
20
 
37
21
  self.dict_of_actions = copy.deepcopy(dict_of_actions)
38
-
39
- metadata_filename = os.path.join(
40
- file.path(__file__),
41
- f"../{self.name}",
42
- "metadata.yaml",
43
- )
44
- self.metadata: Dict
45
- success, self.metadata = file.load_yaml(metadata_filename)
46
- assert success, f"cannot load {self.name}/metadata.yaml"
47
-
48
- self.metadata.setdefault("script", {})
49
- assert isinstance(
50
- self.script,
51
- dict,
52
- ), "script: expected dict, received {}.".format(
53
- self.script.__class__.__name__,
54
- )
55
-
56
- self.script.setdefault("nodes", {})
57
- assert isinstance(
58
- self.nodes,
59
- dict,
60
- ), "nodes: expected dict, received {}.".format(
61
- self.nodes.__class__.__name__,
62
- )
63
-
64
- self.script.setdefault("vars", {})
65
- assert isinstance(
66
- self.vars,
67
- dict,
68
- ), "vars: expected dict, received {}.".format(
69
- self.vars.__class__.__name__,
70
- )
71
-
72
- if self.test_mode:
73
- logger.info("🧪 test mode is on.")
74
-
75
- if "test_mode" in self.script:
76
- updates = self.script["test_mode"]
77
- logger.info(f"🧪 vars.update({updates})")
78
- self.vars.update(updates)
79
-
80
- for node_name, node in self.nodes.items():
81
- if "test_mode" in node:
82
- updates = node["test_mode"]
83
- logger.info(f"🧪 {node_name}.update({updates})")
84
- node.update(updates)
85
-
86
- logger.info(
87
- "loaded {} node(s): {}".format(
88
- len(self.nodes),
89
- ", ".join(self.nodes.keys()),
90
- )
91
- )
92
-
93
- logger.info(
94
- "loaded {} var(s): {}".format(
95
- len(self.vars),
96
- ", ".join(self.vars.keys()),
97
- )
98
- )
99
- if verbose:
100
- for var_name, var_value in self.vars.items():
101
- logger.info("{}: {}".format(var_name, var_value))
102
-
103
- assert self.generate_graph(), "cannot generate graph."
104
-
105
- def __str__(self) -> str:
106
- return "{}[{} var(s), {} node(s) -> {}]".format(
107
- self.__class__.__name__,
108
- len(self.vars),
109
- len(self.nodes),
110
- self.object_name,
111
- )
112
-
113
- def apply_vars(self, text: str) -> str:
114
- for var_name, var_value in self.vars.items():
115
- text = text.replace(f":::{var_name}", str(var_value))
116
-
117
- for node_name, node in self.nodes.items():
118
- text = text.replace(f":::{node_name}", node.get("output", ""))
119
-
120
- return text
121
-
122
- def generate_graph(
123
- self,
124
- verbose: bool = False,
125
- ) -> bool:
126
- self.G: nx.DiGraph = nx.DiGraph()
127
-
128
- list_of_nodes = list(self.nodes.keys())
129
- for node in self.nodes.values():
130
- list_of_nodes += node.get("depends-on", "").split(",")
131
-
132
- list_of_nodes = list({node_name for node_name in list_of_nodes if node_name})
133
- if verbose:
134
- logger.info(
135
- "{} node(s): {}".format(
136
- len(list_of_nodes),
137
- ", ".join(list_of_nodes),
138
- )
139
- )
140
-
141
- for node_name in list_of_nodes:
142
- self.G.add_node(node_name)
143
-
144
- for node_name, node in self.nodes.items():
145
- for dependency in node.get("depends-on", "").split(","):
146
- if dependency:
147
- self.G.add_edge(node_name, dependency)
148
-
149
- return self.save_graph()
150
-
151
- def get_context(
152
- self,
153
- node_name: str,
154
- ) -> List[str]:
155
- return reduce(
156
- lambda x, y: x + y,
157
- [self.get_context(successor) for successor in self.G.successors(node_name)],
158
- [node_name],
159
- )
160
-
161
- def perform_action(
162
- self,
163
- node_name: str,
164
- ) -> bool:
165
- action_name = self.nodes[node_name].get("action", "unknown")
166
- logger.info(f"---- node: {node_name} ---- ")
167
-
168
- if action_name in self.dict_of_actions:
169
- return self.dict_of_actions[action_name](
170
- script=self,
171
- node_name=node_name,
172
- )
173
-
174
- logger.error(f"{action_name}: action not found.")
175
- return False
176
-
177
- def run(self) -> bool:
178
- logger.info(f"{self.name}.run -> {self.object_name}")
179
-
180
- success: bool = True
181
- while (
182
- not all(self.nodes[node].get("completed", False) for node in self.nodes)
183
- and success
184
- ):
185
- for node_name in tqdm(self.nodes):
186
- if self.nodes[node_name].get("completed", False):
187
- continue
188
-
189
- if not self.nodes[node_name].get("runnable", True):
190
- logger.info(f"Not runnable, skipped: {node_name}.")
191
- self.nodes[node_name]["completed"] = True
192
- continue
193
-
194
- pending_dependencies = [
195
- node_name_
196
- for node_name_ in self.G.successors(node_name)
197
- if not self.nodes[node_name_].get("completed", False)
198
- ]
199
- if pending_dependencies:
200
- logger.info(
201
- 'node "{}": {} pending dependenci(es): {}'.format(
202
- node_name,
203
- len(pending_dependencies),
204
- ", ".join(pending_dependencies),
205
- )
206
- )
207
- continue
208
-
209
- if not self.perform_action(node_name=node_name):
210
- success = False
211
- break
212
-
213
- self.nodes[node_name]["completed"] = True
214
-
215
- if not post_to_object(
216
- self.object_name,
217
- "output",
218
- self.metadata,
219
- ):
220
- success = False
221
-
222
- return success
223
-
224
- def save_graph(self) -> bool:
225
- return dot_file.save_to_file(
226
- objects.path_of(
227
- filename="workflow.dot",
228
- object_name=self.object_name,
229
- ),
230
- self.G,
231
- caption=" | ".join(
232
- [
233
- self.name,
234
- self.object_name,
235
- ]
236
- ),
237
- add_legend=False,
238
- )
239
-
240
- # Aliases
241
- @property
242
- def script(self) -> Dict:
243
- return self.metadata["script"]
244
-
245
- @property
246
- def nodes(self) -> Dict[str, Dict]:
247
- return self.metadata["script"]["nodes"]
248
-
249
- @property
250
- def vars(self) -> Dict:
251
- return self.metadata["script"]["vars"]
@@ -1,5 +1,276 @@
1
- from blue_objects import file, objects, path
1
+ import os
2
+ from typing import Dict, List
3
+ from functools import reduce
4
+ import networkx as nx
5
+ from tqdm import tqdm
6
+
7
+ from blue_options.options import Options
8
+ from blue_objects import file, path, objects
9
+ from blue_objects.metadata import post_to_object
10
+ from blueflow.workflow import dot_file
11
+
12
+ from blue_assistant.logger import logger
2
13
 
3
14
 
4
15
  class RootScript:
5
16
  name = path.name(file.path(__file__))
17
+
18
+ def __init__(
19
+ self,
20
+ object_name: str,
21
+ test_mode: bool = False,
22
+ verbose: bool = False,
23
+ ):
24
+ self.nodes_changed = False
25
+
26
+ self.object_name = object_name
27
+
28
+ self.test_mode = test_mode
29
+
30
+ self.verbose = verbose
31
+
32
+ self.dict_of_actions = {}
33
+
34
+ metadata_filename = os.path.join(
35
+ file.path(__file__),
36
+ f"../{self.name}",
37
+ "metadata.yaml",
38
+ )
39
+ self.metadata: Dict
40
+ success, self.metadata = file.load_yaml(metadata_filename)
41
+ assert success, f"cannot load {self.name}/metadata.yaml"
42
+
43
+ self.metadata.setdefault("script", {})
44
+ assert isinstance(
45
+ self.script,
46
+ dict,
47
+ ), "script: expected dict, received {}.".format(
48
+ self.script.__class__.__name__,
49
+ )
50
+
51
+ self.script.setdefault("nodes", {})
52
+ assert isinstance(
53
+ self.nodes,
54
+ dict,
55
+ ), "nodes: expected dict, received {}.".format(
56
+ self.nodes.__class__.__name__,
57
+ )
58
+
59
+ self.script.setdefault("vars", {})
60
+ assert isinstance(
61
+ self.vars,
62
+ dict,
63
+ ), "vars: expected dict, received {}.".format(
64
+ self.vars.__class__.__name__,
65
+ )
66
+
67
+ if self.test_mode:
68
+ logger.info("🧪 test mode is on.")
69
+
70
+ if "test_mode" in self.script:
71
+ updates = self.script["test_mode"]
72
+ logger.info(f"🧪 vars.update({updates})")
73
+ self.vars.update(updates)
74
+
75
+ for node_name, node in self.nodes.items():
76
+ if "test_mode" in node:
77
+ updates = node["test_mode"]
78
+ logger.info(f"🧪 {node_name}.update({updates})")
79
+ node.update(updates)
80
+
81
+ logger.info(
82
+ "loaded {} node(s): {}".format(
83
+ len(self.nodes),
84
+ ", ".join(self.nodes.keys()),
85
+ )
86
+ )
87
+
88
+ logger.info(
89
+ "loaded {} var(s): {}".format(
90
+ len(self.vars),
91
+ ", ".join(self.vars.keys()),
92
+ )
93
+ )
94
+ if verbose:
95
+ for var_name, var_value in self.vars.items():
96
+ logger.info("{}: {}".format(var_name, var_value))
97
+
98
+ assert self.generate_graph(), "cannot generate graph."
99
+
100
+ def __str__(self) -> str:
101
+ return "{}[{} var(s), {} node(s) -> {}]".format(
102
+ self.__class__.__name__,
103
+ len(self.vars),
104
+ len(self.nodes),
105
+ self.object_name,
106
+ )
107
+
108
+ def apply_vars(self, text: str) -> str:
109
+ for var_name, var_value in self.vars.items():
110
+ text = text.replace(f":::{var_name}", str(var_value))
111
+
112
+ for node_name, node in self.nodes.items():
113
+ node_output = node.get("output", "")
114
+ if isinstance(node_output, str):
115
+ text = text.replace(f":::{node_name}", node_output)
116
+
117
+ return text
118
+
119
+ def generate_graph(
120
+ self,
121
+ verbose: bool = False,
122
+ ) -> bool:
123
+ self.G: nx.DiGraph = nx.DiGraph()
124
+
125
+ list_of_nodes = list(self.nodes.keys())
126
+ for node in self.nodes.values():
127
+ list_of_nodes += node.get("depends-on", "").split(",")
128
+
129
+ list_of_nodes = list({node_name for node_name in list_of_nodes if node_name})
130
+ if verbose:
131
+ logger.info(
132
+ "{} node(s): {}".format(
133
+ len(list_of_nodes),
134
+ ", ".join(list_of_nodes),
135
+ )
136
+ )
137
+
138
+ for node_name in list_of_nodes:
139
+ self.G.add_node(node_name)
140
+
141
+ for node_name, node in self.nodes.items():
142
+ for dependency in node.get("depends-on", "").split(","):
143
+ if dependency:
144
+ self.G.add_edge(node_name, dependency)
145
+
146
+ return self.save_graph()
147
+
148
+ def get_context(
149
+ self,
150
+ node_name: str,
151
+ ) -> List[str]:
152
+ return reduce(
153
+ lambda x, y: x + y,
154
+ [self.get_context(successor) for successor in self.G.successors(node_name)],
155
+ [node_name],
156
+ )
157
+
158
+ def perform_action(
159
+ self,
160
+ node_name: str,
161
+ ) -> bool:
162
+ action_name = self.nodes[node_name].get("action", "unknown")
163
+ logger.info(f"---- node: {node_name} ---- ")
164
+
165
+ if action_name in self.dict_of_actions:
166
+ return self.dict_of_actions[action_name](
167
+ script=self,
168
+ node_name=node_name,
169
+ )
170
+
171
+ logger.error(f"{action_name}: action not found.")
172
+ return False
173
+
174
+ def run(
175
+ self,
176
+ runnable: str = "",
177
+ ) -> bool:
178
+ logger.info(f"{self.name}.run -> {self.object_name}")
179
+
180
+ if runnable:
181
+ logger.info(f"applying runnables: {runnable}")
182
+ runnable_options = Options(runnable)
183
+ for node_name, node_is_runnable in runnable_options.items():
184
+ logger.info(f"{node_name}.runnable={node_is_runnable}")
185
+ self.nodes[node_name]["runnable"] = node_is_runnable
186
+
187
+ success: bool = True
188
+ while (
189
+ not all(self.nodes[node].get("completed", False) for node in self.nodes)
190
+ and success
191
+ ):
192
+ self.nodes_changed = False
193
+
194
+ for node_name in tqdm(self.nodes):
195
+ if self.nodes[node_name].get("completed", False):
196
+ continue
197
+
198
+ if not self.nodes[node_name].get("runnable", True):
199
+ logger.info(f"Not runnable, skipped: {node_name}.")
200
+ self.nodes[node_name]["completed"] = True
201
+ continue
202
+
203
+ pending_dependencies = [
204
+ node_name_
205
+ for node_name_ in self.G.successors(node_name)
206
+ if not self.nodes[node_name_].get("completed", False)
207
+ ]
208
+ if pending_dependencies:
209
+ logger.info(
210
+ 'node "{}": {} pending dependenci(es): {}'.format(
211
+ node_name,
212
+ len(pending_dependencies),
213
+ ", ".join(pending_dependencies),
214
+ )
215
+ )
216
+ continue
217
+
218
+ if not self.perform_action(node_name=node_name):
219
+ success = False
220
+ break
221
+
222
+ self.nodes[node_name]["completed"] = True
223
+
224
+ cache_filename = self.nodes[node_name].get("cache", "")
225
+ if cache_filename:
226
+ if not file.save_text(
227
+ objects.path_of(
228
+ object_name=self.object_name,
229
+ filename=cache_filename,
230
+ ),
231
+ [self.nodes[node_name].get("output", "")],
232
+ ):
233
+ success = False
234
+ break
235
+
236
+ if self.nodes_changed:
237
+ logger.info("🪄 nodes changed.")
238
+ break
239
+
240
+ if not post_to_object(
241
+ self.object_name,
242
+ "output",
243
+ self.metadata,
244
+ ):
245
+ success = False
246
+
247
+ return success
248
+
249
+ def save_graph(self) -> bool:
250
+ return dot_file.save_to_file(
251
+ objects.path_of(
252
+ filename="workflow.dot",
253
+ object_name=self.object_name,
254
+ ),
255
+ self.G,
256
+ caption=" | ".join(
257
+ [
258
+ self.name,
259
+ self.object_name,
260
+ ]
261
+ ),
262
+ add_legend=False,
263
+ )
264
+
265
+ # Aliases
266
+ @property
267
+ def script(self) -> Dict:
268
+ return self.metadata["script"]
269
+
270
+ @property
271
+ def nodes(self) -> Dict[str, Dict]:
272
+ return self.metadata["script"]["nodes"]
273
+
274
+ @property
275
+ def vars(self) -> Dict:
276
+ return self.metadata["script"]["vars"]
@@ -14,17 +14,8 @@ NAME = module.name(__file__, NAME)
14
14
  class BlueAmoScript(BaseScript):
15
15
  name = path.name(file.path(__file__))
16
16
 
17
- def __init__(
18
- self,
19
- object_name: str,
20
- test_mode: bool = False,
21
- verbose: bool = False,
22
- ):
23
- super().__init__(
24
- object_name=object_name,
25
- test_mode=test_mode,
26
- verbose=verbose,
27
- )
17
+ def __init__(self, **kwargs):
18
+ super().__init__(**kwargs)
28
19
 
29
20
  self.dict_of_actions.update(dict_of_actions)
30
21
 
@@ -0,0 +1,11 @@
1
+ from typing import Dict, Callable
2
+
3
+ from blue_assistant.script.repository.base.classes import BaseScript
4
+ from blue_assistant.script.repository.orbital_data_explorer.actions import (
5
+ expanding_the_extractions,
6
+ )
7
+
8
+
9
+ dict_of_actions: Dict[str, Callable[[BaseScript, str], bool]] = {
10
+ "expanding_the_extractions": expanding_the_extractions.expanding_the_extractions,
11
+ }
@@ -0,0 +1,109 @@
1
+ import copy
2
+ from typing import Dict
3
+
4
+ from blueness import module
5
+ from blue_options.logger import log_dict, log_list
6
+ from blue_objects import file, objects
7
+ from blue_objects.metadata import get_from_object
8
+
9
+ from blue_assistant import NAME
10
+ from blue_assistant.script.repository.base.classes import BaseScript
11
+ from blue_assistant.web.functions import url_to_filename
12
+ from blue_assistant.logger import logger
13
+
14
+ NAME = module.name(__file__, NAME)
15
+
16
+
17
+ def expanding_the_extractions(
18
+ script: BaseScript,
19
+ node_name: str,
20
+ ) -> bool:
21
+ map_node_name = "extraction"
22
+
23
+ crawl_cache: Dict[str, str] = get_from_object(
24
+ script.object_name,
25
+ "web_crawl_cache",
26
+ {},
27
+ )
28
+ log_dict(logger, "using", crawl_cache, "crawled url(s)")
29
+
30
+ list_of_urls = [
31
+ url
32
+ for url, content_type in crawl_cache.items()
33
+ if "html" in content_type
34
+ and not file.exists(
35
+ objects.path_of(
36
+ object_name=script.object_name,
37
+ filename="{}_cache/{}.txt".format(
38
+ map_node_name,
39
+ url_to_filename(url),
40
+ ),
41
+ )
42
+ )
43
+ ]
44
+ log_list(logger, "using", list_of_urls, "crawled unextracted html(s).")
45
+
46
+ max_nodes = min(
47
+ len(list_of_urls),
48
+ script.nodes[node_name]["max_nodes"],
49
+ )
50
+ logger.info(
51
+ "{}: expanding {} X {}...".format(
52
+ NAME,
53
+ map_node_name,
54
+ max_nodes,
55
+ )
56
+ )
57
+
58
+ map_node = script.nodes[map_node_name]
59
+ del script.nodes[map_node_name]
60
+ script.G.remove_node(map_node_name)
61
+
62
+ reduce_node_name = "generating_summary"
63
+ for index in range(max_nodes):
64
+ url = list_of_urls[index]
65
+ index_node_name = f"{map_node_name}_{index+1:03d}"
66
+
67
+ success, url_content = file.load_yaml(
68
+ filename=objects.path_of(
69
+ object_name=script.object_name,
70
+ filename="web_crawl_cache/{}.yaml".format(
71
+ url_to_filename(url),
72
+ ),
73
+ ),
74
+ )
75
+ if not success:
76
+ logger.warning(f"{url}: failed to load url content.")
77
+ continue
78
+ if "text" not in url_content:
79
+ logger.warning(f"{url}: no text found in url content.")
80
+ continue
81
+
82
+ logger.info(f"{url} -{map_node_name}-> {index_node_name}")
83
+
84
+ script.nodes[index_node_name] = copy.deepcopy(map_node)
85
+
86
+ script.nodes[index_node_name]["prompt"] = map_node["prompt"].replace(
87
+ ":::url_content",
88
+ url_content["text"],
89
+ )
90
+
91
+ script.nodes[index_node_name]["url"] = url
92
+ script.nodes[index_node_name]["cache"] = "{}_cache/{}.txt".format(
93
+ map_node_name,
94
+ url_to_filename(url),
95
+ )
96
+
97
+ script.G.add_node(index_node_name)
98
+ script.G.add_edge(
99
+ index_node_name,
100
+ node_name,
101
+ )
102
+ script.G.add_edge(
103
+ reduce_node_name,
104
+ index_node_name,
105
+ )
106
+
107
+ script.nodes_changed = True
108
+
109
+ return script.save_graph()
@@ -1,7 +1,15 @@
1
1
  from blue_objects import file, path
2
2
 
3
3
  from blue_assistant.script.repository.base.classes import BaseScript
4
+ from blue_assistant.script.repository.orbital_data_explorer.actions import (
5
+ dict_of_actions,
6
+ )
4
7
 
5
8
 
6
9
  class OrbitalDataExplorerScript(BaseScript):
7
10
  name = path.name(file.path(__file__))
11
+
12
+ def __init__(self, **kwargs):
13
+ super().__init__(**kwargs)
14
+
15
+ self.dict_of_actions.update(dict_of_actions)
@@ -35,7 +35,7 @@ def crawl_list_of_urls(
35
35
  seed_urls
36
36
  + get_from_object(
37
37
  object_name,
38
- f"{cache_prefix}_crawl_queue",
38
+ f"{cache_prefix}_queue",
39
39
  [],
40
40
  )
41
41
  )
@@ -44,7 +44,7 @@ def crawl_list_of_urls(
44
44
 
45
45
  crawl_cache: Dict[str, str] = get_from_object(
46
46
  object_name,
47
- f"{cache_prefix}_crawl_cache",
47
+ f"{cache_prefix}_cache",
48
48
  {},
49
49
  )
50
50
  log_dict(logger, "loaded cache:", crawl_cache, "url(s)")
@@ -75,7 +75,7 @@ def crawl_list_of_urls(
75
75
  if not file.save_yaml(
76
76
  filename=objects.path_of(
77
77
  object_name=object_name,
78
- filename="{}_crawl_cache/{}.yaml".format(
78
+ filename="{}_cache/{}.yaml".format(
79
79
  cache_prefix,
80
80
  url_to_filename(url),
81
81
  ),
@@ -108,12 +108,12 @@ def crawl_list_of_urls(
108
108
  if not (
109
109
  post_to_object(
110
110
  object_name,
111
- f"{cache_prefix}_crawl_cache",
111
+ f"{cache_prefix}_cache",
112
112
  crawl_cache,
113
113
  )
114
114
  and post_to_object(
115
115
  object_name,
116
- f"{cache_prefix}_crawl_queue",
116
+ f"{cache_prefix}_queue",
117
117
  queue,
118
118
  )
119
119
  ):
@@ -9,6 +9,7 @@ from blueness import module
9
9
  from blue_options.logger import log_long_text, log_list
10
10
 
11
11
  from blue_assistant import NAME
12
+ from blue_assistant.web.functions import normalize_url
12
13
  from blue_assistant.logger import logger
13
14
 
14
15
  warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
@@ -49,8 +50,7 @@ def fetch_links_and_text(
49
50
  for a_tag in soup.find_all("a", href=True):
50
51
  a_url = urljoin(url, a_tag["href"])
51
52
 
52
- if "#" in a_url:
53
- a_url = a_url.split("#", 1)[0]
53
+ a_url = normalize_url(a_url)
54
54
 
55
55
  if a_url.startswith(url):
56
56
  if url not in list_of_urls:
@@ -1,4 +1,15 @@
1
1
  import re
2
+ from urllib.parse import urlparse, urlunparse
3
+
4
+
5
+ # https://chatgpt.com/c/67d733a0-4be4-8005-bf52-fb9ba32487c2
6
+ def normalize_url(url: str) -> str:
7
+ return urlunparse(
8
+ urlparse(url)._replace(
9
+ query="",
10
+ fragment="",
11
+ ),
12
+ )
2
13
 
3
14
 
4
15
  def url_to_filename(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: blue_assistant
3
- Version: 4.307.1
3
+ Version: 4.319.1
4
4
  Summary: 🧠 An AI Assistant.
5
5
  Home-page: https://github.com/kamangir/blue-assistant
6
6
  Author: Arash Abadpour (Kamangir)
@@ -124,4 +124,4 @@ Also home to [`@web`](https://raw.githubusercontent.com/kamangir/blue-assistant/
124
124
 
125
125
  [![pylint](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml) [![pytest](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml) [![bashtest](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml) [![PyPI version](https://img.shields.io/pypi/v/blue-assistant.svg)](https://pypi.org/project/blue-assistant/) [![PyPI - Downloads](https://img.shields.io/pypi/dd/blue-assistant)](https://pypistats.org/packages/blue-assistant)
126
126
 
127
- built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.307.1`](https://github.com/kamangir/blue-assistant).
127
+ built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.319.1`](https://github.com/kamangir/blue-assistant).
@@ -1,5 +1,5 @@
1
1
  blue_assistant/README.py,sha256=EJORj3I5pucJplI86lrFaZBN5C9IYNgKoG_V7h27NHw,2586
2
- blue_assistant/__init__.py,sha256=4nXZv5rPPASKgOKBQisBlassDX88WzJlwFFsjovEWNQ,311
2
+ blue_assistant/__init__.py,sha256=QCw7ze05yQxWjXRvjx1vYtxUHZldMVhjcBd1-l39zvE,311
3
3
  blue_assistant/__main__.py,sha256=URtal70XZc0--3FDTYWcLtnGOqBYjMX9gt-L1k8hDXI,361
4
4
  blue_assistant/config.env,sha256=npodyuuhkZUHUv9FnEiQQZkKxFbg8nQb1YpOCURqV3Y,301
5
5
  blue_assistant/env.py,sha256=FTSdJ8-J4jAyI0-h3MBgOweQBWd3YEFIibBHSXpClrY,760
@@ -34,22 +34,22 @@ blue_assistant/help/__init__.py,sha256=ajz1GSNU9xYVrFEDSz6Xwg7amWQ_yvW75tQa1ZvRI
34
34
  blue_assistant/help/__main__.py,sha256=cVejR7OpoWPg0qLbm-PZf5TuJS27x49jzfiyCLyzEns,241
35
35
  blue_assistant/help/functions.py,sha256=O85zVEMtnm32O7KB6W6uQRoFXnE_4dW5pwYZtMakYDg,865
36
36
  blue_assistant/help/hue.py,sha256=ZElPG24ekiS7eIGLVrP2gB_womlGUuwln2cded4Li-c,2319
37
- blue_assistant/help/script.py,sha256=UalSgd0uLCcyQL1S3429NnRN_BaR_-Mj7WS1JqIOKXs,1122
37
+ blue_assistant/help/script.py,sha256=THeHam9PoS_2I9G06ggMH1R8kB0lzz_8qOQNYPhl7Zw,1164
38
38
  blue_assistant/help/web.py,sha256=LNJRbMXipXUojJmmTghY9YAxFqPDLTCvcRCfpJrfgvk,918
39
39
  blue_assistant/script/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
- blue_assistant/script/__main__.py,sha256=eOSOo5yYTPMwIXZ0GkuWkmOcsDWrZtHvClyJizXSk2w,1657
40
+ blue_assistant/script/__main__.py,sha256=O8zPwcgil-x0YmqIl0GWbbSEcpFDrx0e_VTvqBwG8Io,1802
41
41
  blue_assistant/script/load.py,sha256=JsDY9T3HTM9vXngvKsA0Mt_erxAnRR_jI62-JhrOBMU,831
42
42
  blue_assistant/script/actions/__init__.py,sha256=rTJw42KtMcsOMU1Z-h1e5Nch2Iax-t2P84vPZ-ccR_c,573
43
43
  blue_assistant/script/actions/generate_image.py,sha256=SJLKkeduWkJgN-0Y8H3ov8xSw3MBpmjQSBTR9vwXstA,1343
44
44
  blue_assistant/script/actions/generate_text.py,sha256=LJmXHZBpLdMMnE5SJGbv03bELTlG1zLav8XW2QLtRWI,2023
45
45
  blue_assistant/script/actions/generic.py,sha256=UkyaM16qXdmTAVfduo6niCpHk5nB7rir-9oIW1VdwOg,343
46
- blue_assistant/script/actions/web_crawl.py,sha256=iA55aFt3K3RrlBgzPnMmLync5cl2mSR_JdokMmO_ATo,1446
46
+ blue_assistant/script/actions/web_crawl.py,sha256=aq-Jq2sFeUcU2EzlOnK_X_L3Lim_x8SMhxNJJ9hNV-g,1553
47
47
  blue_assistant/script/repository/__init__.py,sha256=zxqxFim6RdNhQLU3SWVytMwsf0NyhX1c_Mhi-ZUFi2w,658
48
48
  blue_assistant/script/repository/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
- blue_assistant/script/repository/base/classes.py,sha256=PpVwRQCT839jcxphLSBEBLCFujwH5zsL_e0ZyUhDPuc,7427
50
- blue_assistant/script/repository/base/root.py,sha256=x0u6y-c3z3ar4gMEA08XodvUFdFhzD1mNs4htOwIJII,107
49
+ blue_assistant/script/repository/base/classes.py,sha256=AW-fK0CiAosIDPVPA9aKOIlEgn3oQq-2L265vzTphSQ,519
50
+ blue_assistant/script/repository/base/root.py,sha256=4yGbMTlSmbQXGcOhOInElBiN1AqBY9TED02qa_ee4Hk,8347
51
51
  blue_assistant/script/repository/blue_amo/__init__.py,sha256=WjL9GIlN-DBnbUMJ8O_FxTp0rcVGlsIS3H9YtXEefTk,76
52
- blue_assistant/script/repository/blue_amo/classes.py,sha256=9YMT8UdYMQ-bXkZ_Qv-Wv4xxVSczclKQZ0d4077y3jc,1771
52
+ blue_assistant/script/repository/blue_amo/classes.py,sha256=ZYjnCUocGB-3K-gBrq4_CasR5vBQ1OfIECcpf_XZfgE,1576
53
53
  blue_assistant/script/repository/blue_amo/actions/__init__.py,sha256=JE4XK5Do64kLfAUxKTa15vkqUQ5JMCiHZfm03blBxi8,438
54
54
  blue_assistant/script/repository/blue_amo/actions/setting_frame_prompts.py,sha256=4YkEsWNhFg_5crYDMPglUVjlWpoG0ditKbUittYiFo4,1205
55
55
  blue_assistant/script/repository/blue_amo/actions/stitching_the_frames.py,sha256=mbXriat6deEAmuo5Y1ValySnUXDENR7TZS_3nVPlQ6M,3622
@@ -59,14 +59,16 @@ blue_assistant/script/repository/hue/api.py,sha256=C3KzT_MG868gsznUXpwEbUleBjnJO
59
59
  blue_assistant/script/repository/hue/classes.py,sha256=AbihLKw4Ii9MHnCQj1qOrZiZhQh6LhbDcN3kLTigQ00,179
60
60
  blue_assistant/script/repository/hue/colors.py,sha256=rUdtCroNAnzm1zUuVp8eVhvfIie1f7sd208ypsFAJ_w,625
61
61
  blue_assistant/script/repository/orbital_data_explorer/__init__.py,sha256=yy5FtCeHlr9dRfqxw4QYWr7_yRjnQpwVyuAY2vLrh4Q,110
62
- blue_assistant/script/repository/orbital_data_explorer/classes.py,sha256=Hx-74_wnWedidy9_WtR-dQLqLYK-GH-KshG1Ul8jnuY,195
62
+ blue_assistant/script/repository/orbital_data_explorer/classes.py,sha256=C75k_GNq6lG8lPiETR3n1E6T2ier52_DqXxk1HmELlY,419
63
+ blue_assistant/script/repository/orbital_data_explorer/actions/__init__.py,sha256=TKOLK61Z18ygpnVjOtjeB4h4irvyyAlc04Wlu7QrSxI,370
64
+ blue_assistant/script/repository/orbital_data_explorer/actions/expanding_the_extractions.py,sha256=MZ729BIVrTniE-r-1kh7F21QB7dqZmgbXw4cQ6Nnkjw,3060
63
65
  blue_assistant/web/__init__.py,sha256=70_JSpnfX1mLm8Xv3xHIujfr2FfGeHPRs6HraWDP1XA,114
64
66
  blue_assistant/web/__main__.py,sha256=aLkMmUpeWSOxa7YQVbtL90ZNbOcr1OeT0rymw90jx7A,1436
65
- blue_assistant/web/crawl.py,sha256=S_Y-jZ_-K9Q2yLG6r-cdYphK4p0dXPluDap4h5jBLY0,3373
66
- blue_assistant/web/fetch.py,sha256=meso5ssN6OEk2xcPo3VMmFsXLqPlBVZ2FxureWoIYag,2546
67
- blue_assistant/web/functions.py,sha256=uJAC_kGOn2wA9AwOB_FB2f1dFYyNaEPPC42lN3klnFc,618
68
- blue_assistant-4.307.1.dist-info/LICENSE,sha256=ogEPNDSH0_dhiv_lT3ifVIdgIzHAqNA_SemnxUfPBJk,7048
69
- blue_assistant-4.307.1.dist-info/METADATA,sha256=rmNwepvSbB5ifBsZjI89diZZHyzGZaQi4KpoHaebZGQ,5380
70
- blue_assistant-4.307.1.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
71
- blue_assistant-4.307.1.dist-info/top_level.txt,sha256=ud0BkBbdOVze13bNqHuhZj1rwCztaBtDf5ChEYzASOs,15
72
- blue_assistant-4.307.1.dist-info/RECORD,,
67
+ blue_assistant/web/crawl.py,sha256=w77MNqVSLDE6nm7XuwWU7JMOcm26ISdA_fjT7Ec2bi8,3343
68
+ blue_assistant/web/fetch.py,sha256=WihKsEdF4q8SVMa1IJa-O2BnYbNSr3uzNykJnVuSyrQ,2566
69
+ blue_assistant/web/functions.py,sha256=TVsQbgtkWg4Hy6E2hLJ1bJqjrL6rcmGAxmYuqLUFeSw,882
70
+ blue_assistant-4.319.1.dist-info/LICENSE,sha256=ogEPNDSH0_dhiv_lT3ifVIdgIzHAqNA_SemnxUfPBJk,7048
71
+ blue_assistant-4.319.1.dist-info/METADATA,sha256=N5fnp_I56AXZwdFeI7D5Oc8p_t3dxxO7JITgJj1fLYs,5380
72
+ blue_assistant-4.319.1.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
73
+ blue_assistant-4.319.1.dist-info/top_level.txt,sha256=ud0BkBbdOVze13bNqHuhZj1rwCztaBtDf5ChEYzASOs,15
74
+ blue_assistant-4.319.1.dist-info/RECORD,,