blue-assistant 4.243.1__py3-none-any.whl → 4.273.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- blue_assistant/.abcli/script/run.sh +3 -1
- blue_assistant/.abcli/tests/script_run.sh +3 -1
- blue_assistant/.abcli/web/crawl.sh +5 -0
- blue_assistant/README.py +22 -8
- blue_assistant/__init__.py +1 -1
- blue_assistant/help/script.py +1 -1
- blue_assistant/help/web.py +1 -1
- blue_assistant/script/__main__.py +9 -1
- blue_assistant/script/actions/__init__.py +3 -2
- blue_assistant/script/actions/generate_image.py +1 -0
- blue_assistant/script/actions/generate_text.py +1 -0
- blue_assistant/script/actions/generic.py +1 -0
- blue_assistant/script/actions/web_crawl.py +49 -0
- blue_assistant/script/repository/base/classes.py +68 -10
- blue_assistant/script/repository/blue_amo/actions/slicing_into_frames.py +1 -0
- blue_assistant/script/repository/blue_amo/actions/stitching_the_frames.py +1 -0
- blue_assistant/script/repository/blue_amo/classes.py +6 -1
- blue_assistant/script/repository/generic/classes.py +11 -3
- blue_assistant/script/repository/orbital_data_explorer/classes.py +0 -33
- blue_assistant/web/__init__.py +2 -0
- blue_assistant/web/__main__.py +10 -6
- blue_assistant/web/crawl.py +91 -0
- blue_assistant/web/fetch.py +75 -0
- blue_assistant/web/functions.py +15 -73
- {blue_assistant-4.243.1.dist-info → blue_assistant-4.273.1.dist-info}/METADATA +4 -4
- {blue_assistant-4.243.1.dist-info → blue_assistant-4.273.1.dist-info}/RECORD +29 -28
- blue_assistant/script/repository/orbital_data_explorer/actions/__init__.py +0 -11
- blue_assistant/script/repository/orbital_data_explorer/actions/researching_the_questions.py +0 -42
- {blue_assistant-4.243.1.dist-info → blue_assistant-4.273.1.dist-info}/LICENSE +0 -0
- {blue_assistant-4.243.1.dist-info → blue_assistant-4.273.1.dist-info}/WHEEL +0 -0
- {blue_assistant-4.243.1.dist-info → blue_assistant-4.273.1.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,8 @@
|
|
3
3
|
function blue_assistant_script_run() {
|
4
4
|
local options=$1
|
5
5
|
local do_dryrun=$(abcli_option_int "$options" dryrun 0)
|
6
|
-
local
|
6
|
+
local use_cache=$(abcli_option_int "$options" cache 1)
|
7
|
+
local do_download=$(abcli_option_int "$options" download $use_cache)
|
7
8
|
local do_upload=$(abcli_option_int "$options" upload $(abcli_not $do_dryrun))
|
8
9
|
|
9
10
|
local script_options=$2
|
@@ -20,6 +21,7 @@ function blue_assistant_script_run() {
|
|
20
21
|
run \
|
21
22
|
--script_name $script_name \
|
22
23
|
--object_name $object_name \
|
24
|
+
--use_cache $use_cache \
|
23
25
|
"${@:4}"
|
24
26
|
[[ $? -ne 0 ]] && return 1
|
25
27
|
|
@@ -11,11 +11,13 @@ function test_blue_assistant_script_run() {
|
|
11
11
|
for script_name in $(echo "$list_of_script_name" | tr + " "); do
|
12
12
|
abcli_log "testing $script_name ..."
|
13
13
|
|
14
|
+
local object_name=test_blue_assistant_script_run-$(abcli_string_timestamp_short)
|
15
|
+
|
14
16
|
abcli_eval ,$options \
|
15
17
|
blue_assistant_script_run \
|
16
18
|
~upload,$options \
|
17
19
|
script=$script_name \
|
18
|
-
|
20
|
+
$object_name \
|
19
21
|
"${@:2}" \
|
20
22
|
--test_mode 1 \
|
21
23
|
--verbose 1
|
@@ -3,11 +3,15 @@
|
|
3
3
|
function blue_assistant_web_crawl() {
|
4
4
|
local options=$1
|
5
5
|
local do_dryrun=$(abcli_option_int "$options" dryrun 0)
|
6
|
+
local use_cache=$(abcli_option_int "$options" cache 0)
|
7
|
+
local do_download=$(abcli_option_int "$options" download $use_cache)
|
6
8
|
local do_upload=$(abcli_option_int "$options" upload $(abcli_not $do_dryrun))
|
7
9
|
|
8
10
|
local seed_urls=${2:-void}
|
9
11
|
|
10
12
|
local object_name=$(abcli_clarify_object $3 web-crawl-$(abcli_string_timestamp_short))
|
13
|
+
[[ "$do_download" == 1 ]] &&
|
14
|
+
abcli_download - $object_name
|
11
15
|
|
12
16
|
abcli_log "crawling $seed_urls -> $object_name ..."
|
13
17
|
|
@@ -16,6 +20,7 @@ function blue_assistant_web_crawl() {
|
|
16
20
|
crawl \
|
17
21
|
--seed_urls $seed_urls \
|
18
22
|
--object_name $object_name \
|
23
|
+
--use_cache $use_cache \
|
19
24
|
"${@:4}"
|
20
25
|
[[ $? -ne 0 ]] && return 1
|
21
26
|
|
blue_assistant/README.py
CHANGED
@@ -1,14 +1,28 @@
|
|
1
1
|
import os
|
2
2
|
|
3
|
+
from blue_options.help.functions import get_help
|
3
4
|
from blue_objects import file, README
|
4
5
|
|
5
6
|
from blue_assistant import NAME, VERSION, ICON, REPO_NAME
|
7
|
+
from blue_assistant.help.functions import help_functions
|
6
8
|
|
7
9
|
|
8
10
|
items = README.Items(
|
9
11
|
[
|
10
12
|
{
|
11
|
-
"name": "
|
13
|
+
"name": "orbital-data-explorer",
|
14
|
+
"url": "./blue_assistant/script/repository/orbital_data_explorer",
|
15
|
+
"marquee": "https://github.com/kamangir/assets/blob/main/blue-assistant/orbital-data-explorer.png?raw=true",
|
16
|
+
"description": "Access to the [Orbital Data Explorer](https://ode.rsl.wustl.edu/). 🔥",
|
17
|
+
},
|
18
|
+
{
|
19
|
+
"name": "🌀 blue script",
|
20
|
+
"marquee": "https://github.com/kamangir/assets/raw/main/blue-plugin/marquee.png?raw=true",
|
21
|
+
"description": "A minimal AI DAG interface.",
|
22
|
+
"url": "./blue_assistant/script/",
|
23
|
+
},
|
24
|
+
{
|
25
|
+
"name": "@hue",
|
12
26
|
"url": "./blue_assistant/script/repository/hue",
|
13
27
|
"marquee": "https://github.com/kamangir/assets/raw/main/blue-assistant/20250314_143702.jpg?raw=true",
|
14
28
|
"description": '"send a color command to the Hue LED lights in my apartment."',
|
@@ -17,13 +31,7 @@ items = README.Items(
|
|
17
31
|
"name": "blue-amo",
|
18
32
|
"url": "./blue_assistant/script/repository/blue_amo/README.md",
|
19
33
|
"marquee": "https://github.com/kamangir/assets/raw/main/blue-amo-2025-02-03-nswnx6/stitching_the_frames-2.png?raw=true",
|
20
|
-
"description": "
|
21
|
-
},
|
22
|
-
{
|
23
|
-
"name": "orbital-data-explorer",
|
24
|
-
"url": "./blue_assistant/script/repository/orbital_data_explorer",
|
25
|
-
"marquee": "https://github.com/kamangir/assets/blob/main/blue-assistant/orbital-data-explorer.png?raw=true",
|
26
|
-
"description": "Access to the [Orbital Data Explorer](https://ode.rsl.wustl.edu/), through AI. ⏸️",
|
34
|
+
"description": "Story development and visualization.",
|
27
35
|
},
|
28
36
|
]
|
29
37
|
)
|
@@ -39,6 +47,11 @@ def build():
|
|
39
47
|
NAME=NAME,
|
40
48
|
VERSION=VERSION,
|
41
49
|
REPO_NAME=REPO_NAME,
|
50
|
+
help_function=lambda tokens: get_help(
|
51
|
+
tokens,
|
52
|
+
help_functions,
|
53
|
+
mono=True,
|
54
|
+
),
|
42
55
|
)
|
43
56
|
for readme in [
|
44
57
|
{
|
@@ -54,6 +67,7 @@ def build():
|
|
54
67
|
{"path": "script/repository/hue/docs/round-1.md"},
|
55
68
|
{"path": "script/repository/hue/docs"},
|
56
69
|
#
|
70
|
+
{"path": "script/"},
|
57
71
|
{"path": "web/"},
|
58
72
|
]
|
59
73
|
)
|
blue_assistant/__init__.py
CHANGED
blue_assistant/help/script.py
CHANGED
blue_assistant/help/web.py
CHANGED
@@ -48,6 +48,12 @@ parser.add_argument(
|
|
48
48
|
default=1,
|
49
49
|
help="0 | 1",
|
50
50
|
)
|
51
|
+
parser.add_argument(
|
52
|
+
"--use_cache",
|
53
|
+
type=int,
|
54
|
+
default=1,
|
55
|
+
help="0 | 1",
|
56
|
+
)
|
51
57
|
args = parser.parse_args()
|
52
58
|
|
53
59
|
delim = " " if args.delim == "space" else args.delim
|
@@ -70,7 +76,9 @@ elif args.task == "run":
|
|
70
76
|
)
|
71
77
|
|
72
78
|
if success:
|
73
|
-
success = script.run(
|
79
|
+
success = script.run(
|
80
|
+
use_cache=args.use_cache == 1,
|
81
|
+
)
|
74
82
|
else:
|
75
83
|
success = None
|
76
84
|
|
@@ -1,14 +1,15 @@
|
|
1
|
-
from typing import Dict, Callable
|
1
|
+
from typing import Dict, Callable
|
2
2
|
|
3
3
|
from blue_assistant.script.repository.base.classes import BaseScript
|
4
4
|
from blue_assistant.script.actions.generic import generic_action
|
5
5
|
from blue_assistant.script.actions.generate_image import generate_image
|
6
6
|
from blue_assistant.script.actions.generate_text import generate_text
|
7
|
-
from blue_assistant.
|
7
|
+
from blue_assistant.script.actions.web_crawl import web_crawl
|
8
8
|
|
9
9
|
|
10
10
|
dict_of_actions: Dict[str, Callable[[BaseScript, str], bool]] = {
|
11
11
|
"generic": generic_action,
|
12
12
|
"generate_image": generate_image,
|
13
13
|
"generate_text": generate_text,
|
14
|
+
"web_crawl": web_crawl,
|
14
15
|
}
|
@@ -0,0 +1,49 @@
|
|
1
|
+
from blueness import module
|
2
|
+
|
3
|
+
from blue_options.logger import log_list
|
4
|
+
|
5
|
+
from blue_assistant import NAME
|
6
|
+
from blue_assistant.web.crawl import crawl_list_of_urls
|
7
|
+
from blue_assistant.script.repository.base.classes import BaseScript
|
8
|
+
from blue_assistant.logger import logger
|
9
|
+
|
10
|
+
|
11
|
+
NAME = module.name(__file__, NAME)
|
12
|
+
|
13
|
+
|
14
|
+
def web_crawl(
    script: BaseScript,
    node_name: str,
    use_cache: bool,
) -> bool:
    """Run the web-crawl action for one node of a script.

    The node's ``seed_urls`` entry names a script variable (optionally
    prefixed with ``:::``) that holds the URLs to start crawling from. The
    crawl result is stored on the node as ``visited_urls``, and the node's
    ``output`` is set to the placeholder ``"TBA"``.

    Args:
        script: the script that owns the node; its ``nodes``, ``vars`` and
            ``object_name`` are read, and the node is updated in place.
        node_name: key of the node in ``script.nodes``.
        use_cache: forwarded to ``crawl_list_of_urls``.

    Returns:
        True if the crawl was started and its result recorded; False if the
        node is misconfigured (the reason is logged).
    """
    logger.info(f"{NAME}: {script} @ {node_name} ...")

    node = script.nodes[node_name]

    seed_url_var_name = node.get("seed_urls", "")
    if not isinstance(seed_url_var_name, str):
        logger.error(f"{node_name}: seed_urls must be a string.")
        return False
    if not seed_url_var_name:
        logger.error(f"{node_name}: seed_urls not found.")
        return False

    # to allow both :::<var-name> and <var-name> - for convenience :)
    if seed_url_var_name.startswith(":::"):
        seed_url_var_name = seed_url_var_name[3:].strip()

    if seed_url_var_name not in script.vars:
        logger.error(f"{node_name}: {seed_url_var_name}: seed_urls not found in vars.")
        return False
    seed_urls = script.vars[seed_url_var_name]

    # A bare string would otherwise be iterated character by character by the
    # crawler, and non-string items would break its ", ".join(...) logging.
    if not isinstance(seed_urls, (list, tuple)) or not all(
        isinstance(seed_url, str) for seed_url in seed_urls
    ):
        logger.error(
            f"{node_name}: {seed_url_var_name}: seed_urls must be a list of strings."
        )
        return False
    log_list(logger, seed_urls, "seed url(s)")

    # Report a missing max_iterations the same way as a missing seed_urls,
    # instead of letting a KeyError escape from the action.
    if "max_iterations" not in node:
        logger.error(f"{node_name}: max_iterations not found.")
        return False

    visited_urls = crawl_list_of_urls(
        seed_urls=seed_urls,
        object_name=script.object_name,
        max_iterations=node["max_iterations"],
        use_cache=use_cache,
    )

    node["visited_urls"] = visited_urls
    node["output"] = "TBA"

    return True
|
@@ -27,8 +27,6 @@ class BaseScript:
|
|
27
27
|
self.object_name = object_name
|
28
28
|
|
29
29
|
self.test_mode = test_mode
|
30
|
-
if self.test_mode:
|
31
|
-
logger.info("💰 test mode is on.")
|
32
30
|
|
33
31
|
self.verbose = verbose
|
34
32
|
|
@@ -41,14 +39,70 @@ class BaseScript:
|
|
41
39
|
success, self.metadata = file.load_yaml(metadata_filename)
|
42
40
|
assert success, f"cannot load {self.name}/metadata.yaml"
|
43
41
|
|
44
|
-
|
42
|
+
self.metadata.setdefault("script", {})
|
43
|
+
assert isinstance(
|
44
|
+
self.script,
|
45
|
+
dict,
|
46
|
+
), "script: expected dict, received {}.".format(
|
47
|
+
self.script.__class__.__name__,
|
48
|
+
)
|
49
|
+
|
50
|
+
self.script.setdefault("nodes", {})
|
51
|
+
assert isinstance(
|
52
|
+
self.nodes,
|
53
|
+
dict,
|
54
|
+
), "nodes: expected dict, received {}.".format(
|
55
|
+
self.nodes.__class__.__name__,
|
56
|
+
)
|
57
|
+
|
58
|
+
self.script.setdefault("vars", {})
|
59
|
+
assert isinstance(
|
60
|
+
self.vars,
|
61
|
+
dict,
|
62
|
+
), "vars: expected dict, received {}.".format(
|
63
|
+
self.vars.__class__.__name__,
|
64
|
+
)
|
65
|
+
|
66
|
+
if self.test_mode:
|
67
|
+
logger.info("🧪 test mode is on.")
|
68
|
+
|
69
|
+
for node_name, node in self.nodes.items():
|
70
|
+
if "test_mode" in self.script:
|
71
|
+
updates = self.script["test_mode"]
|
72
|
+
logger.info(f"🧪 vars.update({updates})")
|
73
|
+
self.vars.update(updates)
|
74
|
+
|
75
|
+
if "test_mode" in node:
|
76
|
+
updates = node["test_mode"]
|
77
|
+
logger.info(f"🧪 {node_name}.update({updates})")
|
78
|
+
node.update(updates)
|
45
79
|
|
46
|
-
logger.info(
|
80
|
+
logger.info(
|
81
|
+
"loaded {} node(s): {}".format(
|
82
|
+
len(self.nodes),
|
83
|
+
", ".join(self.nodes.keys()),
|
84
|
+
)
|
85
|
+
)
|
86
|
+
|
87
|
+
logger.info(
|
88
|
+
"loaded {} var(s){}".format(
|
89
|
+
len(self.vars),
|
90
|
+
"" if verbose else ": {}".format(", ".join(self.vars.keys())),
|
91
|
+
)
|
92
|
+
)
|
47
93
|
if verbose:
|
48
94
|
for var_name, var_value in self.vars.items():
|
49
95
|
logger.info("{}: {}".format(var_name, var_value))
|
50
96
|
|
51
|
-
assert self.generate_graph(), "cannot generate graph"
|
97
|
+
assert self.generate_graph(), "cannot generate graph."
|
98
|
+
|
99
|
+
def __str__(self) -> str:
|
100
|
+
return "{}[{} var(s), {} node(s) -> {}]".format(
|
101
|
+
self.__class__.__name__,
|
102
|
+
len(self.vars),
|
103
|
+
len(self.nodes),
|
104
|
+
self.object_name,
|
105
|
+
)
|
52
106
|
|
53
107
|
def apply_vars(self, text: str) -> str:
|
54
108
|
for var_name, var_value in self.vars.items():
|
@@ -91,12 +145,16 @@ class BaseScript:
|
|
91
145
|
[node_name],
|
92
146
|
)
|
93
147
|
|
94
|
-
def run(
|
148
|
+
def run(
|
149
|
+
self,
|
150
|
+
use_cache: bool = True,
|
151
|
+
) -> bool:
|
95
152
|
logger.info(
|
96
|
-
"{}.run: {}:{}
|
153
|
+
"{}.run: {}:{} -{}> {}".format(
|
97
154
|
NAME,
|
98
155
|
self.__class__.__name__,
|
99
156
|
self.name,
|
157
|
+
"use-cache-" if use_cache else "",
|
100
158
|
self.object_name,
|
101
159
|
)
|
102
160
|
)
|
@@ -126,12 +184,12 @@ class BaseScript:
|
|
126
184
|
# Aliases
|
127
185
|
@property
|
128
186
|
def script(self) -> Dict:
|
129
|
-
return self.metadata
|
187
|
+
return self.metadata["script"]
|
130
188
|
|
131
189
|
@property
|
132
190
|
def nodes(self) -> Dict[str, Dict]:
|
133
|
-
return self.metadata
|
191
|
+
return self.metadata["script"]["nodes"]
|
134
192
|
|
135
193
|
@property
|
136
194
|
def vars(self) -> Dict:
|
137
|
-
return self.metadata
|
195
|
+
return self.metadata["script"]["vars"]
|
@@ -66,14 +66,19 @@ class BlueAmoScript(GenericScript):
|
|
66
66
|
def perform_action(
|
67
67
|
self,
|
68
68
|
node_name: str,
|
69
|
+
use_cache: bool,
|
69
70
|
) -> bool:
|
70
|
-
if not super().perform_action(
|
71
|
+
if not super().perform_action(
|
72
|
+
node_name=node_name,
|
73
|
+
use_cache=use_cache,
|
74
|
+
):
|
71
75
|
return False
|
72
76
|
|
73
77
|
if node_name in dict_of_actions:
|
74
78
|
return dict_of_actions[node_name](
|
75
79
|
script=self,
|
76
80
|
node_name=node_name,
|
81
|
+
use_cache=use_cache,
|
77
82
|
)
|
78
83
|
|
79
84
|
return True
|
@@ -22,6 +22,7 @@ class GenericScript(BaseScript):
|
|
22
22
|
def perform_action(
|
23
23
|
self,
|
24
24
|
node_name: str,
|
25
|
+
use_cache: bool,
|
25
26
|
) -> bool:
|
26
27
|
action_name = self.nodes[node_name].get("action", "unknown")
|
27
28
|
logger.info(f"---- node: {node_name} ---- ")
|
@@ -33,10 +34,14 @@ class GenericScript(BaseScript):
|
|
33
34
|
return dict_of_actions[action_name](
|
34
35
|
script=self,
|
35
36
|
node_name=node_name,
|
37
|
+
use_cache=use_cache,
|
36
38
|
)
|
37
39
|
|
38
|
-
def run(
|
39
|
-
|
40
|
+
def run(
|
41
|
+
self,
|
42
|
+
use_cache: bool = True,
|
43
|
+
) -> bool:
|
44
|
+
if not super().run(use_cache=use_cache):
|
40
45
|
return False
|
41
46
|
|
42
47
|
success: bool = True
|
@@ -68,7 +73,10 @@ class GenericScript(BaseScript):
|
|
68
73
|
)
|
69
74
|
continue
|
70
75
|
|
71
|
-
if not self.perform_action(
|
76
|
+
if not self.perform_action(
|
77
|
+
node_name=node_name,
|
78
|
+
use_cache=use_cache,
|
79
|
+
):
|
72
80
|
success = False
|
73
81
|
break
|
74
82
|
|
@@ -1,40 +1,7 @@
|
|
1
1
|
from blue_objects import file, path
|
2
2
|
|
3
3
|
from blue_assistant.script.repository.generic.classes import GenericScript
|
4
|
-
from blue_assistant.script.repository.orbital_data_explorer.actions import (
|
5
|
-
dict_of_actions,
|
6
|
-
)
|
7
4
|
|
8
5
|
|
9
6
|
class OrbitalDataExplorerScript(GenericScript):
|
10
7
|
name = path.name(file.path(__file__))
|
11
|
-
|
12
|
-
def __init__(
|
13
|
-
self,
|
14
|
-
object_name: str,
|
15
|
-
test_mode: bool = False,
|
16
|
-
verbose: bool = False,
|
17
|
-
):
|
18
|
-
super().__init__(
|
19
|
-
object_name=object_name,
|
20
|
-
test_mode=test_mode,
|
21
|
-
verbose=verbose,
|
22
|
-
)
|
23
|
-
|
24
|
-
if self.test_mode:
|
25
|
-
self.nodes["researching_the_questions"]["max_iterations"] = 3
|
26
|
-
|
27
|
-
def perform_action(
|
28
|
-
self,
|
29
|
-
node_name: str,
|
30
|
-
) -> bool:
|
31
|
-
if not super().perform_action(node_name=node_name):
|
32
|
-
return False
|
33
|
-
|
34
|
-
if node_name in dict_of_actions:
|
35
|
-
return dict_of_actions[node_name](
|
36
|
-
script=self,
|
37
|
-
node_name=node_name,
|
38
|
-
)
|
39
|
-
|
40
|
-
return True
|
blue_assistant/web/__init__.py
CHANGED
blue_assistant/web/__main__.py
CHANGED
@@ -6,7 +6,7 @@ from blue_options.logger import log_dict
|
|
6
6
|
from blue_objects.metadata import post_to_object
|
7
7
|
|
8
8
|
from blue_assistant import NAME
|
9
|
-
from blue_assistant.web
|
9
|
+
from blue_assistant.web import crawl_list_of_urls, fetch_links_and_text
|
10
10
|
from blue_assistant.logger import logger
|
11
11
|
|
12
12
|
NAME = module.name(__file__, NAME)
|
@@ -40,6 +40,12 @@ parser.add_argument(
|
|
40
40
|
"--object_name",
|
41
41
|
type=str,
|
42
42
|
)
|
43
|
+
parser.add_argument(
|
44
|
+
"--use_cache",
|
45
|
+
type=int,
|
46
|
+
default=0,
|
47
|
+
help="0 | 1",
|
48
|
+
)
|
43
49
|
args = parser.parse_args()
|
44
50
|
|
45
51
|
success = False
|
@@ -48,6 +54,7 @@ if args.task == "crawl":
|
|
48
54
|
seed_urls=args.seed_urls.split("+"),
|
49
55
|
object_name=args.object_name,
|
50
56
|
max_iterations=args.max_iterations,
|
57
|
+
use_cache=args.use_cache == 1,
|
51
58
|
)
|
52
59
|
|
53
60
|
if args.verbose == 1:
|
@@ -59,7 +66,7 @@ if args.task == "crawl":
|
|
59
66
|
dict_of_urls,
|
60
67
|
)
|
61
68
|
elif args.task == "fetch":
|
62
|
-
|
69
|
+
summary = fetch_links_and_text(
|
63
70
|
url=args.url,
|
64
71
|
verbose=True,
|
65
72
|
)
|
@@ -67,10 +74,7 @@ elif args.task == "fetch":
|
|
67
74
|
success = post_to_object(
|
68
75
|
args.object_name,
|
69
76
|
NAME.replace(".", "-"),
|
70
|
-
|
71
|
-
"links": list(links),
|
72
|
-
"text": text,
|
73
|
-
},
|
77
|
+
summary,
|
74
78
|
)
|
75
79
|
else:
|
76
80
|
success = None
|
@@ -0,0 +1,91 @@
|
|
1
|
+
from typing import List, Dict, Set
|
2
|
+
|
3
|
+
from blueness import module
|
4
|
+
from blue_objects import file
|
5
|
+
from blue_objects import objects
|
6
|
+
from blue_objects.metadata import get_from_object, post_to_object
|
7
|
+
|
8
|
+
from blue_assistant import NAME
|
9
|
+
from blue_assistant.web.fetch import fetch_links_and_text
|
10
|
+
from blue_assistant.web.functions import url_to_filename
|
11
|
+
from blue_assistant.logger import logger
|
12
|
+
|
13
|
+
NAME = module.name(__file__, NAME)
|
14
|
+
|
15
|
+
|
16
|
+
def crawl_list_of_urls(
    seed_urls: List[str],
    object_name: str,
    max_iterations: int = 10,
    use_cache: bool = False,
    verbose: bool = False,
) -> Dict[str, str]:
    """Crawl outward from the seed URLs and record each visited URL's content type.

    URLs are taken from an unordered set, so the visiting order is arbitrary.
    Every fetched URL is recorded, including failed fetches, which are stored
    with the content type ``"unknown"``.

    Args:
        seed_urls: URLs the crawl starts from.
        object_name: object that holds the crawl cache and the per-URL
            summary files.
        max_iterations: maximum number of URLs to fetch; -1 means no limit.
        use_cache: if True, start from the ``crawl_cache`` stored with the
            object, skip URLs already in it, save a YAML summary for every
            fetched page whose content type contains "html", and write the
            updated cache back at the end.
        verbose: forwarded to ``fetch_links_and_text``.

    Returns:
        Mapping of URL -> content type, covering previously cached entries
        (when ``use_cache`` is set) plus everything fetched in this call.
    """
    cache_tag = "use-cache-" if use_cache else ""
    logger.info(
        f"{NAME}.crawl_list_of_urls({len(seed_urls)}): "
        f"{', '.join(seed_urls)} -{cache_tag}> {object_name}"
    )

    crawl_cache: Dict[str, str] = {}
    if use_cache:
        crawl_cache = get_from_object(object_name, "crawl_cache", {})

    pending: Set[str] = set(seed_urls)
    unlimited = max_iterations == -1

    fetch_count: int = 0
    while pending:
        url = pending.pop()

        already_crawled = url in crawl_cache
        logger.info(f"{'✅' if already_crawled else '🔗'} {url} ...")
        if already_crawled:
            continue

        url_summary = fetch_links_and_text(url=url, verbose=verbose)
        content_type = url_summary.get("content_type", "unknown")

        # only html pages get a summary file
        if use_cache and "html" in content_type:
            summary_filename = objects.path_of(
                object_name=object_name,
                filename=f"crawl_summary_cache/{url_to_filename(url)}.yaml",
            )
            file.save_yaml(filename=summary_filename, data=url_summary)

        crawl_cache[url] = content_type
        if "links" in url_summary:
            # queue only the links that have not been visited yet
            pending.update(url_summary["links"] - crawl_cache.keys())

        # cache hits are skipped above, so only real fetches count here
        fetch_count += 1
        if not unlimited and fetch_count >= max_iterations:
            logger.warning(f"max iteration of {max_iterations} reached.")
            break

    if pending:
        logger.warning(f"queue: {len(pending)}")

    if use_cache:
        post_to_object(object_name, "crawl_cache", crawl_cache)

    return crawl_cache
|
@@ -0,0 +1,75 @@
|
|
1
|
+
from typing import Dict, Any
|
2
|
+
import requests
|
3
|
+
from bs4 import BeautifulSoup, XMLParsedAsHTMLWarning
|
4
|
+
from urllib.parse import urljoin
|
5
|
+
import re
|
6
|
+
import warnings
|
7
|
+
|
8
|
+
from blueness import module
|
9
|
+
from blue_options.logger import log_long_text, log_list
|
10
|
+
|
11
|
+
from blue_assistant import NAME
|
12
|
+
from blue_assistant.logger import logger
|
13
|
+
|
14
|
+
warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
|
15
|
+
|
16
|
+
NAME = module.name(__file__, NAME)
|
17
|
+
|
18
|
+
|
19
|
+
def fetch_links_and_text(
    url: str,
    verbose: bool = False,
) -> Dict[str, Any]:
    """Fetch a URL and extract its same-prefix links and its plain text.

    Args:
        url: the page to fetch (GET, 5-second timeout).
        verbose: if True, log the collected links and the extracted text.

    Returns:
        An empty dict if the request raises or the status code is not 200.
        Otherwise a dict with:
          - "url": the requested URL,
          - "content_type": the Content-Type response header ("" if absent),
          - "links": set of absolute, fragment-free URLs found in <a href>
            tags that start with ``url``,
          - "text": the page text reduced to printable ASCII with single
            spaces between words.
        Responses whose content type contains "pdf" or "xml" are not parsed;
        their "links" is an empty set and their "text" is "".
    """
    # Local import: the file-level import only brings in urljoin.
    from urllib.parse import urldefrag

    try:
        response = requests.get(url, timeout=5)
    except Exception as e:
        # best-effort fetch: any failure is reported as "nothing found"
        logger.warning(e)
        return {}

    if response.status_code != 200:
        logger.error(response)
        return {}

    content_type = response.headers.get("Content-Type", "")
    logger.info(f"content-type: {content_type}")

    links = set()
    text = ""

    if not any(
        thing in content_type
        for thing in [
            "pdf",
            "xml",
        ]
    ):
        soup = BeautifulSoup(response.text, "html.parser")

        for a_tag in soup.find_all("a", href=True):
            # Drop the #fragment so in-page anchors do not show up as
            # separate URLs that a crawler would fetch again.
            a_url = urldefrag(urljoin(url, a_tag["href"])).url

            if a_url.startswith(url):
                logger.info(f"+= {a_url}")
                links.add(a_url)
                continue

            logger.info(f"ignored: {a_url}")

        text = soup.get_text(separator=" ", strip=True)

        # Turn every whitespace run (including \r, \n, \t) into one space
        # BEFORE stripping non-printable characters; stripping first would
        # delete the line breaks and glue neighbouring words together.
        text = re.sub(r"\s+", " ", text)

        # remove non-ASCII characters
        text = re.sub(r"[^\x20-\x7E]+", "", text)

        # removing characters can leave doubled spaces behind
        text = re.sub(r" +", " ", text).strip()

    if verbose:
        log_list(logger, list(links), "link(s)")
        log_long_text(logger, text)

    return {
        "url": url,
        "content_type": content_type,
        "links": links,
        "text": text,
    }
|
blue_assistant/web/functions.py
CHANGED
@@ -16,82 +16,24 @@ warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
|
|
16
16
|
NAME = module.name(__file__, NAME)
|
17
17
|
|
18
18
|
|
19
|
-
def
|
19
|
+
def url_to_filename(
|
20
20
|
url: str,
|
21
|
-
|
22
|
-
) ->
|
23
|
-
|
24
|
-
|
25
|
-
response.raise_for_status()
|
26
|
-
except requests.RequestException:
|
27
|
-
return set(), ""
|
21
|
+
max_length: int = 255,
|
22
|
+
) -> str:
|
23
|
+
# Remove the URL scheme (http://, https://)
|
24
|
+
filename = re.sub(r"^https?://", "", url)
|
28
25
|
|
29
|
-
|
30
|
-
|
26
|
+
# Replace unwanted characters with an underscore
|
27
|
+
filename = re.sub(r"[^\w\s-]", "_", filename)
|
31
28
|
|
32
|
-
|
33
|
-
|
29
|
+
# Replace slashes with a hyphen to preserve some structure
|
30
|
+
filename = re.sub(r"\/", "-", filename)
|
34
31
|
|
35
|
-
|
36
|
-
|
37
|
-
links.add(a_url)
|
38
|
-
continue
|
32
|
+
# Replace spaces with underscores
|
33
|
+
filename = filename.replace(" ", "_")
|
39
34
|
|
40
|
-
|
35
|
+
# Ensure the filename length is not too long
|
36
|
+
if len(filename) > max_length:
|
37
|
+
filename = filename[:max_length]
|
41
38
|
|
42
|
-
|
43
|
-
|
44
|
-
# remove non-ASCII characters
|
45
|
-
plain_text = re.sub(r"[^\x20-\x7E]+", "", plain_text)
|
46
|
-
for thing in ["\r", "\n", "\t"]:
|
47
|
-
plain_text = plain_text.replace(thing, " ")
|
48
|
-
plain_text = re.sub(r"\s+", " ", plain_text).strip()
|
49
|
-
|
50
|
-
if verbose:
|
51
|
-
log_list(logger, list(links), "link(s)")
|
52
|
-
log_long_text(logger, plain_text)
|
53
|
-
|
54
|
-
return links, plain_text
|
55
|
-
|
56
|
-
|
57
|
-
def crawl_list_of_urls(
|
58
|
-
seed_urls: List[str],
|
59
|
-
object_name: str,
|
60
|
-
max_iterations: int = 10,
|
61
|
-
verbose: bool = False,
|
62
|
-
) -> Dict[str, str]:
|
63
|
-
logger.info(
|
64
|
-
"{}.crawl_list_of_urls({}): {} -> {}".format(
|
65
|
-
NAME,
|
66
|
-
len(seed_urls),
|
67
|
-
", ".join(seed_urls),
|
68
|
-
object_name,
|
69
|
-
)
|
70
|
-
)
|
71
|
-
|
72
|
-
visited: Dict[str, str] = {}
|
73
|
-
queue: Set[str] = set(seed_urls)
|
74
|
-
|
75
|
-
iteration: int = 0
|
76
|
-
while queue:
|
77
|
-
url = queue.pop()
|
78
|
-
if url in visited:
|
79
|
-
continue
|
80
|
-
|
81
|
-
logger.info(f"🔗 {url} ...")
|
82
|
-
url_links, url_text = fetch_links_and_text(
|
83
|
-
url=url,
|
84
|
-
verbose=verbose,
|
85
|
-
)
|
86
|
-
visited[url] = url_text
|
87
|
-
queue.update(url_links - visited.keys())
|
88
|
-
|
89
|
-
iteration += 1
|
90
|
-
if max_iterations != -1 and iteration >= max_iterations:
|
91
|
-
logger.warning(f"max iteration of {max_iterations} reached.")
|
92
|
-
break
|
93
|
-
|
94
|
-
if queue:
|
95
|
-
logger.warning(f"queue: {len(queue)}")
|
96
|
-
|
97
|
-
return visited
|
39
|
+
return filename
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: blue_assistant
|
3
|
-
Version: 4.
|
3
|
+
Version: 4.273.1
|
4
4
|
Summary: 🧠 An AI Assistant.
|
5
5
|
Home-page: https://github.com/kamangir/blue-assistant
|
6
6
|
Author: Arash Abadpour (Kamangir)
|
@@ -112,8 +112,8 @@ graph LR
|
|
112
112
|
|
113
113
|
| | |
|
114
114
|
| --- | --- |
|
115
|
-
| [`
|
116
|
-
| [`
|
115
|
+
| [`orbital-data-explorer`](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/repository/orbital_data_explorer) [](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/repository/orbital_data_explorer) Access to the [Orbital Data Explorer](https://ode.rsl.wustl.edu/). 🔥 | [`🌀 blue script`](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/) [](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/) A minimal AI DAG interface. |
|
116
|
+
| [`@hue`](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/repository/hue) [](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/repository/hue) "send a color command to the Hue LED lights in my apartment." | [`blue-amo`](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/repository/blue_amo/README.md) [](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/repository/blue_amo/README.md) Story development and visualization. |
|
117
117
|
|
118
118
|
---
|
119
119
|
|
@@ -124,4 +124,4 @@ Also home to [`@web`](https://raw.githubusercontent.com/kamangir/blue-assistant/
|
|
124
124
|
|
125
125
|
[](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml) [](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml) [](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml) [](https://pypi.org/project/blue-assistant/) [](https://pypistats.org/packages/blue-assistant)
|
126
126
|
|
127
|
-
built by 🌀 [`blue_options-4.236.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.
|
127
|
+
built by 🌀 [`blue_options-4.236.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.273.1`](https://github.com/kamangir/blue-assistant).
|
@@ -1,5 +1,5 @@
|
|
1
|
-
blue_assistant/README.py,sha256=
|
2
|
-
blue_assistant/__init__.py,sha256
|
1
|
+
blue_assistant/README.py,sha256=EJORj3I5pucJplI86lrFaZBN5C9IYNgKoG_V7h27NHw,2586
|
2
|
+
blue_assistant/__init__.py,sha256=pPnU1NLSA8YbfAPdzhjwL5J0c1GSxruUlXume0IxvIE,311
|
3
3
|
blue_assistant/__main__.py,sha256=URtal70XZc0--3FDTYWcLtnGOqBYjMX9gt-L1k8hDXI,361
|
4
4
|
blue_assistant/config.env,sha256=npodyuuhkZUHUv9FnEiQQZkKxFbg8nQb1YpOCURqV3Y,301
|
5
5
|
blue_assistant/env.py,sha256=FTSdJ8-J4jAyI0-h3MBgOweQBWd3YEFIibBHSXpClrY,760
|
@@ -20,53 +20,54 @@ blue_assistant/.abcli/hue/create_user.sh,sha256=Nh8FhnGweB2JZB7SVh-6jp8ud5YHeJSa
|
|
20
20
|
blue_assistant/.abcli/hue/list.sh,sha256=ynptjPo6jZnwm-7wAVgGx-mZvyPKZ9b5JaJoY0xidCg,268
|
21
21
|
blue_assistant/.abcli/hue/set.sh,sha256=VcADsfbjjbrxIMX9cVVHeK0MH649ZRY29V8YDTgflms,266
|
22
22
|
blue_assistant/.abcli/script/list.sh,sha256=2lcVfqDfZP50NszF8o5YCo3TrJKeDc_qo7MTAF3XTGw,131
|
23
|
-
blue_assistant/.abcli/script/run.sh,sha256=
|
23
|
+
blue_assistant/.abcli/script/run.sh,sha256=gSobH_ct9o_9aC25S6CC-Io97EOA7vLbD1suOnFsr_A,1012
|
24
24
|
blue_assistant/.abcli/tests/README.sh,sha256=Qs0YUxVB1OZZ70Nqw2kT1LKXeUnC5-XfQRMfqb8Cbwg,152
|
25
25
|
blue_assistant/.abcli/tests/help.sh,sha256=7AAZzCEo5vZ1cBAMfj4virDClabaUMdOV-NqXSJQVUM,918
|
26
26
|
blue_assistant/.abcli/tests/script_list.sh,sha256=OVOwWO9wR0eeDZTM6uub-eTKbz3eswU3vEUPWXcK-gQ,178
|
27
|
-
blue_assistant/.abcli/tests/script_run.sh,sha256=
|
27
|
+
blue_assistant/.abcli/tests/script_run.sh,sha256=5PFFNnWMpY55ZSYwIFsf9Ya0vHfncU1_8uBjY9Dke7I,809
|
28
28
|
blue_assistant/.abcli/tests/version.sh,sha256=oR2rvYR8zi-0VDPIdPJsmsmWwYaamT8dmNTqUh3-8Gw,154
|
29
29
|
blue_assistant/.abcli/tests/web_crawl.sh,sha256=sz3LbpidWvjG7kQoWxQBtdBe5yntm14ylAUsgPJWhko,372
|
30
30
|
blue_assistant/.abcli/tests/web_fetch.sh,sha256=C8PFWlmRa9heNdP9yhshriCBKG1uUlps-oxhAM70AZI,312
|
31
|
-
blue_assistant/.abcli/web/crawl.sh,sha256=
|
31
|
+
blue_assistant/.abcli/web/crawl.sh,sha256=GdDhp1MI1ZhGpH51Ie19Xb03xO8OM0cp9gGkgPjHCVE,934
|
32
32
|
blue_assistant/.abcli/web/fetch.sh,sha256=9SggFZTtpff-gnCd987zP6UqzG4So5D4px2jMg2Vicc,674
|
33
33
|
blue_assistant/help/__init__.py,sha256=ajz1GSNU9xYVrFEDSz6Xwg7amWQ_yvW75tQa1ZvRIWc,3
|
34
34
|
blue_assistant/help/__main__.py,sha256=cVejR7OpoWPg0qLbm-PZf5TuJS27x49jzfiyCLyzEns,241
|
35
35
|
blue_assistant/help/functions.py,sha256=O85zVEMtnm32O7KB6W6uQRoFXnE_4dW5pwYZtMakYDg,865
|
36
36
|
blue_assistant/help/hue.py,sha256=ZElPG24ekiS7eIGLVrP2gB_womlGUuwln2cded4Li-c,2319
|
37
|
-
blue_assistant/help/script.py,sha256=
|
38
|
-
blue_assistant/help/web.py,sha256
|
37
|
+
blue_assistant/help/script.py,sha256=FmUZuGyVseFhytJxv7KW9Gr4gnm3U4ko9u6KSonjLvA,1128
|
38
|
+
blue_assistant/help/web.py,sha256=87Ck9ofnBGuAn-akuYIMUsGFxLgEi1z0WRh-HF1156c,924
|
39
39
|
blue_assistant/script/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
40
|
-
blue_assistant/script/__main__.py,sha256=
|
40
|
+
blue_assistant/script/__main__.py,sha256=7dHvHkfQCxRY0XsG2NGtDDT7h_mEJ6mvCOs7GeeTzxE,1798
|
41
41
|
blue_assistant/script/load.py,sha256=JsDY9T3HTM9vXngvKsA0Mt_erxAnRR_jI62-JhrOBMU,831
|
42
|
-
blue_assistant/script/actions/__init__.py,sha256=
|
43
|
-
blue_assistant/script/actions/generate_image.py,sha256=
|
44
|
-
blue_assistant/script/actions/generate_text.py,sha256=
|
45
|
-
blue_assistant/script/actions/generic.py,sha256=
|
42
|
+
blue_assistant/script/actions/__init__.py,sha256=GJJCaXSrfhwW9K5A2PHmrS7iGl-Ss9UUeYHUtxni4k0,576
|
43
|
+
blue_assistant/script/actions/generate_image.py,sha256=brg3u6e-cZvBLK8B7UAh-CXgmAwyvSN-jE00EXMSF3A,1357
|
44
|
+
blue_assistant/script/actions/generate_text.py,sha256=GQ7sF1J-vBNgr-h01RZHMDQJOGDXx8PzQWEVNQwW_TU,1910
|
45
|
+
blue_assistant/script/actions/generic.py,sha256=EjJkDj82ZFFQbjn-Uib-2Qs-nZG8kR2NzhsEVuOpgWQ,376
|
46
|
+
blue_assistant/script/actions/web_crawl.py,sha256=-EYbpXHbqPzKj-5AiDiXNgYIyUn-qv3mFz6RxhgBovo,1523
|
46
47
|
blue_assistant/script/repository/__init__.py,sha256=zVI3cubRqM9H6WgF0EUP9idILVLCumPFmJgKPM7iVlM,604
|
47
48
|
blue_assistant/script/repository/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
48
|
-
blue_assistant/script/repository/base/classes.py,sha256
|
49
|
+
blue_assistant/script/repository/base/classes.py,sha256=gM8OB1iKRKLa_dk7esAogAR9WaNZFgLhXkpd1iTtsuQ,5376
|
49
50
|
blue_assistant/script/repository/blue_amo/__init__.py,sha256=WjL9GIlN-DBnbUMJ8O_FxTp0rcVGlsIS3H9YtXEefTk,76
|
50
|
-
blue_assistant/script/repository/blue_amo/classes.py,sha256=
|
51
|
+
blue_assistant/script/repository/blue_amo/classes.py,sha256=8cOBClcLHXHKZK9HyOmN8GAB0lEdog93-CTfZwdavv8,2203
|
51
52
|
blue_assistant/script/repository/blue_amo/actions/__init__.py,sha256=je2S21KvYB3QkbABs71parwUh8MCh2mdlNZfLx_QuDg,430
|
52
|
-
blue_assistant/script/repository/blue_amo/actions/slicing_into_frames.py,sha256=
|
53
|
-
blue_assistant/script/repository/blue_amo/actions/stitching_the_frames.py,sha256=
|
53
|
+
blue_assistant/script/repository/blue_amo/actions/slicing_into_frames.py,sha256=3PPdaJNbUyA0fvQ7Du6lFhRB5Uakax_CELPxhEuoGhI,1222
|
54
|
+
blue_assistant/script/repository/blue_amo/actions/stitching_the_frames.py,sha256=n_UWcUqanEI6nn8AM0dlyjSUssk3xUrFmx-lZWUjOZE,3643
|
54
55
|
blue_assistant/script/repository/generic/__init__.py,sha256=kLffGsQMQAFJTw6IZBE5eBxvshP1x9wwHHR4hsDJblo,75
|
55
|
-
blue_assistant/script/repository/generic/classes.py,sha256=
|
56
|
+
blue_assistant/script/repository/generic/classes.py,sha256=5pP7jAZ42r2t4GEilK8uxCYlgydKJctO_lgsNr0BScs,2739
|
56
57
|
blue_assistant/script/repository/hue/__init__.py,sha256=WjL9GIlN-DBnbUMJ8O_FxTp0rcVGlsIS3H9YtXEefTk,76
|
57
58
|
blue_assistant/script/repository/hue/__main__.py,sha256=jaesrONQsrpVdg8A7NzzT8xpsdXs5gmrywOTE_TWD6c,2321
|
58
59
|
blue_assistant/script/repository/hue/api.py,sha256=C3KzT_MG868gsznUXpwEbUleBjnJObWzZgzvN6wi3uo,4774
|
59
60
|
blue_assistant/script/repository/hue/classes.py,sha256=YhifmcuylnZuI0_BjBPmwrSbsO-BOHDHNJ0pSLIExiE,188
|
60
61
|
blue_assistant/script/repository/hue/colors.py,sha256=rUdtCroNAnzm1zUuVp8eVhvfIie1f7sd208ypsFAJ_w,625
|
61
62
|
blue_assistant/script/repository/orbital_data_explorer/__init__.py,sha256=yy5FtCeHlr9dRfqxw4QYWr7_yRjnQpwVyuAY2vLrh4Q,110
|
62
|
-
blue_assistant/script/repository/orbital_data_explorer/classes.py,sha256=
|
63
|
-
blue_assistant/
|
64
|
-
blue_assistant/
|
65
|
-
blue_assistant/web/
|
66
|
-
blue_assistant/web/
|
67
|
-
blue_assistant/web/functions.py,sha256=
|
68
|
-
blue_assistant-4.
|
69
|
-
blue_assistant-4.
|
70
|
-
blue_assistant-4.
|
71
|
-
blue_assistant-4.
|
72
|
-
blue_assistant-4.
|
63
|
+
blue_assistant/script/repository/orbital_data_explorer/classes.py,sha256=ixYd_FHWYtp8Sfd6AiZkIqePjoUlT9iLg7TvuxHIDzA,204
|
64
|
+
blue_assistant/web/__init__.py,sha256=70_JSpnfX1mLm8Xv3xHIujfr2FfGeHPRs6HraWDP1XA,114
|
65
|
+
blue_assistant/web/__main__.py,sha256=35RG-pqWEUcqNz0R59Efw4m00Azc46lu2r-42yX3Xqk,1663
|
66
|
+
blue_assistant/web/crawl.py,sha256=K9EXWxhKBt6I9eViqSdpcx-z2aGsroafi2cmVOhgbOE,2368
|
67
|
+
blue_assistant/web/fetch.py,sha256=b3EEMHoi-Tv2r2I5B2AVbDtHqrK0il42hvi2ZnBOKMY,1825
|
68
|
+
blue_assistant/web/functions.py,sha256=CHGPM8RF-JtZlSL2vE0NFSZJVXMMMrXs8biwFk_JsSA,1042
|
69
|
+
blue_assistant-4.273.1.dist-info/LICENSE,sha256=ogEPNDSH0_dhiv_lT3ifVIdgIzHAqNA_SemnxUfPBJk,7048
|
70
|
+
blue_assistant-4.273.1.dist-info/METADATA,sha256=W6oaQcpfGcBWJfqHPU7K3gsZbljDiw82Y3Tn1tCKIrI,5380
|
71
|
+
blue_assistant-4.273.1.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
72
|
+
blue_assistant-4.273.1.dist-info/top_level.txt,sha256=ud0BkBbdOVze13bNqHuhZj1rwCztaBtDf5ChEYzASOs,15
|
73
|
+
blue_assistant-4.273.1.dist-info/RECORD,,
|
@@ -1,11 +0,0 @@
|
|
1
|
-
from typing import Dict, Callable
|
2
|
-
|
3
|
-
from blue_assistant.script.repository.base.classes import BaseScript
|
4
|
-
from blue_assistant.script.repository.orbital_data_explorer.actions import (
|
5
|
-
researching_the_questions,
|
6
|
-
)
|
7
|
-
|
8
|
-
|
9
|
-
dict_of_actions: Dict[str, Callable[[BaseScript, str], bool]] = {
|
10
|
-
"researching_the_questions": researching_the_questions.researching_the_questions,
|
11
|
-
}
|
@@ -1,42 +0,0 @@
|
|
1
|
-
from typing import Dict
|
2
|
-
from blueness import module
|
3
|
-
from tqdm import tqdm
|
4
|
-
|
5
|
-
from openai_commands.text_generation import api
|
6
|
-
|
7
|
-
from blue_assistant import NAME
|
8
|
-
from blue_assistant.web.functions import crawl_list_of_urls
|
9
|
-
from blue_assistant.script.repository.base.classes import BaseScript
|
10
|
-
from blue_assistant.logger import logger
|
11
|
-
|
12
|
-
|
13
|
-
NAME = module.name(__file__, NAME)
|
14
|
-
|
15
|
-
|
16
|
-
def researching_the_questions(
|
17
|
-
script: BaseScript,
|
18
|
-
node_name: str,
|
19
|
-
) -> bool:
|
20
|
-
logger.info(f"{NAME}: ...")
|
21
|
-
|
22
|
-
visited_urls = crawl_list_of_urls(
|
23
|
-
seed_urls=script.vars["seed_urls"],
|
24
|
-
object_name=script.object_name,
|
25
|
-
max_iterations=script.nodes[node_name]["max_iterations"],
|
26
|
-
)
|
27
|
-
|
28
|
-
success, output, _ = api.generate_text(
|
29
|
-
prompt=script.nodes[node_name]["prompt"].replace(
|
30
|
-
":::input", " ".join([content for content in visited_urls.values()])
|
31
|
-
),
|
32
|
-
verbose=script.verbose,
|
33
|
-
)
|
34
|
-
if not success:
|
35
|
-
return success
|
36
|
-
|
37
|
-
logger.info(output)
|
38
|
-
|
39
|
-
script.nodes[node_name]["visited_urls"] = visited_urls
|
40
|
-
script.nodes[node_name]["output"] = output
|
41
|
-
|
42
|
-
return True
|
File without changes
|
File without changes
|
File without changes
|