blue-assistant 4.226.1__py3-none-any.whl → 4.268.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- blue_assistant/.abcli/alias.sh +2 -0
- blue_assistant/.abcli/tests/help.sh +4 -3
- blue_assistant/.abcli/tests/web_crawl.sh +3 -1
- blue_assistant/.abcli/tests/web_fetch.sh +13 -0
- blue_assistant/.abcli/web/fetch.sh +26 -0
- blue_assistant/README.py +28 -9
- blue_assistant/__init__.py +1 -1
- blue_assistant/help/web.py +22 -3
- blue_assistant/script/actions/__init__.py +3 -2
- blue_assistant/script/{repository/orbital_data_explorer/actions/researching_the_questions.py → actions/web_crawl.py} +22 -4
- blue_assistant/script/repository/base/classes.py +62 -8
- blue_assistant/script/repository/generic/classes.py +6 -3
- blue_assistant/script/repository/orbital_data_explorer/classes.py +0 -33
- blue_assistant/web/__main__.py +29 -11
- blue_assistant/web/{crawl.py → functions.py} +38 -23
- {blue_assistant-4.226.1.dist-info → blue_assistant-4.268.1.dist-info}/METADATA +19 -7
- {blue_assistant-4.226.1.dist-info → blue_assistant-4.268.1.dist-info}/RECORD +20 -19
- blue_assistant/script/repository/orbital_data_explorer/actions/__init__.py +0 -11
- {blue_assistant-4.226.1.dist-info → blue_assistant-4.268.1.dist-info}/LICENSE +0 -0
- {blue_assistant-4.226.1.dist-info → blue_assistant-4.268.1.dist-info}/WHEEL +0 -0
- {blue_assistant-4.226.1.dist-info → blue_assistant-4.268.1.dist-info}/top_level.txt +0 -0
blue_assistant/.abcli/alias.sh
CHANGED
@@ -23,15 +23,16 @@ function test_blue_assistant_help() {
|
|
23
23
|
"@assistant script list" \
|
24
24
|
"@assistant script run" \
|
25
25
|
\
|
26
|
-
"@assistant web" \
|
27
|
-
"@assistant web crawl" \
|
28
|
-
\
|
29
26
|
"@hue" \
|
30
27
|
"@hue create_user" \
|
31
28
|
"@hue list" \
|
32
29
|
"@hue set" \
|
33
30
|
"@hue test" \
|
34
31
|
\
|
32
|
+
"@web" \
|
33
|
+
"@web crawl" \
|
34
|
+
"@web fetch" \
|
35
|
+
\
|
35
36
|
"blue_assistant"; do
|
36
37
|
abcli_eval ,$options \
|
37
38
|
abcli_help $module
|
@@ -3,10 +3,12 @@
|
|
3
3
|
function test_blue_assistant_web_crawl() {
|
4
4
|
local options=$1
|
5
5
|
|
6
|
+
local object_name=test_blue_assistant_web_crawl-$(abcli_string_timestamp_short)
|
7
|
+
|
6
8
|
abcli_eval ,$options \
|
7
9
|
blue_assistant_web_crawl \
|
8
10
|
~upload \
|
9
11
|
https://ode.rsl.wustl.edu/+https://oderest.rsl.wustl.edu/ \
|
10
|
-
|
12
|
+
$object_name \
|
11
13
|
--max_iterations 3
|
12
14
|
}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#! /usr/bin/env bash
|
2
|
+
|
3
|
+
function test_blue_assistant_web_fetch() {
|
4
|
+
local options=$1
|
5
|
+
|
6
|
+
local object_name=test_blue_assistant_web_fetch-$(abcli_string_timestamp_short)
|
7
|
+
|
8
|
+
abcli_eval ,$options \
|
9
|
+
blue_assistant_web_fetch \
|
10
|
+
~upload \
|
11
|
+
https://ode.rsl.wustl.edu/ \
|
12
|
+
$object_name
|
13
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
#! /usr/bin/env bash
|
2
|
+
|
3
|
+
function blue_assistant_web_fetch() {
|
4
|
+
local options=$1
|
5
|
+
local do_dryrun=$(abcli_option_int "$options" dryrun 0)
|
6
|
+
local do_upload=$(abcli_option_int "$options" upload $(abcli_not $do_dryrun))
|
7
|
+
|
8
|
+
local url=${2:-void}
|
9
|
+
|
10
|
+
local object_name=$(abcli_clarify_object $3 web-fetch-$(abcli_string_timestamp_short))
|
11
|
+
|
12
|
+
abcli_log "fetching $url -> $object_name ..."
|
13
|
+
|
14
|
+
abcli_eval dryrun=$do_dryrun \
|
15
|
+
python3 -m blue_assistant.web \
|
16
|
+
fetch \
|
17
|
+
--url $url \
|
18
|
+
--object_name $object_name \
|
19
|
+
"${@:4}"
|
20
|
+
[[ $? -ne 0 ]] && return 1
|
21
|
+
|
22
|
+
[[ "$do_upload" == 1 ]] &&
|
23
|
+
abcli_upload - $object_name
|
24
|
+
|
25
|
+
return 0
|
26
|
+
}
|
blue_assistant/README.py
CHANGED
@@ -1,14 +1,28 @@
|
|
1
1
|
import os
|
2
2
|
|
3
|
+
from blue_options.help.functions import get_help
|
3
4
|
from blue_objects import file, README
|
4
5
|
|
5
6
|
from blue_assistant import NAME, VERSION, ICON, REPO_NAME
|
7
|
+
from blue_assistant.help.functions import help_functions
|
6
8
|
|
7
9
|
|
8
10
|
items = README.Items(
|
9
11
|
[
|
10
12
|
{
|
11
|
-
"name": "
|
13
|
+
"name": "orbital-data-explorer",
|
14
|
+
"url": "./blue_assistant/script/repository/orbital_data_explorer",
|
15
|
+
"marquee": "https://github.com/kamangir/assets/blob/main/blue-assistant/orbital-data-explorer.png?raw=true",
|
16
|
+
"description": "Access to the [Orbital Data Explorer](https://ode.rsl.wustl.edu/). 🔥",
|
17
|
+
},
|
18
|
+
{
|
19
|
+
"name": "🌀 blue script",
|
20
|
+
"marquee": "https://github.com/kamangir/assets/raw/main/blue-plugin/marquee.png?raw=true",
|
21
|
+
"description": "A minimal AI DAG interface.",
|
22
|
+
"url": "./blue_assistant/script/",
|
23
|
+
},
|
24
|
+
{
|
25
|
+
"name": "@hue",
|
12
26
|
"url": "./blue_assistant/script/repository/hue",
|
13
27
|
"marquee": "https://github.com/kamangir/assets/raw/main/blue-assistant/20250314_143702.jpg?raw=true",
|
14
28
|
"description": '"send a color command to the Hue LED lights in my apartment."',
|
@@ -17,13 +31,7 @@ items = README.Items(
|
|
17
31
|
"name": "blue-amo",
|
18
32
|
"url": "./blue_assistant/script/repository/blue_amo/README.md",
|
19
33
|
"marquee": "https://github.com/kamangir/assets/raw/main/blue-amo-2025-02-03-nswnx6/stitching_the_frames-2.png?raw=true",
|
20
|
-
"description": "
|
21
|
-
},
|
22
|
-
{
|
23
|
-
"name": "orbital-data-explorer",
|
24
|
-
"url": "./blue_assistant/script/repository/orbital_data_explorer/README.md",
|
25
|
-
"marquee": "https://github.com/kamangir/assets/blob/main/blue-assistant/orbital-data-explorer.png?raw=true",
|
26
|
-
"description": "Access to the [Orbital Data Explorer](https://ode.rsl.wustl.edu/), through AI. ⏸️",
|
34
|
+
"description": "Story development and visualization.",
|
27
35
|
},
|
28
36
|
]
|
29
37
|
)
|
@@ -39,6 +47,11 @@ def build():
|
|
39
47
|
NAME=NAME,
|
40
48
|
VERSION=VERSION,
|
41
49
|
REPO_NAME=REPO_NAME,
|
50
|
+
help_function=lambda tokens: get_help(
|
51
|
+
tokens,
|
52
|
+
help_functions,
|
53
|
+
mono=True,
|
54
|
+
),
|
42
55
|
)
|
43
56
|
for readme in [
|
44
57
|
{
|
@@ -47,8 +60,14 @@ def build():
|
|
47
60
|
"path": "..",
|
48
61
|
},
|
49
62
|
{"path": "script/repository/blue_amo"},
|
50
|
-
|
63
|
+
#
|
64
|
+
{"path": "script/repository/orbital_data_explorer/docs/round-1.md"},
|
65
|
+
{"path": "script/repository/orbital_data_explorer/docs"},
|
66
|
+
#
|
51
67
|
{"path": "script/repository/hue/docs/round-1.md"},
|
52
68
|
{"path": "script/repository/hue/docs"},
|
69
|
+
#
|
70
|
+
{"path": "script/"},
|
71
|
+
{"path": "web/"},
|
53
72
|
]
|
54
73
|
)
|
blue_assistant/__init__.py
CHANGED
blue_assistant/help/web.py
CHANGED
@@ -15,19 +15,38 @@ def help_crawl(
|
|
15
15
|
|
16
16
|
return show_usage(
|
17
17
|
[
|
18
|
-
"@
|
19
|
-
"web",
|
18
|
+
"@web",
|
20
19
|
"crawl",
|
21
20
|
f"[{options}]",
|
22
21
|
"<url-1>+<url-2>+<url-3>",
|
23
22
|
"[-|<object-name>]",
|
24
23
|
]
|
25
24
|
+ args,
|
26
|
-
"crawl the
|
25
|
+
"crawl the urls.",
|
26
|
+
mono=mono,
|
27
|
+
)
|
28
|
+
|
29
|
+
|
30
|
+
def help_fetch(
|
31
|
+
tokens: List[str],
|
32
|
+
mono: bool,
|
33
|
+
) -> str:
|
34
|
+
options = xtra("dryrun,~upload", mono=mono)
|
35
|
+
|
36
|
+
return show_usage(
|
37
|
+
[
|
38
|
+
"@web",
|
39
|
+
"fetch",
|
40
|
+
f"[{options}]",
|
41
|
+
"<url>",
|
42
|
+
"[-|<object-name>]",
|
43
|
+
],
|
44
|
+
"fetch <url>.",
|
27
45
|
mono=mono,
|
28
46
|
)
|
29
47
|
|
30
48
|
|
31
49
|
help_functions = {
|
32
50
|
"crawl": help_crawl,
|
51
|
+
"fetch": help_fetch,
|
33
52
|
}
|
@@ -1,14 +1,15 @@
|
|
1
|
-
from typing import Dict, Callable
|
1
|
+
from typing import Dict, Callable
|
2
2
|
|
3
3
|
from blue_assistant.script.repository.base.classes import BaseScript
|
4
4
|
from blue_assistant.script.actions.generic import generic_action
|
5
5
|
from blue_assistant.script.actions.generate_image import generate_image
|
6
6
|
from blue_assistant.script.actions.generate_text import generate_text
|
7
|
-
from blue_assistant.
|
7
|
+
from blue_assistant.script.actions.web_crawl import web_crawl
|
8
8
|
|
9
9
|
|
10
10
|
dict_of_actions: Dict[str, Callable[[BaseScript, str], bool]] = {
|
11
11
|
"generic": generic_action,
|
12
12
|
"generate_image": generate_image,
|
13
13
|
"generate_text": generate_text,
|
14
|
+
"web_crawl": web_crawl,
|
14
15
|
}
|
@@ -2,10 +2,11 @@ from typing import Dict
|
|
2
2
|
from blueness import module
|
3
3
|
from tqdm import tqdm
|
4
4
|
|
5
|
+
from blue_options.logger import log_list
|
5
6
|
from openai_commands.text_generation import api
|
6
7
|
|
7
8
|
from blue_assistant import NAME
|
8
|
-
from blue_assistant.web.
|
9
|
+
from blue_assistant.web.functions import crawl_list_of_urls
|
9
10
|
from blue_assistant.script.repository.base.classes import BaseScript
|
10
11
|
from blue_assistant.logger import logger
|
11
12
|
|
@@ -13,14 +14,31 @@ from blue_assistant.logger import logger
|
|
13
14
|
NAME = module.name(__file__, NAME)
|
14
15
|
|
15
16
|
|
16
|
-
def
|
17
|
+
def web_crawl(
|
17
18
|
script: BaseScript,
|
18
19
|
node_name: str,
|
19
20
|
) -> bool:
|
20
|
-
logger.info(f"{NAME}: ...")
|
21
|
+
logger.info(f"{NAME}: {script} @ {node_name} ...")
|
22
|
+
|
23
|
+
seed_url_var_name = script.nodes[node_name].get("seed_urls", "")
|
24
|
+
if not isinstance(seed_url_var_name, str):
|
25
|
+
logger.error(f"{node_name}: seed_urls must be a string.")
|
26
|
+
return False
|
27
|
+
# to allow both :::<var-name> and <var-name> - for convenience :)
|
28
|
+
if seed_url_var_name.startswith(":::"):
|
29
|
+
seed_url_var_name = seed_url_var_name[3:].strip()
|
30
|
+
if not seed_url_var_name:
|
31
|
+
logger.error(f"{node_name}: seed_urls not found.")
|
32
|
+
return False
|
33
|
+
if seed_url_var_name not in script.vars:
|
34
|
+
logger.error(f"{node_name}: {seed_url_var_name}: seed_urls not found in vars.")
|
35
|
+
return False
|
36
|
+
|
37
|
+
seed_urls = script.vars[seed_url_var_name]
|
38
|
+
log_list(logger, seed_urls, "seed url(s)")
|
21
39
|
|
22
40
|
visited_urls = crawl_list_of_urls(
|
23
|
-
seed_urls=
|
41
|
+
seed_urls=seed_urls,
|
24
42
|
object_name=script.object_name,
|
25
43
|
max_iterations=script.nodes[node_name]["max_iterations"],
|
26
44
|
)
|
@@ -27,8 +27,6 @@ class BaseScript:
|
|
27
27
|
self.object_name = object_name
|
28
28
|
|
29
29
|
self.test_mode = test_mode
|
30
|
-
if self.test_mode:
|
31
|
-
logger.info("💰 test mode is on.")
|
32
30
|
|
33
31
|
self.verbose = verbose
|
34
32
|
|
@@ -41,14 +39,70 @@ class BaseScript:
|
|
41
39
|
success, self.metadata = file.load_yaml(metadata_filename)
|
42
40
|
assert success, f"cannot load {self.name}/metadata.yaml"
|
43
41
|
|
44
|
-
|
42
|
+
self.metadata.setdefault("script", {})
|
43
|
+
assert isinstance(
|
44
|
+
self.script,
|
45
|
+
dict,
|
46
|
+
), "script: expected dict, received {}.".format(
|
47
|
+
self.script.__class__.__name__,
|
48
|
+
)
|
49
|
+
|
50
|
+
self.script.setdefault("nodes", {})
|
51
|
+
assert isinstance(
|
52
|
+
self.nodes,
|
53
|
+
dict,
|
54
|
+
), "nodes: expected dict, received {}.".format(
|
55
|
+
self.nodes.__class__.__name__,
|
56
|
+
)
|
57
|
+
|
58
|
+
self.script.setdefault("vars", {})
|
59
|
+
assert isinstance(
|
60
|
+
self.vars,
|
61
|
+
dict,
|
62
|
+
), "vars: expected dict, received {}.".format(
|
63
|
+
self.vars.__class__.__name__,
|
64
|
+
)
|
65
|
+
|
66
|
+
if self.test_mode:
|
67
|
+
logger.info("🧪 test mode is on.")
|
68
|
+
|
69
|
+
for node_name, node in self.nodes.items():
|
70
|
+
if "test_mode" in self.script:
|
71
|
+
updates = self.script["test_mode"]
|
72
|
+
logger.info(f"🧪 vars.update({updates})")
|
73
|
+
self.vars.update(updates)
|
74
|
+
|
75
|
+
if "test_mode" in node:
|
76
|
+
updates = node["test_mode"]
|
77
|
+
logger.info(f"🧪 {node_name}.update({updates})")
|
78
|
+
node.update(updates)
|
45
79
|
|
46
|
-
logger.info(
|
80
|
+
logger.info(
|
81
|
+
"loaded {} node(s): {}".format(
|
82
|
+
len(self.nodes),
|
83
|
+
", ".join(self.nodes.keys()),
|
84
|
+
)
|
85
|
+
)
|
86
|
+
|
87
|
+
logger.info(
|
88
|
+
"loaded {} var(s){}".format(
|
89
|
+
len(self.vars),
|
90
|
+
"" if verbose else ": {}".format(", ".join(self.vars.keys())),
|
91
|
+
)
|
92
|
+
)
|
47
93
|
if verbose:
|
48
94
|
for var_name, var_value in self.vars.items():
|
49
95
|
logger.info("{}: {}".format(var_name, var_value))
|
50
96
|
|
51
|
-
assert self.generate_graph(), "cannot generate graph"
|
97
|
+
assert self.generate_graph(), "cannot generate graph."
|
98
|
+
|
99
|
+
def __str__(self) -> str:
|
100
|
+
return "{}[{} var(s), {} node(s) -> {}]".format(
|
101
|
+
self.__class__.__name__,
|
102
|
+
len(self.vars),
|
103
|
+
len(self.nodes),
|
104
|
+
self.object_name,
|
105
|
+
)
|
52
106
|
|
53
107
|
def apply_vars(self, text: str) -> str:
|
54
108
|
for var_name, var_value in self.vars.items():
|
@@ -126,12 +180,12 @@ class BaseScript:
|
|
126
180
|
# Aliases
|
127
181
|
@property
|
128
182
|
def script(self) -> Dict:
|
129
|
-
return self.metadata
|
183
|
+
return self.metadata["script"]
|
130
184
|
|
131
185
|
@property
|
132
186
|
def nodes(self) -> Dict[str, Dict]:
|
133
|
-
return self.metadata
|
187
|
+
return self.metadata["script"]["nodes"]
|
134
188
|
|
135
189
|
@property
|
136
190
|
def vars(self) -> Dict:
|
137
|
-
return self.metadata
|
191
|
+
return self.metadata["script"]["vars"]
|
@@ -35,9 +35,7 @@ class GenericScript(BaseScript):
|
|
35
35
|
node_name=node_name,
|
36
36
|
)
|
37
37
|
|
38
|
-
def run(
|
39
|
-
self,
|
40
|
-
) -> bool:
|
38
|
+
def run(self) -> bool:
|
41
39
|
if not super().run():
|
42
40
|
return False
|
43
41
|
|
@@ -50,6 +48,11 @@ class GenericScript(BaseScript):
|
|
50
48
|
if self.nodes[node_name].get("completed", False):
|
51
49
|
continue
|
52
50
|
|
51
|
+
if not self.nodes[node_name].get("runnable", True):
|
52
|
+
logger.info(f"Not runnable, skipped: {node_name}.")
|
53
|
+
self.nodes[node_name]["completed"] = True
|
54
|
+
continue
|
55
|
+
|
53
56
|
pending_dependencies = [
|
54
57
|
node_name_
|
55
58
|
for node_name_ in self.G.successors(node_name)
|
@@ -1,40 +1,7 @@
|
|
1
1
|
from blue_objects import file, path
|
2
2
|
|
3
3
|
from blue_assistant.script.repository.generic.classes import GenericScript
|
4
|
-
from blue_assistant.script.repository.orbital_data_explorer.actions import (
|
5
|
-
dict_of_actions,
|
6
|
-
)
|
7
4
|
|
8
5
|
|
9
6
|
class OrbitalDataExplorerScript(GenericScript):
|
10
7
|
name = path.name(file.path(__file__))
|
11
|
-
|
12
|
-
def __init__(
|
13
|
-
self,
|
14
|
-
object_name: str,
|
15
|
-
test_mode: bool = False,
|
16
|
-
verbose: bool = False,
|
17
|
-
):
|
18
|
-
super().__init__(
|
19
|
-
object_name=object_name,
|
20
|
-
test_mode=test_mode,
|
21
|
-
verbose=verbose,
|
22
|
-
)
|
23
|
-
|
24
|
-
if self.test_mode:
|
25
|
-
self.nodes["researching_the_questions"]["max_iterations"] = 3
|
26
|
-
|
27
|
-
def perform_action(
|
28
|
-
self,
|
29
|
-
node_name: str,
|
30
|
-
) -> bool:
|
31
|
-
if not super().perform_action(node_name=node_name):
|
32
|
-
return False
|
33
|
-
|
34
|
-
if node_name in dict_of_actions:
|
35
|
-
return dict_of_actions[node_name](
|
36
|
-
script=self,
|
37
|
-
node_name=node_name,
|
38
|
-
)
|
39
|
-
|
40
|
-
return True
|
blue_assistant/web/__main__.py
CHANGED
@@ -2,9 +2,11 @@ import argparse
|
|
2
2
|
|
3
3
|
from blueness import module
|
4
4
|
from blueness.argparse.generic import sys_exit
|
5
|
+
from blue_options.logger import log_dict
|
6
|
+
from blue_objects.metadata import post_to_object
|
5
7
|
|
6
8
|
from blue_assistant import NAME
|
7
|
-
from blue_assistant.web.
|
9
|
+
from blue_assistant.web.functions import crawl_list_of_urls, fetch_links_and_text
|
8
10
|
from blue_assistant.logger import logger
|
9
11
|
|
10
12
|
NAME = module.name(__file__, NAME)
|
@@ -13,7 +15,7 @@ parser = argparse.ArgumentParser(NAME)
|
|
13
15
|
parser.add_argument(
|
14
16
|
"task",
|
15
17
|
type=str,
|
16
|
-
help="crawl",
|
18
|
+
help="crawl | fetch",
|
17
19
|
)
|
18
20
|
parser.add_argument(
|
19
21
|
"--max_iterations",
|
@@ -26,6 +28,10 @@ parser.add_argument(
|
|
26
28
|
default=0,
|
27
29
|
help="0 | 1",
|
28
30
|
)
|
31
|
+
parser.add_argument(
|
32
|
+
"--url",
|
33
|
+
type=str,
|
34
|
+
)
|
29
35
|
parser.add_argument(
|
30
36
|
"--seed_urls",
|
31
37
|
type=str,
|
@@ -38,22 +44,34 @@ args = parser.parse_args()
|
|
38
44
|
|
39
45
|
success = False
|
40
46
|
if args.task == "crawl":
|
41
|
-
|
42
|
-
|
43
|
-
output = crawl_list_of_urls(
|
47
|
+
dict_of_urls = crawl_list_of_urls(
|
44
48
|
seed_urls=args.seed_urls.split("+"),
|
45
49
|
object_name=args.object_name,
|
46
50
|
max_iterations=args.max_iterations,
|
47
51
|
)
|
48
52
|
|
49
53
|
if args.verbose == 1:
|
50
|
-
logger
|
51
|
-
for index, (url, content) in enumerate(output.items()):
|
52
|
-
logger.info(f"#{index: 4} - {url}: {content[:200]}...\n")
|
53
|
-
if index > 10:
|
54
|
-
logger.info("...")
|
55
|
-
break
|
54
|
+
log_dict(logger, dict_of_urls, "url(s)")
|
56
55
|
|
56
|
+
success = post_to_object(
|
57
|
+
args.object_name,
|
58
|
+
NAME.replace(".", "-"),
|
59
|
+
dict_of_urls,
|
60
|
+
)
|
61
|
+
elif args.task == "fetch":
|
62
|
+
links, text = fetch_links_and_text(
|
63
|
+
url=args.url,
|
64
|
+
verbose=True,
|
65
|
+
)
|
66
|
+
|
67
|
+
success = post_to_object(
|
68
|
+
args.object_name,
|
69
|
+
NAME.replace(".", "-"),
|
70
|
+
{
|
71
|
+
"links": list(links),
|
72
|
+
"text": text,
|
73
|
+
},
|
74
|
+
)
|
57
75
|
else:
|
58
76
|
success = None
|
59
77
|
|
@@ -1,18 +1,25 @@
|
|
1
|
-
from typing import List, Dict, Set
|
1
|
+
from typing import List, Dict, Set, Tuple
|
2
2
|
import requests
|
3
|
-
from bs4 import BeautifulSoup
|
4
|
-
from urllib.parse import urljoin
|
3
|
+
from bs4 import BeautifulSoup, XMLParsedAsHTMLWarning
|
4
|
+
from urllib.parse import urljoin
|
5
|
+
import re
|
6
|
+
import warnings
|
5
7
|
|
6
8
|
from blueness import module
|
7
|
-
|
9
|
+
from blue_options.logger import log_long_text, log_list
|
8
10
|
|
9
11
|
from blue_assistant import NAME
|
10
12
|
from blue_assistant.logger import logger
|
11
13
|
|
14
|
+
warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
|
15
|
+
|
12
16
|
NAME = module.name(__file__, NAME)
|
13
17
|
|
14
18
|
|
15
|
-
def
|
19
|
+
def fetch_links_and_text(
|
20
|
+
url: str,
|
21
|
+
verbose: bool = False,
|
22
|
+
) -> Tuple[List[str], str]:
|
16
23
|
try:
|
17
24
|
response = requests.get(url, timeout=5)
|
18
25
|
response.raise_for_status()
|
@@ -23,17 +30,27 @@ def fetch_links_and_content(url, base_url, original_path):
|
|
23
30
|
links = set()
|
24
31
|
|
25
32
|
for a_tag in soup.find_all("a", href=True):
|
26
|
-
|
27
|
-
|
33
|
+
a_url = urljoin(url, a_tag["href"])
|
34
|
+
|
35
|
+
if a_url.startswith(url):
|
36
|
+
logger.info(f"+= {a_url}")
|
37
|
+
links.add(a_url)
|
38
|
+
continue
|
28
39
|
|
29
|
-
|
30
|
-
if parsed_url.netloc == urlparse(
|
31
|
-
base_url
|
32
|
-
).netloc and parsed_url.path.startswith(original_path):
|
33
|
-
links.add(full_url)
|
40
|
+
logger.info(f"ignored: {a_url}")
|
34
41
|
|
35
42
|
plain_text = soup.get_text(separator=" ", strip=True)
|
36
43
|
|
44
|
+
# remove non-ASCII characters
|
45
|
+
plain_text = re.sub(r"[^\x20-\x7E]+", "", plain_text)
|
46
|
+
for thing in ["\r", "\n", "\t"]:
|
47
|
+
plain_text = plain_text.replace(thing, " ")
|
48
|
+
plain_text = re.sub(r"\s+", " ", plain_text).strip()
|
49
|
+
|
50
|
+
if verbose:
|
51
|
+
log_list(logger, list(links), "link(s)")
|
52
|
+
log_long_text(logger, plain_text)
|
53
|
+
|
37
54
|
return links, plain_text
|
38
55
|
|
39
56
|
|
@@ -41,6 +58,7 @@ def crawl_list_of_urls(
|
|
41
58
|
seed_urls: List[str],
|
42
59
|
object_name: str,
|
43
60
|
max_iterations: int = 10,
|
61
|
+
verbose: bool = False,
|
44
62
|
) -> Dict[str, str]:
|
45
63
|
logger.info(
|
46
64
|
"{}.crawl_list_of_urls({}): {} -> {}".format(
|
@@ -53,23 +71,20 @@ def crawl_list_of_urls(
|
|
53
71
|
|
54
72
|
visited: Dict[str, str] = {}
|
55
73
|
queue: Set[str] = set(seed_urls)
|
56
|
-
base_url = urlparse(seed_urls[0]).scheme + "://" + urlparse(seed_urls[0]).netloc
|
57
|
-
original_path = (
|
58
|
-
urlparse(seed_urls[0]).path.rsplit("/", 1)[0] + "/"
|
59
|
-
) # Get base directory
|
60
74
|
|
61
75
|
iteration: int = 0
|
62
76
|
while queue:
|
63
|
-
|
64
|
-
if
|
77
|
+
url = queue.pop()
|
78
|
+
if url in visited:
|
65
79
|
continue
|
66
80
|
|
67
|
-
logger.info(f"🔗 {
|
68
|
-
|
69
|
-
|
81
|
+
logger.info(f"🔗 {url} ...")
|
82
|
+
url_links, url_text = fetch_links_and_text(
|
83
|
+
url=url,
|
84
|
+
verbose=verbose,
|
70
85
|
)
|
71
|
-
visited[
|
72
|
-
queue.update(
|
86
|
+
visited[url] = url_text
|
87
|
+
queue.update(url_links - visited.keys())
|
73
88
|
|
74
89
|
iteration += 1
|
75
90
|
if max_iterations != -1 and iteration >= max_iterations:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: blue_assistant
|
3
|
-
Version: 4.
|
3
|
+
Version: 4.268.1
|
4
4
|
Summary: 🧠 An AI Assistant.
|
5
5
|
Home-page: https://github.com/kamangir/blue-assistant
|
6
6
|
Author: Arash Abadpour (Kamangir)
|
@@ -56,10 +56,14 @@ graph LR
|
|
56
56
|
assistant_script_list["@assistant<br>script<br>list"]
|
57
57
|
assistant_script_run["@assistant<br>script<br>run -<br><script><br><object-name>"]
|
58
58
|
|
59
|
-
|
59
|
+
web_crawl["@web<br>crawl -<br><url-1>+<url-2><br><object-name>"]
|
60
|
+
|
61
|
+
web_fetch["@web<br>fetch -<br><url><br><object-name>"]
|
60
62
|
|
61
63
|
script["📜 script"]:::folder
|
62
64
|
url["🔗 url"]:::folder
|
65
|
+
url2["🔗 url"]:::folder
|
66
|
+
url3["🔗 url"]:::folder
|
63
67
|
object["📂 object"]:::folder
|
64
68
|
|
65
69
|
|
@@ -69,9 +73,13 @@ graph LR
|
|
69
73
|
object --> assistant_script_run
|
70
74
|
assistant_script_run --> object
|
71
75
|
|
72
|
-
url -->
|
73
|
-
|
76
|
+
url --> web_crawl
|
77
|
+
url2 --> web_crawl
|
78
|
+
web_crawl --> url3
|
79
|
+
web_crawl --> object
|
74
80
|
|
81
|
+
url --> web_fetch
|
82
|
+
web_fetch --> object
|
75
83
|
|
76
84
|
bridge_ip["🔗 bridge_ip"]:::folder
|
77
85
|
hue_username["🔗 hue_username"]:::folder
|
@@ -104,12 +112,16 @@ graph LR
|
|
104
112
|
|
105
113
|
| | |
|
106
114
|
| --- | --- |
|
107
|
-
| [`
|
108
|
-
| [`
|
115
|
+
| [`orbital-data-explorer`](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/repository/orbital_data_explorer) [](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/repository/orbital_data_explorer) Access to the [Orbital Data Explorer](https://ode.rsl.wustl.edu/). 🔥 | [`🌀 blue script`](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/) [](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/) A minimal AI DAG interface. |
|
116
|
+
| [`@hue`](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/repository/hue) [](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/repository/hue) "send a color command to the Hue LED lights in my apartment." | [`blue-amo`](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/repository/blue_amo/README.md) [](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/script/repository/blue_amo/README.md) Story development and visualization. |
|
117
|
+
|
118
|
+
---
|
119
|
+
|
120
|
+
Also home to [`@web`](https://raw.githubusercontent.com/kamangir/blue-assistant/main/blue_assistant/web/)
|
109
121
|
|
110
122
|
---
|
111
123
|
|
112
124
|
|
113
125
|
[](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml) [](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml) [](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml) [](https://pypi.org/project/blue-assistant/) [](https://pypistats.org/packages/blue-assistant)
|
114
126
|
|
115
|
-
built by 🌀 [`blue_options-4.
|
127
|
+
built by 🌀 [`blue_options-4.236.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.268.1`](https://github.com/kamangir/blue-assistant).
|
@@ -1,5 +1,5 @@
|
|
1
|
-
blue_assistant/README.py,sha256=
|
2
|
-
blue_assistant/__init__.py,sha256=
|
1
|
+
blue_assistant/README.py,sha256=EJORj3I5pucJplI86lrFaZBN5C9IYNgKoG_V7h27NHw,2586
|
2
|
+
blue_assistant/__init__.py,sha256=5VPcpYS8pA9i9Mzr1cjbFQudWPy0Fa0eMv4UD4wYQJw,311
|
3
3
|
blue_assistant/__main__.py,sha256=URtal70XZc0--3FDTYWcLtnGOqBYjMX9gt-L1k8hDXI,361
|
4
4
|
blue_assistant/config.env,sha256=npodyuuhkZUHUv9FnEiQQZkKxFbg8nQb1YpOCURqV3Y,301
|
5
5
|
blue_assistant/env.py,sha256=FTSdJ8-J4jAyI0-h3MBgOweQBWd3YEFIibBHSXpClrY,760
|
@@ -10,7 +10,7 @@ blue_assistant/sample.env,sha256=rFfaN3lwiVm1CW28Pi0ZPwJPuu7_r3QWL54jIgHK_fY,20
|
|
10
10
|
blue_assistant/urls.py,sha256=59Op4CwgZeo1ZtFouisZxMk07zJNBOqlVAi8tXpsidM,20
|
11
11
|
blue_assistant/.abcli/abcli.sh,sha256=56ZicaXpbZ4zuaGPZJTEgfajokNUWTklzl38vENGzz0,198
|
12
12
|
blue_assistant/.abcli/actions.sh,sha256=vW1hNMuhjghvqib0775kDzDwqGnqPo3mqLTUkPCd8z4,236
|
13
|
-
blue_assistant/.abcli/alias.sh,sha256=
|
13
|
+
blue_assistant/.abcli/alias.sh,sha256=C9AzxECPt1FaBmJIt3JPR9BcSzkoZS1w_1gaInvcBdw,116
|
14
14
|
blue_assistant/.abcli/blue_assistant.sh,sha256=plLTQQerVmfb_SNlOkv0MEaQCF7YdsOHzCq0M3FWT4c,239
|
15
15
|
blue_assistant/.abcli/browse.sh,sha256=qZ_RK_WnsjmF-hfWKiMEOnnv22QtZh9HQ0VFJUbP6aI,294
|
16
16
|
blue_assistant/.abcli/hue.sh,sha256=avQT49SlA2ZPDvSdme1vWqDAYtAOHJQI8-3LdqXvBZc,362
|
@@ -22,49 +22,50 @@ blue_assistant/.abcli/hue/set.sh,sha256=VcADsfbjjbrxIMX9cVVHeK0MH649ZRY29V8YDTgf
|
|
22
22
|
blue_assistant/.abcli/script/list.sh,sha256=2lcVfqDfZP50NszF8o5YCo3TrJKeDc_qo7MTAF3XTGw,131
|
23
23
|
blue_assistant/.abcli/script/run.sh,sha256=kSXmyM9NUj2X2orSGyu5t_P5frG-gyumbRq-xqF692c,911
|
24
24
|
blue_assistant/.abcli/tests/README.sh,sha256=Qs0YUxVB1OZZ70Nqw2kT1LKXeUnC5-XfQRMfqb8Cbwg,152
|
25
|
-
blue_assistant/.abcli/tests/help.sh,sha256=
|
25
|
+
blue_assistant/.abcli/tests/help.sh,sha256=7AAZzCEo5vZ1cBAMfj4virDClabaUMdOV-NqXSJQVUM,918
|
26
26
|
blue_assistant/.abcli/tests/script_list.sh,sha256=OVOwWO9wR0eeDZTM6uub-eTKbz3eswU3vEUPWXcK-gQ,178
|
27
27
|
blue_assistant/.abcli/tests/script_run.sh,sha256=vfmK8sjkMfSQPwCacQppiL6inMbvQP7nci7qLppFSL0,769
|
28
28
|
blue_assistant/.abcli/tests/version.sh,sha256=oR2rvYR8zi-0VDPIdPJsmsmWwYaamT8dmNTqUh3-8Gw,154
|
29
|
-
blue_assistant/.abcli/tests/web_crawl.sh,sha256=
|
29
|
+
blue_assistant/.abcli/tests/web_crawl.sh,sha256=sz3LbpidWvjG7kQoWxQBtdBe5yntm14ylAUsgPJWhko,372
|
30
|
+
blue_assistant/.abcli/tests/web_fetch.sh,sha256=C8PFWlmRa9heNdP9yhshriCBKG1uUlps-oxhAM70AZI,312
|
30
31
|
blue_assistant/.abcli/web/crawl.sh,sha256=M9YoKKJBKZT2OtmFPvRCSSKpiAq0zyacRAVZ6s7i3FM,698
|
32
|
+
blue_assistant/.abcli/web/fetch.sh,sha256=9SggFZTtpff-gnCd987zP6UqzG4So5D4px2jMg2Vicc,674
|
31
33
|
blue_assistant/help/__init__.py,sha256=ajz1GSNU9xYVrFEDSz6Xwg7amWQ_yvW75tQa1ZvRIWc,3
|
32
34
|
blue_assistant/help/__main__.py,sha256=cVejR7OpoWPg0qLbm-PZf5TuJS27x49jzfiyCLyzEns,241
|
33
35
|
blue_assistant/help/functions.py,sha256=O85zVEMtnm32O7KB6W6uQRoFXnE_4dW5pwYZtMakYDg,865
|
34
36
|
blue_assistant/help/hue.py,sha256=ZElPG24ekiS7eIGLVrP2gB_womlGUuwln2cded4Li-c,2319
|
35
37
|
blue_assistant/help/script.py,sha256=tofv49tIBqoH8ed9hDCFHqzWaXmyyPofvqElk2n976w,1121
|
36
|
-
blue_assistant/help/web.py,sha256
|
38
|
+
blue_assistant/help/web.py,sha256=-vi1P3p6zKQFbQzg9qhUMsjtZBLTjdJ0PZh7RokE9Wc,908
|
37
39
|
blue_assistant/script/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
40
|
blue_assistant/script/__main__.py,sha256=eOSOo5yYTPMwIXZ0GkuWkmOcsDWrZtHvClyJizXSk2w,1657
|
39
41
|
blue_assistant/script/load.py,sha256=JsDY9T3HTM9vXngvKsA0Mt_erxAnRR_jI62-JhrOBMU,831
|
40
|
-
blue_assistant/script/actions/__init__.py,sha256=
|
42
|
+
blue_assistant/script/actions/__init__.py,sha256=GJJCaXSrfhwW9K5A2PHmrS7iGl-Ss9UUeYHUtxni4k0,576
|
41
43
|
blue_assistant/script/actions/generate_image.py,sha256=PgvOspDV8n2M7ZmgVOdZzJwQ1tnJNJ6V8gV94P74ksA,1336
|
42
44
|
blue_assistant/script/actions/generate_text.py,sha256=SSyII0QPqiD538hveaN3RSyEHOt_lJ7Q-AVUg6fmlaM,1889
|
43
45
|
blue_assistant/script/actions/generic.py,sha256=ET1RaKcUABM8HdIv8JecSpUFasYqmwHacL-5LjF-8NM,355
|
46
|
+
blue_assistant/script/actions/web_crawl.py,sha256=3dYt5nTZpTwXSoM30Ik5yS409kkWt4L5GYrsA9LlZN0,1872
|
44
47
|
blue_assistant/script/repository/__init__.py,sha256=zVI3cubRqM9H6WgF0EUP9idILVLCumPFmJgKPM7iVlM,604
|
45
48
|
blue_assistant/script/repository/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
46
|
-
blue_assistant/script/repository/base/classes.py,sha256
|
49
|
+
blue_assistant/script/repository/base/classes.py,sha256=JWUnXsFsh0sogOUqJSQSJuHbPHWzFNvTpEFEJ1mX0XI,5276
|
47
50
|
blue_assistant/script/repository/blue_amo/__init__.py,sha256=WjL9GIlN-DBnbUMJ8O_FxTp0rcVGlsIS3H9YtXEefTk,76
|
48
51
|
blue_assistant/script/repository/blue_amo/classes.py,sha256=Rl_UX67AlfhVEhv8tfgdWKItOxI3cUfiGsN12-k8sWI,2085
|
49
52
|
blue_assistant/script/repository/blue_amo/actions/__init__.py,sha256=je2S21KvYB3QkbABs71parwUh8MCh2mdlNZfLx_QuDg,430
|
50
53
|
blue_assistant/script/repository/blue_amo/actions/slicing_into_frames.py,sha256=79SI7_69FKKLeX_jHlfXnUWGtG4Sj7sBJOfeFwK8I9U,1201
|
51
54
|
blue_assistant/script/repository/blue_amo/actions/stitching_the_frames.py,sha256=mbXriat6deEAmuo5Y1ValySnUXDENR7TZS_3nVPlQ6M,3622
|
52
55
|
blue_assistant/script/repository/generic/__init__.py,sha256=kLffGsQMQAFJTw6IZBE5eBxvshP1x9wwHHR4hsDJblo,75
|
53
|
-
blue_assistant/script/repository/generic/classes.py,sha256=
|
56
|
+
blue_assistant/script/repository/generic/classes.py,sha256=0_AE8TcSvtew5oM9BuJGJoi8jzRqImCmAwSpbn4dHuQ,2535
|
54
57
|
blue_assistant/script/repository/hue/__init__.py,sha256=WjL9GIlN-DBnbUMJ8O_FxTp0rcVGlsIS3H9YtXEefTk,76
|
55
58
|
blue_assistant/script/repository/hue/__main__.py,sha256=jaesrONQsrpVdg8A7NzzT8xpsdXs5gmrywOTE_TWD6c,2321
|
56
59
|
blue_assistant/script/repository/hue/api.py,sha256=C3KzT_MG868gsznUXpwEbUleBjnJObWzZgzvN6wi3uo,4774
|
57
60
|
blue_assistant/script/repository/hue/classes.py,sha256=YhifmcuylnZuI0_BjBPmwrSbsO-BOHDHNJ0pSLIExiE,188
|
58
61
|
blue_assistant/script/repository/hue/colors.py,sha256=rUdtCroNAnzm1zUuVp8eVhvfIie1f7sd208ypsFAJ_w,625
|
59
62
|
blue_assistant/script/repository/orbital_data_explorer/__init__.py,sha256=yy5FtCeHlr9dRfqxw4QYWr7_yRjnQpwVyuAY2vLrh4Q,110
|
60
|
-
blue_assistant/script/repository/orbital_data_explorer/classes.py,sha256=
|
61
|
-
blue_assistant/script/repository/orbital_data_explorer/actions/__init__.py,sha256=RcrFUAwnvhuwNh3gC65w9G26vd_cIa7LV1lFvGFcigk,370
|
62
|
-
blue_assistant/script/repository/orbital_data_explorer/actions/researching_the_questions.py,sha256=MDhncDBCLH-T7nfHjlfiN_nKv6gsY4YmiNWguVvKq_g,1100
|
63
|
+
blue_assistant/script/repository/orbital_data_explorer/classes.py,sha256=ixYd_FHWYtp8Sfd6AiZkIqePjoUlT9iLg7TvuxHIDzA,204
|
63
64
|
blue_assistant/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
64
|
-
blue_assistant/web/__main__.py,sha256=
|
65
|
-
blue_assistant/web/
|
66
|
-
blue_assistant-4.
|
67
|
-
blue_assistant-4.
|
68
|
-
blue_assistant-4.
|
69
|
-
blue_assistant-4.
|
70
|
-
blue_assistant-4.
|
65
|
+
blue_assistant/web/__main__.py,sha256=f6uPh7LnvIuVtrC9kZGbCr8nDFEJju-GcUgdRVsQvQE,1613
|
66
|
+
blue_assistant/web/functions.py,sha256=AiezAvbw0eHG9XaYnXw8KQ_OjCfDvVPzsZxrzxwyPyg,2483
|
67
|
+
blue_assistant-4.268.1.dist-info/LICENSE,sha256=ogEPNDSH0_dhiv_lT3ifVIdgIzHAqNA_SemnxUfPBJk,7048
|
68
|
+
blue_assistant-4.268.1.dist-info/METADATA,sha256=Toip9qF5bXXpDE8PZprhRkVi5kmY7GR5rXztqK8xjtw,5380
|
69
|
+
blue_assistant-4.268.1.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
70
|
+
blue_assistant-4.268.1.dist-info/top_level.txt,sha256=ud0BkBbdOVze13bNqHuhZj1rwCztaBtDf5ChEYzASOs,15
|
71
|
+
blue_assistant-4.268.1.dist-info/RECORD,,
|
@@ -1,11 +0,0 @@
|
|
1
|
-
from typing import Dict, Callable
|
2
|
-
|
3
|
-
from blue_assistant.script.repository.base.classes import BaseScript
|
4
|
-
from blue_assistant.script.repository.orbital_data_explorer.actions import (
|
5
|
-
researching_the_questions,
|
6
|
-
)
|
7
|
-
|
8
|
-
|
9
|
-
dict_of_actions: Dict[str, Callable[[BaseScript, str], bool]] = {
|
10
|
-
"researching_the_questions": researching_the_questions.researching_the_questions,
|
11
|
-
}
|
File without changes
|
File without changes
|
File without changes
|