blue-assistant 4.283.1__tar.gz → 4.294.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {blue_assistant-4.283.1/blue_assistant.egg-info → blue_assistant-4.294.1}/PKG-INFO +2 -2
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/README.md +1 -1
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/script/run.sh +1 -3
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/web/crawl.sh +1 -3
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/__init__.py +1 -1
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/help/script.py +1 -1
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/help/web.py +1 -1
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/__main__.py +1 -9
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/actions/generate_image.py +0 -1
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/actions/generate_text.py +0 -1
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/actions/generic.py +0 -1
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/actions/web_crawl.py +4 -6
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/base/classes.py +1 -3
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/blue_amo/actions/slicing_into_frames.py +0 -1
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/blue_amo/actions/stitching_the_frames.py +0 -1
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/blue_amo/classes.py +0 -3
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/generic/classes.py +1 -5
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/web/__main__.py +2 -10
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/web/crawl.py +49 -43
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/web/fetch.py +3 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1/blue_assistant.egg-info}/PKG-INFO +2 -2
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/LICENSE +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/MANIFEST.in +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/abcli.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/actions.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/alias.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/blue_assistant.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/browse.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/hue/create_user.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/hue/list.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/hue/set.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/hue.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/script/list.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/script.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/tests/README.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/tests/help.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/tests/script_list.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/tests/script_run.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/tests/version.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/tests/web_crawl.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/tests/web_fetch.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/web/fetch.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/web.sh +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/README.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/__main__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/config.env +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/env.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/functions.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/help/__init__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/help/__main__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/help/functions.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/help/hue.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/host.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/logger.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/sample.env +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/__init__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/actions/__init__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/load.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/__init__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/base/__init__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/blue_amo/__init__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/blue_amo/actions/__init__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/generic/__init__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/hue/__init__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/hue/__main__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/hue/api.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/hue/classes.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/hue/colors.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/orbital_data_explorer/__init__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/orbital_data_explorer/classes.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/urls.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/web/__init__.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/web/functions.py +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant.egg-info/SOURCES.txt +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant.egg-info/dependency_links.txt +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant.egg-info/requires.txt +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant.egg-info/top_level.txt +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/pyproject.toml +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/requirements.txt +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/setup.cfg +0 -0
- {blue_assistant-4.283.1 → blue_assistant-4.294.1}/setup.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: blue_assistant
|
3
|
-
Version: 4.283.1
|
3
|
+
Version: 4.294.1
|
4
4
|
Summary: 🧠 An AI Assistant.
|
5
5
|
Home-page: https://github.com/kamangir/blue-assistant
|
6
6
|
Author: Arash Abadpour (Kamangir)
|
@@ -124,4 +124,4 @@ Also home to [`@web`](https://raw.githubusercontent.com/kamangir/blue-assistant/
|
|
124
124
|
|
125
125
|
[](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml) [](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml) [](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml) [](https://pypi.org/project/blue-assistant/) [](https://pypistats.org/packages/blue-assistant)
|
126
126
|
|
127
|
-
built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.283.1`](https://github.com/kamangir/blue-assistant).
|
127
|
+
built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.294.1`](https://github.com/kamangir/blue-assistant).
|
@@ -79,4 +79,4 @@ Also home to [`@web`](./blue_assistant/web/)
|
|
79
79
|
|
80
80
|
[](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml) [](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml) [](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml) [](https://pypi.org/project/blue-assistant/) [](https://pypistats.org/packages/blue-assistant)
|
81
81
|
|
82
|
-
built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.283.1`](https://github.com/kamangir/blue-assistant).
|
82
|
+
built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.294.1`](https://github.com/kamangir/blue-assistant).
|
@@ -3,8 +3,7 @@
|
|
3
3
|
function blue_assistant_script_run() {
|
4
4
|
local options=$1
|
5
5
|
local do_dryrun=$(abcli_option_int "$options" dryrun 0)
|
6
|
-
local
|
7
|
-
local do_download=$(abcli_option_int "$options" download $use_cache)
|
6
|
+
local do_download=$(abcli_option_int "$options" download $(abcli_not $do_dryrun))
|
8
7
|
local do_upload=$(abcli_option_int "$options" upload $(abcli_not $do_dryrun))
|
9
8
|
|
10
9
|
local script_options=$2
|
@@ -21,7 +20,6 @@ function blue_assistant_script_run() {
|
|
21
20
|
run \
|
22
21
|
--script_name $script_name \
|
23
22
|
--object_name $object_name \
|
24
|
-
--use_cache $use_cache \
|
25
23
|
"${@:4}"
|
26
24
|
[[ $? -ne 0 ]] && return 1
|
27
25
|
|
@@ -3,8 +3,7 @@
|
|
3
3
|
function blue_assistant_web_crawl() {
|
4
4
|
local options=$1
|
5
5
|
local do_dryrun=$(abcli_option_int "$options" dryrun 0)
|
6
|
-
local
|
7
|
-
local do_download=$(abcli_option_int "$options" download $use_cache)
|
6
|
+
local do_download=$(abcli_option_int "$options" download $(abcli_not $do_dryrun))
|
8
7
|
local do_upload=$(abcli_option_int "$options" upload $(abcli_not $do_dryrun))
|
9
8
|
|
10
9
|
local seed_urls=${2:-void}
|
@@ -20,7 +19,6 @@ function blue_assistant_web_crawl() {
|
|
20
19
|
crawl \
|
21
20
|
--seed_urls $seed_urls \
|
22
21
|
--object_name $object_name \
|
23
|
-
--use_cache $use_cache \
|
24
22
|
"${@:4}"
|
25
23
|
[[ $? -ne 0 ]] && return 1
|
26
24
|
|
@@ -48,12 +48,6 @@ parser.add_argument(
|
|
48
48
|
default=1,
|
49
49
|
help="0 | 1",
|
50
50
|
)
|
51
|
-
parser.add_argument(
|
52
|
-
"--use_cache",
|
53
|
-
type=int,
|
54
|
-
default=1,
|
55
|
-
help="0 | 1",
|
56
|
-
)
|
57
51
|
args = parser.parse_args()
|
58
52
|
|
59
53
|
delim = " " if args.delim == "space" else args.delim
|
@@ -76,9 +70,7 @@ elif args.task == "run":
|
|
76
70
|
)
|
77
71
|
|
78
72
|
if success:
|
79
|
-
success = script.run(
|
80
|
-
use_cache=args.use_cache == 1,
|
81
|
-
)
|
73
|
+
success = script.run()
|
82
74
|
else:
|
83
75
|
success = None
|
84
76
|
|
{blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/actions/web_crawl.py
RENAMED
@@ -14,7 +14,6 @@ NAME = module.name(__file__, NAME)
|
|
14
14
|
def web_crawl(
|
15
15
|
script: BaseScript,
|
16
16
|
node_name: str,
|
17
|
-
use_cache: bool,
|
18
17
|
) -> bool:
|
19
18
|
logger.info(f"{NAME}: {script} @ {node_name} ...")
|
20
19
|
|
@@ -36,14 +35,13 @@ def web_crawl(
|
|
36
35
|
seed_urls = script.vars[seed_url_var_name]
|
37
36
|
log_list(logger, "using", seed_urls, "seed url(s)")
|
38
37
|
|
39
|
-
|
38
|
+
success, _ = crawl_list_of_urls(
|
40
39
|
seed_urls=seed_urls,
|
41
40
|
object_name=script.object_name,
|
42
41
|
max_iterations=script.nodes[node_name]["max_iterations"],
|
43
|
-
|
42
|
+
cache_prefix=node_name,
|
44
43
|
)
|
45
44
|
|
46
|
-
script.nodes[node_name]["
|
47
|
-
script.nodes[node_name]["output"] = "TBA"
|
45
|
+
script.nodes[node_name]["output"] = success
|
48
46
|
|
49
|
-
return
|
47
|
+
return success
|
{blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/base/classes.py
RENAMED
@@ -147,14 +147,12 @@ class BaseScript:
|
|
147
147
|
|
148
148
|
def run(
|
149
149
|
self,
|
150
|
-
use_cache: bool = True,
|
151
150
|
) -> bool:
|
152
151
|
logger.info(
|
153
|
-
"{}.run: {}:{}
|
152
|
+
"{}.run: {}:{} -> {}".format(
|
154
153
|
NAME,
|
155
154
|
self.__class__.__name__,
|
156
155
|
self.name,
|
157
|
-
"use-cache-" if use_cache else "",
|
158
156
|
self.object_name,
|
159
157
|
)
|
160
158
|
)
|
@@ -66,11 +66,9 @@ class BlueAmoScript(GenericScript):
|
|
66
66
|
def perform_action(
|
67
67
|
self,
|
68
68
|
node_name: str,
|
69
|
-
use_cache: bool,
|
70
69
|
) -> bool:
|
71
70
|
if not super().perform_action(
|
72
71
|
node_name=node_name,
|
73
|
-
use_cache=use_cache,
|
74
72
|
):
|
75
73
|
return False
|
76
74
|
|
@@ -78,7 +76,6 @@ class BlueAmoScript(GenericScript):
|
|
78
76
|
return dict_of_actions[node_name](
|
79
77
|
script=self,
|
80
78
|
node_name=node_name,
|
81
|
-
use_cache=use_cache,
|
82
79
|
)
|
83
80
|
|
84
81
|
return True
|
@@ -22,7 +22,6 @@ class GenericScript(BaseScript):
|
|
22
22
|
def perform_action(
|
23
23
|
self,
|
24
24
|
node_name: str,
|
25
|
-
use_cache: bool,
|
26
25
|
) -> bool:
|
27
26
|
action_name = self.nodes[node_name].get("action", "unknown")
|
28
27
|
logger.info(f"---- node: {node_name} ---- ")
|
@@ -34,14 +33,12 @@ class GenericScript(BaseScript):
|
|
34
33
|
return dict_of_actions[action_name](
|
35
34
|
script=self,
|
36
35
|
node_name=node_name,
|
37
|
-
use_cache=use_cache,
|
38
36
|
)
|
39
37
|
|
40
38
|
def run(
|
41
39
|
self,
|
42
|
-
use_cache: bool = True,
|
43
40
|
) -> bool:
|
44
|
-
if not super().run(
|
41
|
+
if not super().run():
|
45
42
|
return False
|
46
43
|
|
47
44
|
success: bool = True
|
@@ -75,7 +72,6 @@ class GenericScript(BaseScript):
|
|
75
72
|
|
76
73
|
if not self.perform_action(
|
77
74
|
node_name=node_name,
|
78
|
-
use_cache=use_cache,
|
79
75
|
):
|
80
76
|
success = False
|
81
77
|
break
|
@@ -40,26 +40,18 @@ parser.add_argument(
|
|
40
40
|
"--object_name",
|
41
41
|
type=str,
|
42
42
|
)
|
43
|
-
parser.add_argument(
|
44
|
-
"--use_cache",
|
45
|
-
type=int,
|
46
|
-
default=0,
|
47
|
-
help="0 | 1",
|
48
|
-
)
|
49
43
|
args = parser.parse_args()
|
50
44
|
|
51
45
|
success = False
|
52
46
|
if args.task == "crawl":
|
53
|
-
success =
|
54
|
-
dict_of_urls = crawl_list_of_urls(
|
47
|
+
success, crawl_cache = crawl_list_of_urls(
|
55
48
|
seed_urls=args.seed_urls.split("+"),
|
56
49
|
object_name=args.object_name,
|
57
50
|
max_iterations=args.max_iterations,
|
58
|
-
use_cache=args.use_cache == 1,
|
59
51
|
)
|
60
52
|
|
61
53
|
if args.verbose == 1:
|
62
|
-
log_dict(logger, "crawled", dict_of_urls, "url(s)")
|
54
|
+
log_dict(logger, "crawled", crawl_cache, "url(s)")
|
63
55
|
elif args.task == "fetch":
|
64
56
|
summary = fetch_links_and_text(
|
65
57
|
url=args.url,
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import List, Dict
|
1
|
+
from typing import List, Dict, Tuple
|
2
2
|
|
3
3
|
from blueness import module
|
4
4
|
from blue_options.logger import log_dict, log_list
|
@@ -18,46 +18,47 @@ def crawl_list_of_urls(
|
|
18
18
|
seed_urls: List[str],
|
19
19
|
object_name: str,
|
20
20
|
max_iterations: int = 10,
|
21
|
-
use_cache: bool = False,
|
22
21
|
verbose: bool = False,
|
23
|
-
|
22
|
+
cache_prefix: str = "",
|
23
|
+
) -> Tuple[bool, Dict[str, str]]:
|
24
24
|
logger.info(
|
25
|
-
"{}.crawl_list_of_urls({}): {}
|
25
|
+
"{}.crawl_list_of_urls({}): {} -> {}".format(
|
26
26
|
NAME,
|
27
27
|
len(seed_urls),
|
28
28
|
", ".join(seed_urls),
|
29
|
-
"use-cache-" if use_cache else "",
|
30
29
|
object_name,
|
31
30
|
)
|
32
31
|
)
|
33
32
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
)
|
43
|
-
log_dict(logger, "loaded cache:", crawl_cache, "url(s)")
|
44
|
-
|
45
|
-
queue += get_from_object(
|
46
|
-
object_name,
|
47
|
-
"crawl_queue",
|
48
|
-
[],
|
33
|
+
queue: List[str] = list(
|
34
|
+
set(
|
35
|
+
seed_urls
|
36
|
+
+ get_from_object(
|
37
|
+
object_name,
|
38
|
+
f"{cache_prefix}_crawl_queue",
|
39
|
+
[],
|
40
|
+
)
|
49
41
|
)
|
50
|
-
|
42
|
+
)
|
51
43
|
log_list(logger, "queue:", queue, "url(s)")
|
52
44
|
|
45
|
+
crawl_cache: Dict[str, str] = get_from_object(
|
46
|
+
object_name,
|
47
|
+
f"{cache_prefix}_crawl_cache",
|
48
|
+
{},
|
49
|
+
)
|
50
|
+
log_dict(logger, "loaded cache:", crawl_cache, "url(s)")
|
51
|
+
|
53
52
|
iteration: int = 0
|
54
53
|
while queue:
|
55
54
|
url = queue[0]
|
56
55
|
queue = queue[1:]
|
57
56
|
|
58
57
|
logger.info(
|
59
|
-
"{} {}
|
58
|
+
"{} [#{:,}/{:,}]: {} ".format(
|
60
59
|
"✅ " if url in crawl_cache else "🔗 ",
|
60
|
+
iteration,
|
61
|
+
len(queue),
|
61
62
|
url,
|
62
63
|
)
|
63
64
|
)
|
@@ -70,27 +71,31 @@ def crawl_list_of_urls(
|
|
70
71
|
)
|
71
72
|
content_type = url_summary.get("content_type", "unknown")
|
72
73
|
|
73
|
-
if
|
74
|
-
file.save_yaml(
|
74
|
+
if "html" in content_type:
|
75
|
+
if not file.save_yaml(
|
75
76
|
filename=objects.path_of(
|
76
77
|
object_name=object_name,
|
77
|
-
filename="
|
78
|
+
filename="{}_crawl_cache/{}.yaml".format(
|
79
|
+
cache_prefix,
|
80
|
+
url_to_filename(url),
|
81
|
+
),
|
78
82
|
),
|
79
83
|
data=url_summary,
|
80
|
-
)
|
84
|
+
):
|
85
|
+
return False, {}
|
81
86
|
|
82
87
|
crawl_cache[url] = content_type
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
88
|
+
|
89
|
+
queue = (
|
90
|
+
queue
|
91
|
+
+ url_summary.get("list_of_urls", [])
|
92
|
+
+ [
|
93
|
+
url
|
94
|
+
for url in url_summary.get("list_of_ignored_urls", [])
|
95
|
+
if any(url.startswith(url_prefix) for url_prefix in seed_urls)
|
96
|
+
]
|
97
|
+
)
|
98
|
+
queue = list({url for url in queue if url not in crawl_cache.keys()})
|
94
99
|
|
95
100
|
iteration += 1
|
96
101
|
if max_iterations != -1 and iteration >= max_iterations:
|
@@ -100,20 +105,21 @@ def crawl_list_of_urls(
|
|
100
105
|
if queue:
|
101
106
|
logger.warning(f"queue: {len(queue)}")
|
102
107
|
|
103
|
-
if
|
108
|
+
if not (
|
104
109
|
post_to_object(
|
105
110
|
object_name,
|
106
|
-
"
|
111
|
+
f"{cache_prefix}_crawl_cache",
|
107
112
|
crawl_cache,
|
108
113
|
)
|
109
|
-
|
110
|
-
post_to_object(
|
114
|
+
and post_to_object(
|
111
115
|
object_name,
|
112
|
-
"
|
116
|
+
f"{cache_prefix}_crawl_queue",
|
113
117
|
queue,
|
114
118
|
)
|
119
|
+
):
|
120
|
+
return False, {}
|
115
121
|
|
116
122
|
log_dict(logger, "crawled", crawl_cache, "url(s)")
|
117
123
|
log_list(logger, "queue:", queue, "url(s)")
|
118
124
|
|
119
|
-
return crawl_cache
|
125
|
+
return True, crawl_cache
|
@@ -49,6 +49,9 @@ def fetch_links_and_text(
|
|
49
49
|
for a_tag in soup.find_all("a", href=True):
|
50
50
|
a_url = urljoin(url, a_tag["href"])
|
51
51
|
|
52
|
+
if "#" in a_url:
|
53
|
+
a_url = a_url.split("#", 1)[0]
|
54
|
+
|
52
55
|
if a_url.startswith(url):
|
53
56
|
if url not in list_of_urls:
|
54
57
|
logger.info(f"+= {a_url}")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: blue_assistant
|
3
|
-
Version: 4.283.1
|
3
|
+
Version: 4.294.1
|
4
4
|
Summary: 🧠 An AI Assistant.
|
5
5
|
Home-page: https://github.com/kamangir/blue-assistant
|
6
6
|
Author: Arash Abadpour (Kamangir)
|
@@ -124,4 +124,4 @@ Also home to [`@web`](https://raw.githubusercontent.com/kamangir/blue-assistant/
|
|
124
124
|
|
125
125
|
[](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml) [](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml) [](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml) [](https://pypi.org/project/blue-assistant/) [](https://pypistats.org/packages/blue-assistant)
|
126
126
|
|
127
|
-
built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.283.1`](https://github.com/kamangir/blue-assistant).
|
127
|
+
built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.294.1`](https://github.com/kamangir/blue-assistant).
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/.abcli/tests/script_list.sh
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/__init__.py
RENAMED
File without changes
|
{blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/base/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/hue/__init__.py
RENAMED
File without changes
|
{blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/hue/__main__.py
RENAMED
File without changes
|
{blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/hue/api.py
RENAMED
File without changes
|
{blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/hue/classes.py
RENAMED
File without changes
|
{blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant/script/repository/hue/colors.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{blue_assistant-4.283.1 → blue_assistant-4.294.1}/blue_assistant.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|