blue-assistant 4.268.1__tar.gz → 4.283.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {blue_assistant-4.268.1/blue_assistant.egg-info → blue_assistant-4.283.1}/PKG-INFO +2 -2
  2. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/README.md +1 -1
  3. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/script/run.sh +3 -1
  4. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/tests/script_run.sh +3 -1
  5. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/web/crawl.sh +5 -0
  6. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/__init__.py +1 -1
  7. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/help/script.py +1 -1
  8. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/help/web.py +1 -1
  9. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/__main__.py +9 -1
  10. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/actions/generate_image.py +1 -0
  11. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/actions/generate_text.py +1 -0
  12. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/actions/generic.py +1 -0
  13. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/actions/web_crawl.py +10 -21
  14. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/base/classes.py +6 -2
  15. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/blue_amo/actions/slicing_into_frames.py +1 -0
  16. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/blue_amo/actions/stitching_the_frames.py +1 -0
  17. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/blue_amo/classes.py +6 -1
  18. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/generic/classes.py +11 -3
  19. blue_assistant-4.283.1/blue_assistant/web/__init__.py +2 -0
  20. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/web/__main__.py +12 -13
  21. blue_assistant-4.283.1/blue_assistant/web/crawl.py +119 -0
  22. blue_assistant-4.283.1/blue_assistant/web/fetch.py +90 -0
  23. blue_assistant-4.283.1/blue_assistant/web/functions.py +24 -0
  24. {blue_assistant-4.268.1 → blue_assistant-4.283.1/blue_assistant.egg-info}/PKG-INFO +2 -2
  25. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant.egg-info/SOURCES.txt +2 -0
  26. blue_assistant-4.268.1/blue_assistant/web/__init__.py +0 -0
  27. blue_assistant-4.268.1/blue_assistant/web/functions.py +0 -97
  28. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/LICENSE +0 -0
  29. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/MANIFEST.in +0 -0
  30. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/abcli.sh +0 -0
  31. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/actions.sh +0 -0
  32. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/alias.sh +0 -0
  33. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/blue_assistant.sh +0 -0
  34. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/browse.sh +0 -0
  35. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/hue/create_user.sh +0 -0
  36. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/hue/list.sh +0 -0
  37. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/hue/set.sh +0 -0
  38. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/hue.sh +0 -0
  39. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/script/list.sh +0 -0
  40. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/script.sh +0 -0
  41. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/tests/README.sh +0 -0
  42. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/tests/help.sh +0 -0
  43. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/tests/script_list.sh +0 -0
  44. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/tests/version.sh +0 -0
  45. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/tests/web_crawl.sh +0 -0
  46. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/tests/web_fetch.sh +0 -0
  47. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/web/fetch.sh +0 -0
  48. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/.abcli/web.sh +0 -0
  49. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/README.py +0 -0
  50. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/__main__.py +0 -0
  51. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/config.env +0 -0
  52. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/env.py +0 -0
  53. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/functions.py +0 -0
  54. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/help/__init__.py +0 -0
  55. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/help/__main__.py +0 -0
  56. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/help/functions.py +0 -0
  57. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/help/hue.py +0 -0
  58. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/host.py +0 -0
  59. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/logger.py +0 -0
  60. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/sample.env +0 -0
  61. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/__init__.py +0 -0
  62. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/actions/__init__.py +0 -0
  63. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/load.py +0 -0
  64. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/__init__.py +0 -0
  65. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/base/__init__.py +0 -0
  66. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/blue_amo/__init__.py +0 -0
  67. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/blue_amo/actions/__init__.py +0 -0
  68. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/generic/__init__.py +0 -0
  69. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/hue/__init__.py +0 -0
  70. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/hue/__main__.py +0 -0
  71. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/hue/api.py +0 -0
  72. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/hue/classes.py +0 -0
  73. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/hue/colors.py +0 -0
  74. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/orbital_data_explorer/__init__.py +0 -0
  75. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/script/repository/orbital_data_explorer/classes.py +0 -0
  76. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant/urls.py +0 -0
  77. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant.egg-info/dependency_links.txt +0 -0
  78. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant.egg-info/requires.txt +0 -0
  79. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/blue_assistant.egg-info/top_level.txt +0 -0
  80. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/pyproject.toml +0 -0
  81. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/requirements.txt +0 -0
  82. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/setup.cfg +0 -0
  83. {blue_assistant-4.268.1 → blue_assistant-4.283.1}/setup.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: blue_assistant
-Version: 4.268.1
+Version: 4.283.1
 Summary: 🧠 An AI Assistant.
 Home-page: https://github.com/kamangir/blue-assistant
 Author: Arash Abadpour (Kamangir)
@@ -124,4 +124,4 @@ Also home to [`@web`](https://raw.githubusercontent.com/kamangir/blue-assistant/
 
 [![pylint](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml) [![pytest](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml) [![bashtest](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml) [![PyPI version](https://img.shields.io/pypi/v/blue-assistant.svg)](https://pypi.org/project/blue-assistant/) [![PyPI - Downloads](https://img.shields.io/pypi/dd/blue-assistant)](https://pypistats.org/packages/blue-assistant)
 
-built by 🌀 [`blue_options-4.236.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.268.1`](https://github.com/kamangir/blue-assistant).
+built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.283.1`](https://github.com/kamangir/blue-assistant).
README.md
@@ -79,4 +79,4 @@ Also home to [`@web`](./blue_assistant/web/)
 
 [![pylint](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml) [![pytest](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml) [![bashtest](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml) [![PyPI version](https://img.shields.io/pypi/v/blue-assistant.svg)](https://pypi.org/project/blue-assistant/) [![PyPI - Downloads](https://img.shields.io/pypi/dd/blue-assistant)](https://pypistats.org/packages/blue-assistant)
 
-built by 🌀 [`blue_options-4.236.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.268.1`](https://github.com/kamangir/blue-assistant).
+built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.283.1`](https://github.com/kamangir/blue-assistant).
blue_assistant/.abcli/script/run.sh
@@ -3,7 +3,8 @@
 function blue_assistant_script_run() {
     local options=$1
     local do_dryrun=$(abcli_option_int "$options" dryrun 0)
-    local do_download=$(abcli_option_int "$options" download 0)
+    local use_cache=$(abcli_option_int "$options" cache 1)
+    local do_download=$(abcli_option_int "$options" download $use_cache)
     local do_upload=$(abcli_option_int "$options" upload $(abcli_not $do_dryrun))
 
     local script_options=$2
@@ -20,6 +21,7 @@ function blue_assistant_script_run() {
         run \
         --script_name $script_name \
         --object_name $object_name \
+        --use_cache $use_cache \
         "${@:4}"
     [[ $? -ne 0 ]] && return 1
 
blue_assistant/.abcli/tests/script_run.sh
@@ -11,11 +11,13 @@ function test_blue_assistant_script_run() {
     for script_name in $(echo "$list_of_script_name" | tr + " "); do
         abcli_log "testing $script_name ..."
 
+        local object_name=test_blue_assistant_script_run-$(abcli_string_timestamp_short)
+
         abcli_eval ,$options \
             blue_assistant_script_run \
             ~upload,$options \
             script=$script_name \
-            test_blue_assistant_script_run-$(abcli_string_timestamp_short) \
+            $object_name \
             "${@:2}" \
             --test_mode 1 \
             --verbose 1
blue_assistant/.abcli/web/crawl.sh
@@ -3,11 +3,15 @@
 function blue_assistant_web_crawl() {
     local options=$1
     local do_dryrun=$(abcli_option_int "$options" dryrun 0)
+    local use_cache=$(abcli_option_int "$options" cache 0)
+    local do_download=$(abcli_option_int "$options" download $use_cache)
     local do_upload=$(abcli_option_int "$options" upload $(abcli_not $do_dryrun))
 
     local seed_urls=${2:-void}
 
     local object_name=$(abcli_clarify_object $3 web-crawl-$(abcli_string_timestamp_short))
+    [[ "$do_download" == 1 ]] &&
+        abcli_download - $object_name
 
     abcli_log "crawling $seed_urls -> $object_name ..."
 
@@ -16,6 +20,7 @@ function blue_assistant_web_crawl() {
         crawl \
         --seed_urls $seed_urls \
         --object_name $object_name \
+        --use_cache $use_cache \
         "${@:4}"
     [[ $? -ne 0 ]] && return 1
 
blue_assistant/__init__.py
@@ -4,7 +4,7 @@ ICON = "🧠"
 
 DESCRIPTION = f"{ICON} An AI Assistant."
 
-VERSION = "4.268.1"
+VERSION = "4.283.1"
 
 REPO_NAME = "blue-assistant"
 
blue_assistant/help/script.py
@@ -30,7 +30,7 @@ def help_run(
     tokens: List[str],
     mono: bool,
 ) -> str:
-    options = xtra("download,dryrun,~upload", mono=mono)
+    options = xtra("~cache,download,dryrun,~upload", mono=mono)
 
     script_options = "script=<script>"
 
blue_assistant/help/web.py
@@ -7,7 +7,7 @@ def help_crawl(
     tokens: List[str],
     mono: bool,
 ) -> str:
-    options = xtra("dryrun,~upload", mono=mono)
+    options = xtra("cache,~download,dryrun,~upload", mono=mono)
 
     args = [
         "[--max_iterations <100000>]",
blue_assistant/script/__main__.py
@@ -48,6 +48,12 @@ parser.add_argument(
     default=1,
     help="0 | 1",
 )
+parser.add_argument(
+    "--use_cache",
+    type=int,
+    default=1,
+    help="0 | 1",
+)
 args = parser.parse_args()
 
 delim = " " if args.delim == "space" else args.delim
@@ -70,7 +76,9 @@ elif args.task == "run":
     )
 
     if success:
-        success = script.run()
+        success = script.run(
+            use_cache=args.use_cache == 1,
+        )
 else:
     success = None
 
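The new `--use_cache` flag follows the package's `0 | 1` integer-flag convention. A minimal, self-contained sketch of that conversion; the parser below is illustrative, not the package's actual CLI:

# a standalone sketch of the 0|1 flag convention used above.
import argparse

parser = argparse.ArgumentParser("use-cache-demo")  # hypothetical parser
parser.add_argument(
    "--use_cache",
    type=int,
    default=1,
    help="0 | 1",
)
args = parser.parse_args([])  # e.g. parse_args(["--use_cache", "0"])

# the same conversion that is passed to script.run(use_cache=...)
use_cache: bool = args.use_cache == 1
print(use_cache)  # True by default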
blue_assistant/script/actions/generate_image.py
@@ -17,6 +17,7 @@ NAME = module.name(__file__, NAME)
 def generate_image(
     script: BaseScript,
     node_name: str,
+    use_cache: bool,
 ) -> bool:
     logger.info(f"{NAME}: {script} @ {node_name} ...")
 
blue_assistant/script/actions/generate_text.py
@@ -18,6 +18,7 @@ NAME = module.name(__file__, NAME)
 def generate_text(
     script: BaseScript,
     node_name: str,
+    use_cache: bool,
 ) -> bool:
     logger.info(f"{NAME}: {script} @ {node_name} ...")
 
blue_assistant/script/actions/generic.py
@@ -10,6 +10,7 @@ NAME = module.name(__file__, NAME)
 def generic_action(
     script: BaseScript,
     node_name: str,
+    use_cache: bool,
 ) -> bool:
     logger.info(f"{NAME}: {script} @ {node_name} ...")
     return True
blue_assistant/script/actions/web_crawl.py
@@ -1,12 +1,9 @@
-from typing import Dict
 from blueness import module
-from tqdm import tqdm
 
 from blue_options.logger import log_list
-from openai_commands.text_generation import api
 
 from blue_assistant import NAME
-from blue_assistant.web.functions import crawl_list_of_urls
+from blue_assistant.web.crawl import crawl_list_of_urls
 from blue_assistant.script.repository.base.classes import BaseScript
 from blue_assistant.logger import logger
 
@@ -17,6 +14,7 @@ NAME = module.name(__file__, NAME)
 def web_crawl(
     script: BaseScript,
     node_name: str,
+    use_cache: bool,
 ) -> bool:
     logger.info(f"{NAME}: {script} @ {node_name} ...")
 
@@ -24,37 +22,28 @@ def web_crawl(
     if not isinstance(seed_url_var_name, str):
         logger.error(f"{node_name}: seed_urls must be a string.")
         return False
-    # to allow both :::<var-name> and <var-name> - for convenience :)
-    if seed_url_var_name.startswith(":::"):
-        seed_url_var_name = seed_url_var_name[3:].strip()
     if not seed_url_var_name:
         logger.error(f"{node_name}: seed_urls not found.")
         return False
+
+    # to allow both :::<var-name> and <var-name> - for convenience :)
+    if seed_url_var_name.startswith(":::"):
+        seed_url_var_name = seed_url_var_name[3:].strip()
+
     if seed_url_var_name not in script.vars:
         logger.error(f"{node_name}: {seed_url_var_name}: seed_urls not found in vars.")
         return False
-
     seed_urls = script.vars[seed_url_var_name]
-    log_list(logger, seed_urls, "seed url(s)")
+    log_list(logger, "using", seed_urls, "seed url(s)")
 
     visited_urls = crawl_list_of_urls(
         seed_urls=seed_urls,
         object_name=script.object_name,
         max_iterations=script.nodes[node_name]["max_iterations"],
+        use_cache=use_cache,
     )
 
-    success, output, _ = api.generate_text(
-        prompt=script.nodes[node_name]["prompt"].replace(
-            ":::input", " ".join([content for content in visited_urls.values()])
-        ),
-        verbose=script.verbose,
-    )
-    if not success:
-        return success
-
-    logger.info(output)
-
     script.nodes[node_name]["visited_urls"] = visited_urls
-    script.nodes[node_name]["output"] = output
+    script.nodes[node_name]["output"] = "TBA"
 
     return True
blue_assistant/script/repository/base/classes.py
@@ -145,12 +145,16 @@ class BaseScript:
             [node_name],
         )
 
-    def run(self) -> bool:
+    def run(
+        self,
+        use_cache: bool = True,
+    ) -> bool:
         logger.info(
-            "{}.run: {}:{} -> {}".format(
+            "{}.run: {}:{} -{}> {}".format(
                 NAME,
                 self.__class__.__name__,
                 self.name,
+                "use-cache-" if use_cache else "",
                 self.object_name,
             )
         )
blue_assistant/script/repository/blue_amo/actions/slicing_into_frames.py
@@ -10,6 +10,7 @@ NAME = module.name(__file__, NAME)
 def slicing_into_frames(
     script: BaseScript,
     node_name: str,
+    use_cache: bool,
 ) -> bool:
     logger.info(f"{NAME}: processing the output...")
 
blue_assistant/script/repository/blue_amo/actions/stitching_the_frames.py
@@ -18,6 +18,7 @@ NAME = module.name(__file__, NAME)
 def stitching_the_frames(
     script: BaseScript,
     node_name: str,
+    use_cache: bool,
 ) -> bool:
     list_of_frames_filenames: List[str] = [
         filename
blue_assistant/script/repository/blue_amo/classes.py
@@ -66,14 +66,19 @@ class BlueAmoScript(GenericScript):
     def perform_action(
         self,
         node_name: str,
+        use_cache: bool,
     ) -> bool:
-        if not super().perform_action(node_name=node_name):
+        if not super().perform_action(
+            node_name=node_name,
+            use_cache=use_cache,
+        ):
             return False
 
         if node_name in dict_of_actions:
             return dict_of_actions[node_name](
                 script=self,
                 node_name=node_name,
+                use_cache=use_cache,
             )
 
         return True
blue_assistant/script/repository/generic/classes.py
@@ -22,6 +22,7 @@ class GenericScript(BaseScript):
     def perform_action(
         self,
         node_name: str,
+        use_cache: bool,
     ) -> bool:
         action_name = self.nodes[node_name].get("action", "unknown")
         logger.info(f"---- node: {node_name} ---- ")
@@ -33,10 +34,14 @@ class GenericScript(BaseScript):
         return dict_of_actions[action_name](
             script=self,
             node_name=node_name,
+            use_cache=use_cache,
         )
 
-    def run(self) -> bool:
-        if not super().run():
+    def run(
+        self,
+        use_cache: bool = True,
+    ) -> bool:
+        if not super().run(use_cache=use_cache):
             return False
 
         success: bool = True
@@ -68,7 +73,10 @@ class GenericScript(BaseScript):
                 )
                 continue
 
-            if not self.perform_action(node_name=node_name):
+            if not self.perform_action(
+                node_name=node_name,
+                use_cache=use_cache,
+            ):
                 success = False
                 break
 
blue_assistant/web/__init__.py (new file)
@@ -0,0 +1,2 @@
+from blue_assistant.web.crawl import crawl_list_of_urls
+from blue_assistant.web.fetch import fetch_links_and_text
blue_assistant/web/__main__.py
@@ -6,7 +6,7 @@ from blue_options.logger import log_dict
 from blue_objects.metadata import post_to_object
 
 from blue_assistant import NAME
-from blue_assistant.web.functions import crawl_list_of_urls, fetch_links_and_text
+from blue_assistant.web import crawl_list_of_urls, fetch_links_and_text
 from blue_assistant.logger import logger
 
 NAME = module.name(__file__, NAME)
@@ -40,26 +40,28 @@ parser.add_argument(
     "--object_name",
     type=str,
 )
+parser.add_argument(
+    "--use_cache",
+    type=int,
+    default=0,
+    help="0 | 1",
+)
 args = parser.parse_args()
 
 success = False
 if args.task == "crawl":
+    success = True
     dict_of_urls = crawl_list_of_urls(
         seed_urls=args.seed_urls.split("+"),
         object_name=args.object_name,
         max_iterations=args.max_iterations,
+        use_cache=args.use_cache == 1,
     )
 
     if args.verbose == 1:
-        log_dict(logger, dict_of_urls, "url(s)")
-
-    success = post_to_object(
-        args.object_name,
-        NAME.replace(".", "-"),
-        dict_of_urls,
-    )
+        log_dict(logger, "crawled", dict_of_urls, "url(s)")
 elif args.task == "fetch":
-    links, text = fetch_links_and_text(
+    summary = fetch_links_and_text(
         url=args.url,
         verbose=True,
     )
@@ -67,10 +69,7 @@ elif args.task == "fetch":
     success = post_to_object(
         args.object_name,
         NAME.replace(".", "-"),
-        {
-            "links": list(links),
-            "text": text,
-        },
+        summary,
     )
 else:
     success = None
blue_assistant/web/crawl.py (new file)
@@ -0,0 +1,119 @@
+from typing import List, Dict
+
+from blueness import module
+from blue_options.logger import log_dict, log_list
+from blue_objects import file
+from blue_objects import objects
+from blue_objects.metadata import get_from_object, post_to_object
+
+from blue_assistant import NAME
+from blue_assistant.web.fetch import fetch_links_and_text
+from blue_assistant.web.functions import url_to_filename
+from blue_assistant.logger import logger
+
+NAME = module.name(__file__, NAME)
+
+
+def crawl_list_of_urls(
+    seed_urls: List[str],
+    object_name: str,
+    max_iterations: int = 10,
+    use_cache: bool = False,
+    verbose: bool = False,
+) -> Dict[str, str]:
+    logger.info(
+        "{}.crawl_list_of_urls({}): {} -{}> {}".format(
+            NAME,
+            len(seed_urls),
+            ", ".join(seed_urls),
+            "use-cache-" if use_cache else "",
+            object_name,
+        )
+    )
+
+    crawl_cache: Dict[str, str] = {}
+    queue: List[str] = [url for url in seed_urls]
+
+    if use_cache:
+        crawl_cache = get_from_object(
+            object_name,
+            "crawl_cache",
+            {},
+        )
+        log_dict(logger, "loaded cache:", crawl_cache, "url(s)")
+
+        queue += get_from_object(
+            object_name,
+            "crawl_queue",
+            [],
+        )
+
+    log_list(logger, "queue:", queue, "url(s)")
+
+    iteration: int = 0
+    while queue:
+        url = queue[0]
+        queue = queue[1:]
+
+        logger.info(
+            "{} {} ...".format(
+                "✅ " if url in crawl_cache else "🔗 ",
+                url,
+            )
+        )
+        if url in crawl_cache:
+            continue
+
+        url_summary = fetch_links_and_text(
+            url=url,
+            verbose=verbose,
+        )
+        content_type = url_summary.get("content_type", "unknown")
+
+        if use_cache and "html" in content_type:
+            file.save_yaml(
+                filename=objects.path_of(
+                    object_name=object_name,
+                    filename="crawl_summary_cache/{}.yaml".format(url_to_filename(url)),
+                ),
+                data=url_summary,
+            )
+
+        crawl_cache[url] = content_type
+        if "list_of_urls" in url_summary:
+            queue = list(
+                set(
+                    queue
+                    + [
+                        url
+                        for url in url_summary["list_of_urls"]
+                        if url not in crawl_cache.keys()
+                    ]
+                )
+            )
+
+        iteration += 1
+        if max_iterations != -1 and iteration >= max_iterations:
+            logger.warning(f"max iteration of {max_iterations} reached.")
+            break
+
+    if queue:
+        logger.warning(f"queue: {len(queue)}")
+
+    if use_cache:
+        post_to_object(
+            object_name,
+            "crawl_cache",
+            crawl_cache,
+        )
+
+        post_to_object(
+            object_name,
+            "crawl_queue",
+            queue,
+        )
+
+    log_dict(logger, "crawled", crawl_cache, "url(s)")
+    log_list(logger, "queue:", queue, "url(s)")
+
+    return crawl_cache
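A hedged usage sketch of the new crawler; the seed URL and object name below are placeholders, and `use_cache=False` skips the `get_from_object`/`post_to_object` round-trips shown above:

# assumes blue_assistant and its dependencies are installed.
from blue_assistant.web.crawl import crawl_list_of_urls

crawl_cache = crawl_list_of_urls(
    seed_urls=["https://example.com"],
    object_name="my-crawl-object",  # placeholder object name
    max_iterations=3,
    use_cache=False,  # True would also load/save crawl_cache and crawl_queue
)

# keys are visited URLs, values are their content types
for url, content_type in crawl_cache.items():
    print(url, content_type)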
blue_assistant/web/fetch.py (new file)
@@ -0,0 +1,90 @@
+from typing import Dict, Any, List
+import requests
+from bs4 import BeautifulSoup, XMLParsedAsHTMLWarning
+from urllib.parse import urljoin
+import re
+import warnings
+
+from blueness import module
+from blue_options.logger import log_long_text, log_list
+
+from blue_assistant import NAME
+from blue_assistant.logger import logger
+
+warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
+
+NAME = module.name(__file__, NAME)
+
+
+def fetch_links_and_text(
+    url: str,
+    verbose: bool = False,
+) -> Dict[str, Any]:
+    try:
+        response = requests.get(url, timeout=5)
+    except Exception as e:
+        logger.warning(e)
+        return {}
+
+    if response.status_code != 200:
+        logger.error(response)
+        return {}
+
+    content_type = response.headers.get("Content-Type", "")
+    logger.info(f"content-type: {content_type}")
+
+    list_of_urls: List[str] = []
+    list_of_ignored_urls: List[str] = []
+    text = ""
+
+    if not any(
+        thing in content_type
+        for thing in [
+            "pdf",
+            "xml",
+        ]
+    ):
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        for a_tag in soup.find_all("a", href=True):
+            a_url = urljoin(url, a_tag["href"])
+
+            if a_url.startswith(url):
+                if url not in list_of_urls:
+                    logger.info(f"+= {a_url}")
+                    list_of_urls += [a_url]
+                continue
+
+            if a_url not in list_of_ignored_urls:
+                list_of_ignored_urls += [a_url]
+                if verbose:
+                    logger.info(f"ignored: {a_url}")
+
+        text = soup.get_text(separator=" ", strip=True)
+
+        # remove non-ASCII characters
+        text = re.sub(r"[^\x20-\x7E]+", "", text)
+        for thing in ["\r", "\n", "\t"]:
+            text = text.replace(thing, " ")
+        text = re.sub(r"\s+", " ", text).strip()
+
+    if verbose:
+        log_list(logger, "fetched", list_of_urls, "url(s)")
+        log_list(logger, "ignored", list_of_ignored_urls, "url(s)")
+        log_long_text(logger, text)
+    else:
+        logger.info(
+            "{} url(s) collected, {} url(s) ignored, text: {:,} char(s).".format(
+                len(list_of_urls),
+                len(list_of_ignored_urls),
+                len(text),
+            )
+        )
+
+    return {
+        "url": url,
+        "content_type": content_type,
+        "list_of_ignored_urls": list_of_ignored_urls,
+        "list_of_urls": list_of_urls,
+        "text": text,
+    }
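A hedged usage sketch of `fetch_links_and_text`; the URL is a placeholder, and per the code above an empty dict signals a failed request or a non-200 status:

from blue_assistant.web.fetch import fetch_links_and_text

summary = fetch_links_and_text(
    url="https://example.com",  # placeholder URL
    verbose=False,
)

if summary:  # {} means the request failed or returned non-200
    print(summary["content_type"])
    print(len(summary["list_of_urls"]), "same-prefix url(s) collected")
    print(summary["text"][:80])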
blue_assistant/web/functions.py (new file)
@@ -0,0 +1,24 @@
+import re
+
+
+def url_to_filename(
+    url: str,
+    max_length: int = 255,
+) -> str:
+    # Remove the URL scheme (http://, https://)
+    filename = re.sub(r"^https?://", "", url)
+
+    # Replace unwanted characters with an underscore
+    filename = re.sub(r"[^\w\s-]", "_", filename)
+
+    # Replace slashes with a hyphen to preserve some structure
+    filename = re.sub(r"\/", "-", filename)
+
+    # Replace spaces with underscores
+    filename = filename.replace(" ", "_")
+
+    # Ensure the filename length is not too long
+    if len(filename) > max_length:
+        filename = filename[:max_length]
+
+    return filename
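crawl.py uses this helper to derive `crawl_summary_cache/<filename>.yaml` names; a quick sketch of its output on a placeholder URL:

from blue_assistant.web.functions import url_to_filename

print(url_to_filename("https://example.com/path?q=1"))
# -> example_com_path_q_1
# note: the [^\w\s-] substitution already turns "/" into "_",
# so the later slash-to-hyphen step is effectively a no-op.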
blue_assistant.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: blue_assistant
-Version: 4.268.1
+Version: 4.283.1
 Summary: 🧠 An AI Assistant.
 Home-page: https://github.com/kamangir/blue-assistant
 Author: Arash Abadpour (Kamangir)
@@ -124,4 +124,4 @@ Also home to [`@web`](https://raw.githubusercontent.com/kamangir/blue-assistant/
 
 [![pylint](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/pylint.yml) [![pytest](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/pytest.yml) [![bashtest](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml/badge.svg)](https://github.com/kamangir/blue-assistant/actions/workflows/bashtest.yml) [![PyPI version](https://img.shields.io/pypi/v/blue-assistant.svg)](https://pypi.org/project/blue-assistant/) [![PyPI - Downloads](https://img.shields.io/pypi/dd/blue-assistant)](https://pypistats.org/packages/blue-assistant)
 
-built by 🌀 [`blue_options-4.236.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.268.1`](https://github.com/kamangir/blue-assistant).
+built by 🌀 [`blue_options-4.240.1`](https://github.com/kamangir/awesome-bash-cli), based on 🧠 [`blue_assistant-4.283.1`](https://github.com/kamangir/blue-assistant).
blue_assistant.egg-info/SOURCES.txt
@@ -74,4 +74,6 @@ blue_assistant/script/repository/orbital_data_explorer/__init__.py
 blue_assistant/script/repository/orbital_data_explorer/classes.py
 blue_assistant/web/__init__.py
 blue_assistant/web/__main__.py
+blue_assistant/web/crawl.py
+blue_assistant/web/fetch.py
 blue_assistant/web/functions.py
blue_assistant-4.268.1/blue_assistant/web/__init__.py (removed): file without changes
blue_assistant-4.268.1/blue_assistant/web/functions.py (removed)
@@ -1,97 +0,0 @@
-from typing import List, Dict, Set, Tuple
-import requests
-from bs4 import BeautifulSoup, XMLParsedAsHTMLWarning
-from urllib.parse import urljoin
-import re
-import warnings
-
-from blueness import module
-from blue_options.logger import log_long_text, log_list
-
-from blue_assistant import NAME
-from blue_assistant.logger import logger
-
-warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
-
-NAME = module.name(__file__, NAME)
-
-
-def fetch_links_and_text(
-    url: str,
-    verbose: bool = False,
-) -> Tuple[List[str], str]:
-    try:
-        response = requests.get(url, timeout=5)
-        response.raise_for_status()
-    except requests.RequestException:
-        return set(), ""
-
-    soup = BeautifulSoup(response.text, "html.parser")
-    links = set()
-
-    for a_tag in soup.find_all("a", href=True):
-        a_url = urljoin(url, a_tag["href"])
-
-        if a_url.startswith(url):
-            logger.info(f"+= {a_url}")
-            links.add(a_url)
-            continue
-
-        logger.info(f"ignored: {a_url}")
-
-    plain_text = soup.get_text(separator=" ", strip=True)
-
-    # remove non-ASCII characters
-    plain_text = re.sub(r"[^\x20-\x7E]+", "", plain_text)
-    for thing in ["\r", "\n", "\t"]:
-        plain_text = plain_text.replace(thing, " ")
-    plain_text = re.sub(r"\s+", " ", plain_text).strip()
-
-    if verbose:
-        log_list(logger, list(links), "link(s)")
-        log_long_text(logger, plain_text)
-
-    return links, plain_text
-
-
-def crawl_list_of_urls(
-    seed_urls: List[str],
-    object_name: str,
-    max_iterations: int = 10,
-    verbose: bool = False,
-) -> Dict[str, str]:
-    logger.info(
-        "{}.crawl_list_of_urls({}): {} -> {}".format(
-            NAME,
-            len(seed_urls),
-            ", ".join(seed_urls),
-            object_name,
-        )
-    )
-
-    visited: Dict[str, str] = {}
-    queue: Set[str] = set(seed_urls)
-
-    iteration: int = 0
-    while queue:
-        url = queue.pop()
-        if url in visited:
-            continue
-
-        logger.info(f"🔗 {url} ...")
-        url_links, url_text = fetch_links_and_text(
-            url=url,
-            verbose=verbose,
-        )
-        visited[url] = url_text
-        queue.update(url_links - visited.keys())
-
-        iteration += 1
-        if max_iterations != -1 and iteration >= max_iterations:
-            logger.warning(f"max iteration of {max_iterations} reached.")
-            break
-
-    if queue:
-        logger.warning(f"queue: {len(queue)}")
-
-    return visited