olas-operate-middleware 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (30)
  1. {olas_operate_middleware-0.9.0.dist-info → olas_operate_middleware-0.10.1.dist-info}/METADATA +1 -1
  2. {olas_operate_middleware-0.9.0.dist-info → olas_operate_middleware-0.10.1.dist-info}/RECORD +30 -30
  3. operate/bridge/bridge_manager.py +2 -3
  4. operate/bridge/providers/native_bridge_provider.py +1 -1
  5. operate/bridge/providers/provider.py +2 -3
  6. operate/bridge/providers/relay_provider.py +9 -1
  7. operate/cli.py +123 -43
  8. operate/constants.py +5 -0
  9. operate/keys.py +26 -14
  10. operate/ledger/profiles.py +1 -3
  11. operate/migration.py +288 -21
  12. operate/operate_types.py +9 -6
  13. operate/quickstart/analyse_logs.py +1 -4
  14. operate/quickstart/claim_staking_rewards.py +0 -3
  15. operate/quickstart/reset_configs.py +0 -3
  16. operate/quickstart/reset_password.py +0 -3
  17. operate/quickstart/reset_staking.py +2 -4
  18. operate/quickstart/run_service.py +3 -5
  19. operate/quickstart/stop_service.py +0 -3
  20. operate/quickstart/terminate_on_chain_service.py +0 -3
  21. operate/services/deployment_runner.py +170 -38
  22. operate/services/health_checker.py +3 -2
  23. operate/services/manage.py +90 -123
  24. operate/services/service.py +15 -225
  25. operate/utils/__init__.py +44 -0
  26. operate/utils/gnosis.py +22 -12
  27. operate/wallet/master.py +16 -20
  28. {olas_operate_middleware-0.9.0.dist-info → olas_operate_middleware-0.10.1.dist-info}/LICENSE +0 -0
  29. {olas_operate_middleware-0.9.0.dist-info → olas_operate_middleware-0.10.1.dist-info}/WHEEL +0 -0
  30. {olas_operate_middleware-0.9.0.dist-info → olas_operate_middleware-0.10.1.dist-info}/entry_points.txt +0 -0
operate/services/deployment_runner.py
@@ -29,15 +29,18 @@ import sys # nosec
29
29
  import time
30
30
  import typing as t
31
31
  from abc import ABC, ABCMeta, abstractmethod
32
+ from contextlib import suppress
33
+ from enum import Enum
32
34
  from io import TextIOWrapper
33
35
  from pathlib import Path
34
36
  from traceback import print_exc
35
- from typing import Any, Dict, List
37
+ from typing import Any, Dict, List, Type
36
38
  from venv import main as venv_cli
37
39
 
38
40
  import psutil
39
41
  import requests
40
42
  from aea.__version__ import __version__ as aea_version
43
+ from aea.helpers.logging import setup_logger
41
44
  from autonomy.__version__ import __version__ as autonomy_version
42
45
 
43
46
  from operate import constants
@@ -99,6 +102,8 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
99
102
 
100
103
  TM_CONTROL_URL = constants.TM_CONTROL_URL
101
104
  SLEEP_BEFORE_TM_KILL = 2 # seconds
105
+ START_TRIES = constants.DEPLOYMENT_START_TRIES_NUM
106
+ logger = setup_logger(name="operate.base_deployment_runner")
102
107
 
103
108
  def _open_agent_runner_log_file(self) -> TextIOWrapper:
104
109
  """Open agent_runner.log file."""
@@ -109,7 +114,7 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
109
114
  def _run_aea_command(self, *args: str, cwd: Path) -> Any:
110
115
  """Run aea command."""
111
116
  cmd = " ".join(args)
112
- print("Running aea command: ", cmd, " at ", str(cwd))
117
+ self.logger.info(f"Running aea command: {cmd} at {str(cwd)}")
113
118
  p = multiprocessing.Process(
114
119
  target=self.__class__._call_aea_command, # pylint: disable=protected-access
115
120
  args=(cwd, args),
@@ -134,14 +139,14 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
134
139
  args, standalone_mode=False
135
140
  )
136
141
  except Exception:
142
+ print(f"Error on calling aea command: {args}")
137
143
  print_exc()
138
144
  raise
139
145
 
140
- @staticmethod
141
- def _run_cmd(args: t.List[str], cwd: t.Optional[Path] = None) -> None:
146
+ def _run_cmd(self, args: t.List[str], cwd: t.Optional[Path] = None) -> None:
142
147
  """Run command in a subprocess."""
143
- print(f"Running: {' '.join(args)}")
144
- print(f"Working dir: {os.getcwd()}")
148
+ self.logger.info(f"Running: {' '.join(args)}")
149
+ self.logger.info(f"Working dir: {os.getcwd()}")
145
150
  result = subprocess.run( # pylint: disable=subprocess-run-check # nosec
146
151
  args=args,
147
152
  cwd=cwd,
@@ -157,15 +162,8 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
157
162
  """Prepare agent env, add keys, run aea commands."""
158
163
  working_dir = self._work_directory
159
164
  env = json.loads((working_dir / "agent.json").read_text(encoding="utf-8"))
160
- # Patch for trader agent
161
- if "SKILL_TRADER_ABCI_MODELS_PARAMS_ARGS_STORE_PATH" in env:
162
- data_dir = working_dir / "data"
163
- data_dir.mkdir(exist_ok=True)
164
- env["SKILL_TRADER_ABCI_MODELS_PARAMS_ARGS_STORE_PATH"] = str(data_dir)
165
165
 
166
166
  # TODO: Dynamic port allocation, backport to service builder
167
- env["CONNECTION_ABCI_CONFIG_HOST"] = "localhost"
168
- env["CONNECTION_ABCI_CONFIG_PORT"] = "26658"
169
167
  env["PYTHONUTF8"] = "1"
170
168
  for var in env:
171
169
  # Fix tendermint connection params
@@ -178,11 +176,6 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
178
176
  if var.endswith("MODELS_PARAMS_ARGS_TENDERMINT_P2P_URL"):
179
177
  env[var] = "localhost:26656"
180
178
 
181
- if var.endswith("MODELS_BENCHMARK_TOOL_ARGS_LOG_DIR"):
182
- benchmarks_dir = working_dir / "benchmarks"
183
- benchmarks_dir.mkdir(exist_ok=True, parents=True)
184
- env[var] = str(benchmarks_dir.resolve())
185
-
186
179
  (working_dir / "agent.json").write_text(
187
180
  json.dumps(env, indent=4),
188
181
  encoding="utf-8",
@@ -206,8 +199,22 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
206
199
  cwd=working_dir,
207
200
  )
208
201
 
202
+ agent_alias_name = "agent"
203
+
204
+ agent_dir_full_path = Path(working_dir) / agent_alias_name
205
+
206
+ if agent_dir_full_path.exists():
207
+ # remove if exists before fetching! can have issues with retry mechanism of multiple start attempts
208
+ with suppress(Exception):
209
+ shutil.rmtree(agent_dir_full_path, ignore_errors=True)
210
+
209
211
  self._run_aea_command(
210
- "-s", "fetch", env["AEA_AGENT"], "--alias", "agent", cwd=working_dir
212
+ "-s",
213
+ "fetch",
214
+ env["AEA_AGENT"],
215
+ "--alias",
216
+ agent_alias_name,
217
+ cwd=working_dir,
211
218
  )
212
219
 
213
220
  # Add keys
@@ -221,6 +228,18 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
221
228
  self._run_aea_command("-s", "issue-certificates", cwd=working_dir / "agent")
222
229
 
223
230
  def start(self) -> None:
231
+ """Start the deployment with retries."""
232
+ for _ in range(self.START_TRIES):
233
+ try:
234
+ self._start()
235
+ return
236
+ except Exception as e: # pylint: disable=broad-except
237
+ self.logger.exception(f"Error on starting deployment: {e}")
238
+ raise RuntimeError(
239
+ f"Failed to start the deployment after {self.START_TRIES} attempts! Check logs"
240
+ )
241
+
242
+ def _start(self) -> None:
224
243
  """Start the deployment."""
225
244
  self._setup_agent()
226
245
  self._start_tendermint()
@@ -247,9 +266,11 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
247
266
  requests.get(self._get_tm_exit_url(), timeout=(1, 10))
248
267
  time.sleep(self.SLEEP_BEFORE_TM_KILL)
249
268
  except requests.ConnectionError:
250
- print(f"No Tendermint process listening on {self._get_tm_exit_url()}.")
269
+ self.logger.error(
270
+ f"No Tendermint process listening on {self._get_tm_exit_url()}."
271
+ )
251
272
  except Exception: # pylint: disable=broad-except
252
- print_exc()
273
+ self.logger.exception("Exception on tendermint stop!")
253
274
 
254
275
  pid = self._work_directory / "tendermint.pid"
255
276
  if not pid.exists():
@@ -611,30 +632,141 @@ class HostPythonHostDeploymentRunner(BaseDeploymentRunner):
611
632
  )
612
633
 
613
634
 
614
- def _get_host_deployment_runner(build_dir: Path) -> BaseDeploymentRunner:
615
- """Return depoyment runner according to running env."""
616
- deployment_runner: BaseDeploymentRunner
635
+ class States(Enum):
636
+ """Service deployment states."""
617
637
 
618
- if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
619
- # pyinstaller inside!
620
- if platform.system() == "Darwin":
621
- deployment_runner = PyInstallerHostDeploymentRunnerMac(build_dir)
622
- elif platform.system() == "Windows":
623
- deployment_runner = PyInstallerHostDeploymentRunnerWindows(build_dir)
624
- else:
638
+ NONE = 0
639
+ STARTING = 1
640
+ STARTED = 2
641
+ STOPPING = 3
642
+ STOPPED = 4
643
+ ERROR = 5
644
+
645
+
646
+ class DeploymentManager:
647
+ """Deployment manager to run and stop deployments."""
648
+
649
+ def __init__(self) -> None:
650
+ """Init the deployment manager."""
651
+ self._deployment_runner_class = self._get_host_deployment_runner_class()
652
+ self._is_stopping = False
653
+ self.logger = setup_logger(name="operate.deployment_manager")
654
+ self._states: Dict[Path, States] = {}
655
+
656
+ def _get_deployment_runner(self, build_dir: Path) -> BaseDeploymentRunner:
657
+ """Get deploymnent runner instance."""
658
+ return self._deployment_runner_class(build_dir)
659
+
660
+ @staticmethod
661
+ def _get_host_deployment_runner_class() -> Type[BaseDeploymentRunner]:
662
+ """Return depoyment runner class according to running env."""
663
+
664
+ if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
665
+ # pyinstaller inside!
666
+ if platform.system() == "Darwin":
667
+ return PyInstallerHostDeploymentRunnerMac
668
+ if platform.system() == "Windows":
669
+ return PyInstallerHostDeploymentRunnerWindows
625
670
  raise ValueError(f"Platform not supported {platform.system()}")
626
- else:
627
- deployment_runner = HostPythonHostDeploymentRunner(build_dir)
628
- return deployment_runner
671
+
672
+ return HostPythonHostDeploymentRunner
673
+
674
+ def stop(self) -> None:
675
+ """Stop deploment manager."""
676
+ self.logger.info("Stop deployment manager")
677
+ self._is_stopping = True
678
+
679
+ def get_state(self, build_dir: Path) -> States:
680
+ """Get state of the deployment."""
681
+ return self._states.get(build_dir) or States.NONE
682
+
683
+ def check_ipfs_connection_works(self) -> None:
684
+ """Check ipfs works and there is a good net connection."""
685
+ self.logger.info("Doing network connection check by test call to ipfs server.")
686
+ for i in range(3):
687
+ try:
688
+ requests.get(constants.IPFS_CHECK_URL, timeout=60)
689
+ return
690
+ except OSError:
691
+ self.logger.exception(
692
+ "failed to connect to ipfs to test connection. OSError, critical!"
693
+ )
694
+ raise
695
+ except Exception: # pylint: disable=broad-except
696
+ self.logger.exception(
697
+ "failed to connect to ipfs to test connection. do another try"
698
+ )
699
+ time.sleep(i * 5)
700
+ self.logger.error(
701
+ "failed to connect to ipfs to test connection. no attempts left. raise error"
702
+ )
703
+ raise RuntimeError(
704
+ "Failed to perform test connection to ipfs to check network connection!"
705
+ )
706
+
707
+ def run_deployment(self, build_dir: Path) -> None:
708
+ """Run deployment."""
709
+ if self._is_stopping:
710
+ raise RuntimeError("deployment manager stopped")
711
+ if self.get_state(build_dir=build_dir) in [States.STARTING, States.STOPPING]:
712
+ raise ValueError("Service already in transition")
713
+
714
+ # doing pre check for ipfs works fine, also network connection is ok.
715
+ self.check_ipfs_connection_works()
716
+
717
+ self.logger.info(f"Starting deployment {build_dir}...")
718
+ self._states[build_dir] = States.STARTING
719
+ try:
720
+ deployment_runner = self._get_deployment_runner(build_dir=build_dir)
721
+ deployment_runner.start()
722
+ self.logger.info(f"Started deployment {build_dir}")
723
+ self._states[build_dir] = States.STARTED
724
+ except Exception: # pylint: disable=broad-except
725
+ self.logger.exception(
726
+ f"Starting deployment failed {build_dir}. so try to stop"
727
+ )
728
+ self._states[build_dir] = States.ERROR
729
+ self.stop_deployemnt(build_dir=build_dir, force=True)
730
+
731
+ if self._is_stopping:
732
+ self.logger.warning(
733
+ f"Deployment at {build_dir} started when it was going to stop, so stop it"
734
+ )
735
+ self.stop_deployemnt(build_dir=build_dir, force=True)
736
+
737
+ def stop_deployemnt(self, build_dir: Path, force: bool = False) -> None:
738
+ """Stop the deployment."""
739
+ if (
740
+ self.get_state(build_dir=build_dir) in [States.STARTING, States.STOPPING]
741
+ and not force
742
+ ):
743
+ raise ValueError("Service already in transition")
744
+ self.logger.info(f"Stopping deployment {build_dir}...")
745
+ self._states[build_dir] = States.STOPPING
746
+ deployment_runner = self._get_deployment_runner(build_dir=build_dir)
747
+ try:
748
+ deployment_runner.stop()
749
+ self.logger.info(f"Stopped deployment {build_dir}...")
750
+ self._states[build_dir] = States.STOPPED
751
+ except Exception:
752
+ self.logger.exception(f"Stopping deployment failed {build_dir}...")
753
+ self._states[build_dir] = States.ERROR
754
+ raise
755
+
756
+
757
+ deployment_manager = DeploymentManager()
629
758
 
630
759
 
631
760
  def run_host_deployment(build_dir: Path) -> None:
632
761
  """Run host deployment."""
633
- deployment_runner = _get_host_deployment_runner(build_dir=build_dir)
634
- deployment_runner.start()
762
+ deployment_manager.run_deployment(build_dir=build_dir)
635
763
 
636
764
 
637
765
  def stop_host_deployment(build_dir: Path) -> None:
638
766
  """Stop host deployment."""
639
- deployment_runner = _get_host_deployment_runner(build_dir=build_dir)
640
- deployment_runner.stop()
767
+ deployment_manager.stop_deployemnt(build_dir=build_dir)
768
+
769
+
770
+ def stop_deployment_manager() -> None:
771
+ """Stop deployment manager."""
772
+ deployment_manager.stop()
operate/services/health_checker.py
@@ -20,6 +20,7 @@
20
20
  """Source code for checking aea is alive.."""
21
21
  import asyncio
22
22
  import json
23
+ import logging
23
24
  import typing as t
24
25
  from concurrent.futures import ThreadPoolExecutor
25
26
  from http import HTTPStatus
@@ -27,7 +28,6 @@ from pathlib import Path
27
28
  from traceback import print_exc
28
29
 
29
30
  import aiohttp # type: ignore
30
- from aea.helpers.logging import setup_logger
31
31
 
32
32
  from operate.constants import HEALTH_CHECK_URL
33
33
  from operate.services.manage import ServiceManager # type: ignore
@@ -44,14 +44,15 @@ class HealthChecker:
44
44
  def __init__(
45
45
  self,
46
46
  service_manager: ServiceManager,
47
+ logger: logging.Logger,
47
48
  port_up_timeout: int | None = None,
48
49
  sleep_period: int | None = None,
49
50
  number_of_fails: int | None = None,
50
51
  ) -> None:
51
52
  """Init the healtch checker."""
52
53
  self._jobs: t.Dict[str, asyncio.Task] = {}
53
- self.logger = setup_logger(name="operate.health_checker")
54
54
  self._service_manager = service_manager
55
+ self.logger = logger
55
56
  self.port_up_timeout = port_up_timeout or self.PORT_UP_TIMEOUT_DEFAULT
56
57
  self.sleep_period = sleep_period or self.SLEEP_PERIOD_DEFAULT
57
58
  self.number_of_fails = number_of_fails or self.NUMBER_OF_FAILS_DEFAULT