olas-operate-middleware 0.8.2__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {olas_operate_middleware-0.8.2.dist-info → olas_operate_middleware-0.10.0.dist-info}/METADATA +2 -2
- {olas_operate_middleware-0.8.2.dist-info → olas_operate_middleware-0.10.0.dist-info}/RECORD +34 -34
- operate/bridge/bridge_manager.py +5 -6
- operate/bridge/providers/native_bridge_provider.py +1 -1
- operate/bridge/providers/provider.py +4 -5
- operate/bridge/providers/relay_provider.py +1 -1
- operate/cli.py +128 -48
- operate/constants.py +9 -9
- operate/keys.py +26 -14
- operate/ledger/__init__.py +4 -4
- operate/ledger/profiles.py +9 -11
- operate/migration.py +326 -0
- operate/operate_types.py +9 -27
- operate/quickstart/analyse_logs.py +3 -6
- operate/quickstart/claim_staking_rewards.py +1 -4
- operate/quickstart/reset_configs.py +0 -3
- operate/quickstart/reset_password.py +0 -3
- operate/quickstart/reset_staking.py +3 -5
- operate/quickstart/run_service.py +5 -7
- operate/quickstart/stop_service.py +3 -4
- operate/quickstart/terminate_on_chain_service.py +1 -4
- operate/quickstart/utils.py +4 -7
- operate/resource.py +37 -5
- operate/services/deployment_runner.py +170 -38
- operate/services/health_checker.py +5 -8
- operate/services/manage.py +103 -164
- operate/services/protocol.py +5 -5
- operate/services/service.py +42 -242
- operate/utils/__init__.py +44 -0
- operate/utils/gnosis.py +25 -17
- operate/wallet/master.py +20 -24
- {olas_operate_middleware-0.8.2.dist-info → olas_operate_middleware-0.10.0.dist-info}/LICENSE +0 -0
- {olas_operate_middleware-0.8.2.dist-info → olas_operate_middleware-0.10.0.dist-info}/WHEEL +0 -0
- {olas_operate_middleware-0.8.2.dist-info → olas_operate_middleware-0.10.0.dist-info}/entry_points.txt +0 -0
operate/quickstart/run_service.py
CHANGED

@@ -34,10 +34,10 @@ from halo import Halo  # type: ignore[import]
 from web3.exceptions import Web3Exception
 
 from operate.account.user import UserAccount
-from operate.constants import IPFS_ADDRESS, OPERATE_HOME
+from operate.constants import IPFS_ADDRESS, NO_STAKING_PROGRAM_ID, OPERATE_HOME
 from operate.data import DATA_DIR
 from operate.data.contracts.staking_token.contract import StakingTokenContract
-from operate.ledger.profiles import
+from operate.ledger.profiles import STAKING, get_staking_contract
 from operate.operate_types import (
     Chain,
     LedgerType,
@@ -100,7 +100,7 @@ QS_STAKING_PROGRAMS: t.Dict[Chain, t.Dict[str, str]] = {
         "mech_marketplace": "mech",
         "marketplace_supply_alpha": "mech",
     },
-    Chain.
+    Chain.OPTIMISM: {
         "optimus_alpha_2": "optimus",
         "optimus_alpha_3": "optimus",
         "optimus_alpha_4": "optimus",
@@ -363,14 +363,12 @@ configure_local_config(
         template["configurations"][chain] |= {
             "staking_program_id": config.staking_program_id,
             "rpc": config.rpc[chain],
-            "use_staking": config.staking_program_id != NO_STAKING_PROGRAM_ID,
             "cost_of_bond": min_staking_deposit,
         }
     else:
         template["configurations"][chain] |= {
             "staking_program_id": NO_STAKING_PROGRAM_ID,
             "rpc": config.rpc[chain],
-            "use_staking": False,
             "cost_of_bond": 1,
         }
 
@@ -414,7 +412,7 @@ configure_local_config(
 
         if env_var_name not in config.user_provided_args:
             print(f"Description: {env_var_data['description']}")
-            if env_var_data["value"]:
+            if env_var_data["value"] is not None and env_var_data["value"] != "":
                 print(f"Default: {env_var_data['value']}")
 
             user_provided_arg = ask_or_get_from_env(
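Note: the reworked default check above also surfaces falsy but meaningful defaults (for example `0` or `False`), which the old truthiness test hid. A minimal standalone illustration, not code from the package:

```python
# Compare the 0.8.2 truthiness test with the 0.10.0 explicit test
# for a few representative env-var default values.
for value in ["", None, 0, False, "10"]:
    old_check = bool(value)  # 0.8.2: `if env_var_data["value"]:`
    new_check = value is not None and value != ""  # 0.10.0
    print(f"{value!r}: old={old_check}, new={new_check}")
```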
@@ -569,7 +567,7 @@ _ask_funds_from_requirements(
             chain_config.chain_data.multisig: "Service Safe"
             for chain_config in service.chain_configs.values()
         }
-        | {
+        | {address: "Agent EOA" for address in service.agent_addresses}
     )
 
     if not requirements["is_refill_required"] and requirements["allow_start_agent"]:
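Note: the hunk above builds the address-label mapping by merging a dict comprehension with the `|` operator (Python 3.9+). A standalone sketch of that merge with made-up addresses:

```python
# Dict union: keys from the right-hand operand win on collisions.
safes = {"0xSafe...": "Service Safe"}
agent_addresses = ["0xAgent1...", "0xAgent2..."]
labels = safes | {address: "Agent EOA" for address in agent_addresses}
print(labels)
# {'0xSafe...': 'Service Safe', '0xAgent1...': 'Agent EOA', '0xAgent2...': 'Agent EOA'}
```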
operate/quickstart/stop_service.py
CHANGED

@@ -23,6 +23,7 @@ import warnings
 from typing import TYPE_CHECKING, cast
 
 from operate.quickstart.run_service import (
+    ask_password_if_needed,
     configure_local_config,
     get_service,
     load_local_config,
@@ -44,9 +45,6 @@ def stop_service(operate: "OperateApp", config_path: str) -> None:
 
     print_title(f"Stop {template['name']} Quickstart")
 
-    operate.service_manager().migrate_service_configs()
-    operate.wallet_manager.migrate_wallet_configs()
-
     # check if agent was started before
     config = load_local_config(
         operate=operate, service_name=cast(str, template["name"])
@@ -55,11 +53,12 @@ def stop_service(operate: "OperateApp", config_path: str) -> None:
         print("No previous agent setup found. Exiting.")
         return
 
+    ask_password_if_needed(operate)
    configure_local_config(template, operate)
    manager = operate.service_manager()
    service = get_service(manager, template)
    manager.stop_service_locally(
-        service_config_id=service.service_config_id,
+        service_config_id=service.service_config_id, use_docker=True
    )
 
    print()
operate/quickstart/terminate_on_chain_service.py
CHANGED

@@ -43,9 +43,6 @@ def terminate_service(operate: "OperateApp", config_path: str) -> None:
 
     print_title(f"Terminate {template['name']} on-chain service")
 
-    operate.service_manager().migrate_service_configs()
-    operate.wallet_manager.migrate_wallet_configs()
-
     # check if agent was started before
     config = load_local_config(
         operate=operate, service_name=cast(str, template["name"])
@@ -61,8 +58,8 @@ def terminate_service(operate: "OperateApp", config_path: str) -> None:
         print("Cancelled.")
         return
 
-    config = configure_local_config(template, operate)
     ask_password_if_needed(operate)
+    config = configure_local_config(template, operate)
     manager = operate.service_manager()
     service = get_service(manager, template)
     ensure_enough_funds(operate, service)
operate/quickstart/utils.py
CHANGED
@@ -35,9 +35,6 @@ from operate.operate_types import Chain
 from operate.resource import LocalResource, deserialize
 
 
-MAX_QUICKSTART_VERSION = 1
-
-
 def print_box(text: str, margin: int = 1, character: str = "=") -> None:
     """Print text centered within a box."""
 
@@ -119,20 +116,20 @@ CHAIN_TO_METADATA = {
             "MAX_FEE_PER_GAS": "",
         },
     },
-    "
+    "optimism": {
         "name": "Optimism",
         "gasFundReq": unit_to_wei(0.005),  # fund for master EOA
-        "staking_bonding_token": OLAS[Chain.
+        "staking_bonding_token": OLAS[Chain.OPTIMISM],
         "token_data": {
             ZERO_ADDRESS: {
                 "symbol": "ETH",
                 "decimals": 18,
             },
-            USDC[Chain.
+            USDC[Chain.OPTIMISM]: {
                 "symbol": "USDC",
                 "decimals": 6,
             },
-            OLAS[Chain.
+            OLAS[Chain.OPTIMISM]: {
                 "symbol": "OLAS",
                 "decimals": 18,
             },
operate/resource.py
CHANGED
@@ -22,7 +22,9 @@
 import enum
 import json
 import os
+import platform
 import shutil
+import time
 import types
 import typing as t
 from dataclasses import asdict, is_dataclass
@@ -92,6 +94,23 @@ def deserialize(obj: t.Any, otype: t.Any) -> t.Any:
     return obj
 
 
+def _safe_file_operation(operation: t.Callable, *args: t.Any, **kwargs: t.Any) -> None:
+    """Safely perform file operation with retries on Windows."""
+    max_retries = 3 if platform.system() == "Windows" else 1
+
+    for attempt in range(max_retries):
+        try:
+            operation(*args, **kwargs)
+            return
+        except (PermissionError, FileNotFoundError, OSError) as e:
+            if attempt == max_retries - 1:
+                raise e
+
+            if platform.system() == "Windows":
+                # On Windows, wait a bit and retry
+                time.sleep(0.1)
+
+
 class LocalResource:
     """Initialize local resource."""
 
@@ -144,9 +163,14 @@ class LocalResource:
         bak0 = path.with_name(f"{path.name}.0.bak")
 
         if path.exists() and not bak0.exists():
-            shutil.copy2
+            _safe_file_operation(shutil.copy2, path, bak0)
 
         tmp_path = path.parent / f".{path.name}.tmp"
+
+        # Clean up any existing tmp file
+        if tmp_path.exists():
+            _safe_file_operation(tmp_path.unlink)
+
         tmp_path.write_text(
             json.dumps(
                 self.json,
@@ -155,15 +179,23 @@ class LocalResource:
             encoding="utf-8",
         )
 
-
+        # Atomic replace to avoid corruption
+        try:
+            _safe_file_operation(os.replace, tmp_path, path)
+        except (PermissionError, FileNotFoundError):
+            # On Windows, if the replace fails, clean up and skip
+            if platform.system() == "Windows":
+                _safe_file_operation(tmp_path.unlink)
+
         self.load(self.path)  # Validate before making backup
 
+        # Rotate backup files
         for i in reversed(range(N_BACKUPS - 1)):
             newer = path.with_name(f"{path.name}.{i}.bak")
             older = path.with_name(f"{path.name}.{i + 1}.bak")
             if newer.exists():
                 if older.exists():
-                    older.unlink
-                    newer.rename
+                    _safe_file_operation(older.unlink)
+                    _safe_file_operation(newer.rename, older)
 
-        shutil.copy2
+        _safe_file_operation(shutil.copy2, path, bak0)
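Note: the resource.py hunks above wrap filesystem calls in `_safe_file_operation` and switch `LocalResource.store` to a write-to-temp-then-`os.replace` sequence with rotated `.bak` copies. A minimal standalone sketch of the same retry-plus-atomic-write pattern (helper names here are illustrative, not the package's API):

```python
import json
import os
import platform
import time
from pathlib import Path


def retry_fs_call(operation, *args, **kwargs):
    """Retry a filesystem call a few times on Windows, where files may be briefly locked."""
    retries = 3 if platform.system() == "Windows" else 1
    for attempt in range(retries):
        try:
            return operation(*args, **kwargs)
        except OSError:
            if attempt == retries - 1:
                raise
            time.sleep(0.1)


def atomic_write_json(path: Path, data: dict) -> None:
    """Write JSON to a hidden temp file, then atomically replace the target."""
    tmp_path = path.parent / f".{path.name}.tmp"
    tmp_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
    retry_fs_call(os.replace, tmp_path, path)  # atomic on the same filesystem


atomic_write_json(Path("example.json"), {"hello": "world"})
```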
operate/services/deployment_runner.py
CHANGED

@@ -29,15 +29,18 @@ import sys  # nosec
 import time
 import typing as t
 from abc import ABC, ABCMeta, abstractmethod
+from contextlib import suppress
+from enum import Enum
 from io import TextIOWrapper
 from pathlib import Path
 from traceback import print_exc
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Type
 from venv import main as venv_cli
 
 import psutil
 import requests
 from aea.__version__ import __version__ as aea_version
+from aea.helpers.logging import setup_logger
 from autonomy.__version__ import __version__ as autonomy_version
 
 from operate import constants
@@ -99,6 +102,8 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
 
     TM_CONTROL_URL = constants.TM_CONTROL_URL
     SLEEP_BEFORE_TM_KILL = 2  # seconds
+    START_TRIES = constants.DEPLOYMENT_START_TRIES_NUM
+    logger = setup_logger(name="operate.base_deployment_runner")
 
     def _open_agent_runner_log_file(self) -> TextIOWrapper:
         """Open agent_runner.log file."""
@@ -109,7 +114,7 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
     def _run_aea_command(self, *args: str, cwd: Path) -> Any:
         """Run aea command."""
         cmd = " ".join(args)
-
+        self.logger.info(f"Running aea command: {cmd} at {str(cwd)}")
         p = multiprocessing.Process(
             target=self.__class__._call_aea_command,  # pylint: disable=protected-access
             args=(cwd, args),
@@ -134,14 +139,14 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
                 args, standalone_mode=False
             )
         except Exception:
+            print(f"Error on calling aea command: {args}")
             print_exc()
             raise
 
-
-    def _run_cmd(args: t.List[str], cwd: t.Optional[Path] = None) -> None:
+    def _run_cmd(self, args: t.List[str], cwd: t.Optional[Path] = None) -> None:
         """Run command in a subprocess."""
-
-
+        self.logger.info(f"Running: {' '.join(args)}")
+        self.logger.info(f"Working dir: {os.getcwd()}")
         result = subprocess.run(  # pylint: disable=subprocess-run-check # nosec
             args=args,
             cwd=cwd,
@@ -157,15 +162,8 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
         """Prepare agent env, add keys, run aea commands."""
         working_dir = self._work_directory
         env = json.loads((working_dir / "agent.json").read_text(encoding="utf-8"))
-        # Patch for trader agent
-        if "SKILL_TRADER_ABCI_MODELS_PARAMS_ARGS_STORE_PATH" in env:
-            data_dir = working_dir / "data"
-            data_dir.mkdir(exist_ok=True)
-            env["SKILL_TRADER_ABCI_MODELS_PARAMS_ARGS_STORE_PATH"] = str(data_dir)
 
         # TODO: Dynamic port allocation, backport to service builder
-        env["CONNECTION_ABCI_CONFIG_HOST"] = "localhost"
-        env["CONNECTION_ABCI_CONFIG_PORT"] = "26658"
         env["PYTHONUTF8"] = "1"
         for var in env:
             # Fix tendermint connection params
@@ -178,11 +176,6 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
             if var.endswith("MODELS_PARAMS_ARGS_TENDERMINT_P2P_URL"):
                 env[var] = "localhost:26656"
 
-            if var.endswith("MODELS_BENCHMARK_TOOL_ARGS_LOG_DIR"):
-                benchmarks_dir = working_dir / "benchmarks"
-                benchmarks_dir.mkdir(exist_ok=True, parents=True)
-                env[var] = str(benchmarks_dir.resolve())
-
         (working_dir / "agent.json").write_text(
             json.dumps(env, indent=4),
             encoding="utf-8",
@@ -206,8 +199,22 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
             cwd=working_dir,
         )
 
+        agent_alias_name = "agent"
+
+        agent_dir_full_path = Path(working_dir) / agent_alias_name
+
+        if agent_dir_full_path.exists():
+            # remove if exists before fetching! can have issues with retry mechanism of multiple start attempts
+            with suppress(Exception):
+                shutil.rmtree(agent_dir_full_path, ignore_errors=True)
+
         self._run_aea_command(
-            "-s",
+            "-s",
+            "fetch",
+            env["AEA_AGENT"],
+            "--alias",
+            agent_alias_name,
+            cwd=working_dir,
         )
 
         # Add keys
@@ -221,6 +228,18 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
         self._run_aea_command("-s", "issue-certificates", cwd=working_dir / "agent")
 
     def start(self) -> None:
+        """Start the deployment with retries."""
+        for _ in range(self.START_TRIES):
+            try:
+                self._start()
+                return
+            except Exception as e:  # pylint: disable=broad-except
+                self.logger.exception(f"Error on starting deployment: {e}")
+        raise RuntimeError(
+            f"Failed to start the deployment after {self.START_TRIES} attempts! Check logs"
+        )
+
+    def _start(self) -> None:
         """Start the deployment."""
         self._setup_agent()
         self._start_tendermint()
@@ -247,9 +266,11 @@ class BaseDeploymentRunner(AbstractDeploymentRunner, metaclass=ABCMeta):
             requests.get(self._get_tm_exit_url(), timeout=(1, 10))
             time.sleep(self.SLEEP_BEFORE_TM_KILL)
         except requests.ConnectionError:
-
+            self.logger.error(
+                f"No Tendermint process listening on {self._get_tm_exit_url()}."
+            )
         except Exception:  # pylint: disable=broad-except
-
+            self.logger.exception("Exception on tendermint stop!")
 
         pid = self._work_directory / "tendermint.pid"
         if not pid.exists():
@@ -611,30 +632,141 @@ class HostPythonHostDeploymentRunner(BaseDeploymentRunner):
         )
 
 
-
-    """
-    deployment_runner: BaseDeploymentRunner
+class States(Enum):
+    """Service deployment states."""
 
-
-
-
-
-
-
-
+    NONE = 0
+    STARTING = 1
+    STARTED = 2
+    STOPPING = 3
+    STOPPED = 4
+    ERROR = 5
+
+
+class DeploymentManager:
+    """Deployment manager to run and stop deployments."""
+
+    def __init__(self) -> None:
+        """Init the deployment manager."""
+        self._deployment_runner_class = self._get_host_deployment_runner_class()
+        self._is_stopping = False
+        self.logger = setup_logger(name="operate.deployment_manager")
+        self._states: Dict[Path, States] = {}
+
+    def _get_deployment_runner(self, build_dir: Path) -> BaseDeploymentRunner:
+        """Get deploymnent runner instance."""
+        return self._deployment_runner_class(build_dir)
+
+    @staticmethod
+    def _get_host_deployment_runner_class() -> Type[BaseDeploymentRunner]:
+        """Return depoyment runner class according to running env."""
+
+        if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
+            # pyinstaller inside!
+            if platform.system() == "Darwin":
+                return PyInstallerHostDeploymentRunnerMac
+            if platform.system() == "Windows":
+                return PyInstallerHostDeploymentRunnerWindows
             raise ValueError(f"Platform not supported {platform.system()}")
-
-
-
+
+        return HostPythonHostDeploymentRunner
+
+    def stop(self) -> None:
+        """Stop deploment manager."""
+        self.logger.info("Stop deployment manager")
+        self._is_stopping = True
+
+    def get_state(self, build_dir: Path) -> States:
+        """Get state of the deployment."""
+        return self._states.get(build_dir) or States.NONE
+
+    def check_ipfs_connection_works(self) -> None:
+        """Check ipfs works and there is a good net connection."""
+        self.logger.info("Doing network connection check by test call to ipfs server.")
+        for i in range(3):
+            try:
+                requests.get(constants.IPFS_CHECK_URL, timeout=60)
+                return
+            except OSError:
+                self.logger.exception(
+                    "failed to connect to ipfs to test connection. OSError, critical!"
+                )
+                raise
+            except Exception:  # pylint: disable=broad-except
+                self.logger.exception(
+                    "failed to connect to ipfs to test connection. do another try"
+                )
+                time.sleep(i * 5)
+        self.logger.error(
+            "failed to connect to ipfs to test connection. no attempts left. raise error"
+        )
+        raise RuntimeError(
+            "Failed to perform test connection to ipfs to check network connection!"
+        )
+
+    def run_deployment(self, build_dir: Path) -> None:
+        """Run deployment."""
+        if self._is_stopping:
+            raise RuntimeError("deployment manager stopped")
+        if self.get_state(build_dir=build_dir) in [States.STARTING, States.STOPPING]:
+            raise ValueError("Service already in transition")
+
+        # doing pre check for ipfs works fine, also network connection is ok.
+        self.check_ipfs_connection_works()
+
+        self.logger.info(f"Starting deployment {build_dir}...")
+        self._states[build_dir] = States.STARTING
+        try:
+            deployment_runner = self._get_deployment_runner(build_dir=build_dir)
+            deployment_runner.start()
+            self.logger.info(f"Started deployment {build_dir}")
+            self._states[build_dir] = States.STARTED
+        except Exception:  # pylint: disable=broad-except
+            self.logger.exception(
+                f"Starting deployment failed {build_dir}. so try to stop"
+            )
+            self._states[build_dir] = States.ERROR
+            self.stop_deployemnt(build_dir=build_dir, force=True)
+
+        if self._is_stopping:
+            self.logger.warning(
+                f"Deployment at {build_dir} started when it was going to stop, so stop it"
+            )
+            self.stop_deployemnt(build_dir=build_dir, force=True)
+
+    def stop_deployemnt(self, build_dir: Path, force: bool = False) -> None:
+        """Stop the deployment."""
+        if (
+            self.get_state(build_dir=build_dir) in [States.STARTING, States.STOPPING]
+            and not force
+        ):
+            raise ValueError("Service already in transition")
+        self.logger.info(f"Stopping deployment {build_dir}...")
+        self._states[build_dir] = States.STOPPING
+        deployment_runner = self._get_deployment_runner(build_dir=build_dir)
+        try:
+            deployment_runner.stop()
+            self.logger.info(f"Stopped deployment {build_dir}...")
+            self._states[build_dir] = States.STOPPED
+        except Exception:
+            self.logger.exception(f"Stopping deployment failed {build_dir}...")
+            self._states[build_dir] = States.ERROR
+            raise
+
+
+deployment_manager = DeploymentManager()
 
 
 def run_host_deployment(build_dir: Path) -> None:
     """Run host deployment."""
-
-    deployment_runner.start()
+    deployment_manager.run_deployment(build_dir=build_dir)
 
 
 def stop_host_deployment(build_dir: Path) -> None:
     """Stop host deployment."""
-
-
+    deployment_manager.stop_deployemnt(build_dir=build_dir)
+
+
+def stop_deployment_manager() -> None:
+    """Stop deployment manager."""
+    deployment_manager.stop()
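Note: the hunk above replaces the module-level `deployment_runner.start()` call with a `DeploymentManager` singleton that tracks a per-build-dir `States` value and checks IPFS connectivity before starting. A hedged usage sketch of the exported helpers (the build directory path is purely illustrative):

```python
from pathlib import Path

from operate.services.deployment_runner import (
    run_host_deployment,
    stop_deployment_manager,
    stop_host_deployment,
)

# Illustrative path; the real build dir is created by the service manager.
build_dir = Path.home() / ".operate" / "some-service" / "deployment"

run_host_deployment(build_dir)   # NONE -> STARTING -> STARTED (or ERROR plus a forced stop)
stop_host_deployment(build_dir)  # STARTED -> STOPPING -> STOPPED
stop_deployment_manager()        # later run_deployment() calls raise RuntimeError
```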
operate/services/health_checker.py
CHANGED

@@ -20,6 +20,7 @@
 """Source code for checking aea is alive.."""
 import asyncio
 import json
+import logging
 import typing as t
 from concurrent.futures import ThreadPoolExecutor
 from http import HTTPStatus
@@ -27,15 +28,11 @@ from pathlib import Path
 from traceback import print_exc
 
 import aiohttp  # type: ignore
-from aea.helpers.logging import setup_logger
 
 from operate.constants import HEALTH_CHECK_URL
 from operate.services.manage import ServiceManager  # type: ignore
 
 
-HTTP_OK = HTTPStatus.OK
-
-
 class HealthChecker:
     """Health checker manager."""
 
@@ -43,19 +40,19 @@ class HealthChecker:
     PORT_UP_TIMEOUT_DEFAULT = 300  # seconds
     REQUEST_TIMEOUT_DEFAULT = 90
     NUMBER_OF_FAILS_DEFAULT = 10
-    HEALTH_CHECK_URL = HEALTH_CHECK_URL
 
     def __init__(
         self,
         service_manager: ServiceManager,
+        logger: logging.Logger,
         port_up_timeout: int | None = None,
         sleep_period: int | None = None,
         number_of_fails: int | None = None,
     ) -> None:
         """Init the healtch checker."""
         self._jobs: t.Dict[str, asyncio.Task] = {}
-        self.logger = setup_logger(name="operate.health_checker")
         self._service_manager = service_manager
+        self.logger = logger
         self.port_up_timeout = port_up_timeout or self.PORT_UP_TIMEOUT_DEFAULT
         self.sleep_period = sleep_period or self.SLEEP_PERIOD_DEFAULT
         self.number_of_fails = number_of_fails or self.NUMBER_OF_FAILS_DEFAULT
@@ -95,11 +92,11 @@ class HealthChecker:
         del service_config_id
         timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT_DEFAULT)
         async with aiohttp.ClientSession(timeout=timeout) as session:
-            async with session.get(
+            async with session.get(HEALTH_CHECK_URL) as resp:
                 try:
                     status = resp.status
 
-                    if status !=
+                    if status != HTTPStatus.OK:
                         # not HTTP OK -> not healthy for sure
                         content = await resp.text()
                         self.logger.warning(