primitive 0.2.11-py3-none-any.whl → 0.2.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- primitive/__about__.py +1 -1
- primitive/agent/actions.py +52 -112
- primitive/agent/commands.py +2 -1
- primitive/agent/runner.py +14 -2
- primitive/agent/uploader.py +2 -2
- primitive/daemons/actions.py +37 -6
- primitive/daemons/launch_agents.py +8 -18
- primitive/daemons/launch_service.py +6 -13
- primitive/db/models.py +15 -5
- primitive/db/sqlite.py +9 -2
- primitive/hardware/actions.py +7 -2
- primitive/monitor/actions.py +130 -33
- primitive/utils/daemons.py +1 -1
- {primitive-0.2.11.dist-info → primitive-0.2.13.dist-info}/METADATA +1 -1
- {primitive-0.2.11.dist-info → primitive-0.2.13.dist-info}/RECORD +18 -18
- {primitive-0.2.11.dist-info → primitive-0.2.13.dist-info}/WHEEL +0 -0
- {primitive-0.2.11.dist-info → primitive-0.2.13.dist-info}/entry_points.txt +0 -0
- {primitive-0.2.11.dist-info → primitive-0.2.13.dist-info}/licenses/LICENSE.txt +0 -0
primitive/__about__.py
CHANGED
primitive/agent/actions.py
CHANGED
@@ -6,10 +6,10 @@ from loguru import logger
 from primitive.__about__ import __version__
 from primitive.utils.actions import BaseAction
 
-from .runner import Runner
-from .uploader import Uploader
 from ..db import sqlite
 from ..db.models import JobRun
+from .runner import Runner
+from .uploader import Uploader
 
 
 class Agent(BaseAction):
@@ -33,137 +33,77 @@ class Agent(BaseAction):
         # Create uploader
         uploader = Uploader(primitive=self.primitive)
 
-        # self.primitive.hardware.update_hardware_system_info()
-        try:
-            # hey stupid:
-            # do not set is_available to True here, it will mess up the reservation logic
-            # only set is_available after we've checked that no active reservation is present
-            # setting is_available of the parent also effects the children,
-            # which may have active reservations as well
-            self.primitive.hardware.check_in_http(is_online=True)
-        except Exception as exception:
-            logger.exception(f"Error checking in hardware: {exception}")
-            sys.exit(1)
-
         try:
-            active_reservation_id = None
-            active_reservation_pk = None
-
             while True:
                 logger.debug("Scanning for files to upload...")
                 uploader.scan()
 
-
-
-                if
-                    if (
-                        hardware["activeReservation"]["id"] != active_reservation_id
-                        or hardware["activeReservation"]["pk"] != active_reservation_pk
-                    ):
-                        logger.warning("New reservation for this hardware.")
-                        active_reservation_id = hardware["activeReservation"]["id"]
-                        active_reservation_pk = hardware["activeReservation"]["pk"]
-                        logger.debug("Active Reservation:")
-                        logger.debug(f"Node ID: {active_reservation_id}")
-                        logger.debug(f"PK: {active_reservation_pk}")
-
-                        logger.debug("Running pre provisioning steps for reservation.")
-                        self.primitive.provisioning.add_reservation_authorized_keys(
-                            reservation_id=active_reservation_id
-                        )
-                else:
-                    if (
-                        hardware["activeReservation"] is None
-                        and active_reservation_id is not None
-                        # and hardware["isAvailable"] NOTE: this condition was causing the CLI to get into a loop searching for job runs
-                    ):
-                        logger.debug("Previous Reservation is Complete:")
-                        logger.debug(f"Node ID: {active_reservation_id}")
-                        logger.debug(f"PK: {active_reservation_pk}")
-                        logger.debug(
-                            "Running cleanup provisioning steps for reservation."
-                        )
-                        self.primitive.provisioning.remove_reservation_authorized_keys(
-                            reservation_id=active_reservation_id
-                        )
-                        active_reservation_id = None
-                        active_reservation_pk = None
-
-                if not active_reservation_id:
-                    self.primitive.hardware.check_in_http(
-                        is_available=True, is_online=True
-                    )
+                db_job_run = JobRun.objects.first()
+
+                if not db_job_run:
                     sleep_amount = 5
                     logger.debug(
-                        f"No
+                        f"No pending job runs... [sleeping {sleep_amount} seconds]"
                     )
                     sleep(sleep_amount)
                     continue
 
-
-
+                api_job_run_data = self.primitive.jobs.get_job_run(
+                    id=db_job_run.job_run_id,
                 )
 
-
-
-
-
-                if not pending_job_runs:
-                    self.primitive.hardware.check_in_http(is_online=True)
-                    sleep_amount = 5
-                    logger.debug(
-                        f"Waiting for Job Runs... [sleeping {sleep_amount} seconds]"
+                if not api_job_run_data or not api_job_run_data.data:
+                    logger.error(
+                        f"Job Run {db_job_run.job_run_id} not found in API, deleting from DB"
                     )
-
+                    JobRun.objects.filter_by(job_run_id=db_job_run.job_run_id).delete()
                     continue
 
-
-                logger.debug("Found pending Job Run")
-                logger.debug(f"Job Run ID: {job_run['id']}")
-                logger.debug(f"Job Name: {job_run['job']['name']}")
+                api_job_run = api_job_run_data.data["jobRun"]
 
-
-
-
+                logger.debug("Found pending Job Run")
+                logger.debug(f"Job Run ID: {api_job_run.get('id')}")
+                logger.debug(f"Job Name: {api_job_run.get('name')}")
+
+                runner = Runner(
+                    primitive=self.primitive,
+                    job_run=api_job_run,
+                    # max_log_size=500 * 1024,
+                )
+
+                try:
+                    runner.setup()
+                except Exception as exception:
+                    logger.exception(
+                        f"Exception while initializing runner: {exception}"
+                    )
+                    self.primitive.jobs.job_run_update(
+                        id=api_job_run.get("id"),
+                        status="request_completed",
+                        conclusion="failure",
                     )
+                    JobRun.objects.filter_by(job_run_id=api_job_run.get("id")).delete()
+                    continue
 
-
-
-
-
+                try:
+                    runner.execute()
+                except Exception as exception:
+                    logger.exception(f"Exception while executing job: {exception}")
+                    self.primitive.jobs.job_run_update(
+                        id=api_job_run.get("id"),
+                        status="request_completed",
+                        conclusion="failure",
                     )
+                finally:
+                    runner.cleanup()
+
+                # NOTE: also run scan here to force upload of artifacts
+                # This should probably eventually be another daemon?
+                uploader.scan()
 
-
-
-
-                        logger.exception(
-                            f"Exception while initializing runner: {exception}"
-                        )
-                        self.primitive.jobs.job_run_update(
-                            id=job_run["id"],
-                            status="request_completed",
-                            conclusion="failure",
-                        )
-                        JobRun.objects.filter_by(job_run_id=job_run["id"]).delete()
-                        continue
-
-                    try:
-                        runner.execute()
-                    except Exception as exception:
-                        logger.exception(f"Exception while executing job: {exception}")
-                        self.primitive.jobs.job_run_update(
-                            id=job_run["id"],
-                            status="request_completed",
-                            conclusion="failure",
-                        )
-                    finally:
-                        runner.cleanup()
-
-                    # NOTE: also run scan here to force upload of artifacts
-                    # This should probably eventually be another daemon?
-                    uploader.scan()
-
-                    JobRun.objects.filter_by(job_run_id=job_run["id"]).delete()
+                JobRun.objects.filter_by(
+                    job_run_id=api_job_run.get("id"),
+                ).delete()
 
                 sleep(5)
         except KeyboardInterrupt:
primitive/agent/commands.py
CHANGED
primitive/agent/runner.py
CHANGED
@@ -7,11 +7,11 @@ from abc import abstractmethod
 from enum import Enum, IntEnum
 from pathlib import Path, PurePath
 from typing import Dict, List, TypedDict
-from ..db.models import JobRun
 
 import yaml
 from loguru import logger
 
+from ..db.models import JobRun
 from ..utils.cache import get_artifacts_cache, get_logs_cache, get_sources_cache
 from ..utils.shell import env_to_dict
 
@@ -157,8 +157,11 @@ class Runner:
 
         task_failed = False
         cancelled = False
-
+
         for task in self.config["executes"]:
+            # the get status check here is to ensure that if cancel is called
+            # while one task is running, we do not run any OTHER laebeled tasks
+            # THIS is required for MULTI STEP JOBS
             status = self.primitive.jobs.get_job_status(self.job_run["id"])
             status_value = status.data["jobRun"]["status"]
             conclusion_value = status.data["jobRun"]["conclusion"]
@@ -177,6 +180,14 @@
             f"Produced {number_of_files_produced} files for {self.job['slug']} job"
         )
 
+        # FOR NONE MULTI STEP JOBS
+        # we still have to check that the job was cancelled here as well
+        status = self.primitive.jobs.get_job_status(self.job_run["id"])
+        status_value = status.data["jobRun"]["status"]
+        conclusion_value = status.data["jobRun"]["conclusion"]
+        if status_value == "completed" and conclusion_value == "cancelled":
+            cancelled = True
+
         if cancelled:
             logger.warning("Job cancelled by user")
             self.primitive.jobs.job_run_update(
@@ -185,6 +196,7 @@
             )
             return
 
+        conclusion = "success"
         if task_failed:
             conclusion = "failure"
         else:
primitive/agent/uploader.py
CHANGED
@@ -50,8 +50,8 @@ class Uploader:
                     path=file,
                     key_prefix=str(PurePath(file).relative_to(cache.parent).parent),
                 )
-            except Exception as
-                if "is empty" in str(
+            except Exception as exception:
+                if "is empty" in str(exception):
                     logger.warning(f"{file} is empty, skipping upload")
                     continue
 
primitive/daemons/actions.py
CHANGED
@@ -1,13 +1,18 @@
 import platform
+import subprocess
 import typing
-from
+from pathlib import Path
+from typing import Dict, List, Optional
 
 if typing.TYPE_CHECKING:
     from ..client import Primitive
 
+from ..utils.daemons import Daemon
 from .launch_agents import LaunchAgent
 from .launch_service import LaunchService
-
+
+HOME_DIRECTORY = Path.home()
+PRIMITIVE_BINARY_PATH = Path(HOME_DIRECTORY / ".pyenv" / "shims" / "primitive")
 
 
 class Daemons:
@@ -15,16 +20,42 @@ class Daemons:
         self.primitive: Primitive = primitive
         self.os_family = platform.system()
 
+        found_primitive_binary_path = PRIMITIVE_BINARY_PATH
+        if not PRIMITIVE_BINARY_PATH.exists():
+            result = subprocess.run(["which", "primitive"], capture_output=True)
+            if result.returncode == 0:
+                found_primitive_binary_path = result.stdout.decode().rstrip("\n")
+            else:
+                raise Exception(
+                    f"primitive binary not found at {PRIMITIVE_BINARY_PATH}"
+                )
+
+        base_primitive_command = f'/bin/sh -lc "{found_primitive_binary_path} "'
+
         match self.os_family:
             case "Darwin":
                 self.daemons: Dict[str, Daemon] = {
-                    "agent": LaunchAgent(
-
+                    "agent": LaunchAgent(
+                        "tech.primitive.agent",
+                        executable=str(found_primitive_binary_path),
+                        command="agent --debug",
+                    ),
+                    "monitor": LaunchAgent(
+                        "tech.primitive.monitor",
+                        executable=str(found_primitive_binary_path),
+                        command="monitor --debug",
+                    ),
                 }
             case "Linux":
                 self.daemons: Dict[str, Daemon] = {
-                    "agent": LaunchService(
-
+                    "agent": LaunchService(
+                        "tech.primitive.agent",
+                        command=f"{base_primitive_command} agent --debug",
+                    ),
+                    "monitor": LaunchService(
+                        "tech.primitive.monitor",
+                        command=f"{base_primitive_command} monitor --debug",
+                    ),
                 }
             case _:
                 raise NotImplementedError(f"{self.os_family} is not supported.")
primitive/daemons/launch_agents.py
CHANGED
@@ -1,18 +1,21 @@
 import os
-from pathlib import Path
 import subprocess
+from pathlib import Path
+
 from loguru import logger
+
 from ..utils.daemons import Daemon
 
 HOME_DIRECTORY = Path.home()
 CURRENT_USER = str(HOME_DIRECTORY.expanduser()).lstrip("/Users/")
-PRIMITIVE_BINARY_PATH = Path(HOME_DIRECTORY / ".pyenv" / "shims" / "primitive")
 
 
 class LaunchAgent(Daemon):
-    def __init__(self, label: str):
+    def __init__(self, label: str, executable: str, command: str):
        self.label = label
        self.name = label.split(".")[-1]
+       self.executable = executable
+       self.command = command
 
     @property
     def file_path(self) -> Path:
@@ -22,10 +25,6 @@ class LaunchAgent(Daemon):
     def logs(self) -> Path:
         return Path(HOME_DIRECTORY / "Library" / "Logs" / f"{self.label}.log")
 
-    @property
-    def cmd(self) -> str:
-        return self.label.split(".")[-1]
-
     def stop(self, unload: bool = True) -> bool:
         try:
             stop_existing_process = f"launchctl stop {self.label}"
@@ -107,15 +106,6 @@ class LaunchAgent(Daemon):
         self.file_path.parent.mkdir(parents=True, exist_ok=True)
         self.file_path.touch()
 
-        found_primitive_binary_path = PRIMITIVE_BINARY_PATH
-        if not PRIMITIVE_BINARY_PATH.exists():
-            result = subprocess.run(["which", "primitive"], capture_output=True)
-            if result.returncode == 0:
-                found_primitive_binary_path = result.stdout.decode().rstrip("\n")
-            else:
-                logger.error("primitive binary not found")
-                return False
-
         self.file_path.write_text(
             f"""<?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
@@ -134,8 +124,8 @@ class LaunchAgent(Daemon):
     </array>
     <key>ProgramArguments</key>
     <array>
-        <string>{
-        <string>{self.
+        <string>{self.executable}</string>
+        <string>{self.command}</string>
     </array>
     <key>RunAtLoad</key>
     <true/>
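With the hard-coded binary path and the old cmd property gone, the launchd plist's ProgramArguments array is built from the executable and command values passed into LaunchAgent. A standalone sketch of that array shape, using the stdlib plistlib and hypothetical paths (this is not the package's full template, which also sets log paths, KeepAlive-style options, and RunAtLoad):

import plistlib

# Hypothetical values standing in for self.executable and self.command.
executable = "/Users/example/.pyenv/shims/primitive"
command = "agent --debug"

plist_text = f"""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>Label</key>
    <string>tech.primitive.agent</string>
    <key>ProgramArguments</key>
    <array>
        <string>{executable}</string>
        <string>{command}</string>
    </array>
</dict>
</plist>
"""

# Parse it back to confirm the two-element ProgramArguments structure.
parsed = plistlib.loads(plist_text.encode())
print(parsed["ProgramArguments"])
# ['/Users/example/.pyenv/shims/primitive', 'agent --debug']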
primitive/daemons/launch_service.py
CHANGED
@@ -1,18 +1,20 @@
-import os
 import configparser
+import os
 import subprocess
 from pathlib import Path
+
 from loguru import logger
+
 from ..utils.daemons import Daemon
 
 HOME_DIRECTORY = Path.home()
-PRIMITIVE_BINARY_PATH = Path(HOME_DIRECTORY / ".pyenv" / "shims" / "primitive")
 
 
 class LaunchService(Daemon):
-    def __init__(self, label: str):
+    def __init__(self, label: str, command: str):
        self.label = label
        self.name = label.split(".")[-1]
+       self.command = command
 
     @property
     def service_name(self) -> str:
@@ -119,17 +121,8 @@ class LaunchService(Daemon):
             "After": "network.target",
         }
 
-        found_primitive_binary_path = PRIMITIVE_BINARY_PATH
-        if not PRIMITIVE_BINARY_PATH.exists():
-            result = subprocess.run(["which", "primitive"], capture_output=True)
-            if result.returncode == 0:
-                found_primitive_binary_path = result.stdout.decode().rstrip("\n")
-            else:
-                print("primitive binary not found")
-                return False
-
         config["Service"] = {
-            "ExecStart":
+            "ExecStart": self.command,
             "Restart": "always",
             "StandardError": f"append:{self.logs}",
             "StandardOutput": f"append:{self.logs}",
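On Linux the same idea applies: LaunchService now receives the full command up front and drops it straight into the unit's ExecStart. A standalone sketch of that configparser pattern with hypothetical values (the real writer also fills in the Unit section, log paths, and an Install section shown and implied above):

import configparser
import sys

# Hypothetical command standing in for the self.command built by Daemons.__init__.
command = '/bin/sh -lc "/home/example/.pyenv/shims/primitive agent --debug"'

config = configparser.ConfigParser()
config.optionxform = str  # systemd directive names are case-sensitive; keep them as written
config["Service"] = {
    "ExecStart": command,
    "Restart": "always",
}
config.write(sys.stdout, space_around_delimiters=False)
# [Service]
# ExecStart=/bin/sh -lc "/home/example/.pyenv/shims/primitive agent --debug"
# Restart=always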
primitive/db/models.py
CHANGED
@@ -1,8 +1,10 @@
+from typing import Any, Callable, Dict, Generic, List, Optional, Type, TypeVar, Union
+
 from sqlalchemy import Column, Integer, String
-from sqlalchemy.orm import Mapped,
-
-from .sqlite import Session
+from sqlalchemy.orm import Mapped, Query, mapped_column
+
 from .base import Base
+from .sqlite import Session
 
 T = TypeVar("T", bound="Base")
 
@@ -25,6 +27,14 @@ class Manager(Generic[T]):
         self.filters = kwargs
         return self
 
+    def exists(self) -> bool:
+        with Session() as session:
+            model = self.model_cls_lambda()
+            query = session.query(model)
+            query.filter_by(**self.filters)
+            self.filters.clear()
+            return query.count() > 0
+
     def all(self) -> List[T]:
         with Session() as session:
             model = self.model_cls_lambda()
@@ -51,7 +61,7 @@ class Manager(Generic[T]):
                 session.commit()
                 return query
             else:
-                raise ValueError(f"{model.__name__} not found")
+                raise ValueError(f"Update failed, {model.__name__} not found")
 
     def delete(self) -> None:
         with Session() as session:
@@ -62,7 +72,7 @@ class Manager(Generic[T]):
                 query.delete()
                 session.commit()
             else:
-                raise ValueError(f"{model.__name__} not found")
+                raise ValueError(f"Delete failed, {model.__name__} not found")
 
 
 class JobRun(Base):
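The new Manager.exists() helper follows the same session-per-call shape as the other manager methods: open a Session, build a query for the model, and treat a non-zero count() as existence. For context, a self-contained sketch of that count-based existence check against an in-memory SQLite database, using a hypothetical Item model rather than the package's own JobRun model and Session setup:

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Item(Base):
    __tablename__ = "items"
    id = Column(Integer, primary_key=True)
    job_run_id = Column(String)


engine = create_engine("sqlite:///:memory:", echo=False)
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Item(job_run_id="abc123"))
    session.commit()
    # The exists-style check: filter the query, count, compare against zero.
    found = session.query(Item).filter_by(job_run_id="abc123").count() > 0
    print(found)  # True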
primitive/db/sqlite.py
CHANGED
@@ -1,6 +1,9 @@
 from pathlib import Path
-
+
+from loguru import logger
+from sqlalchemy import Engine, create_engine
 from sqlalchemy.orm import Session as SQLAlchemySession
+
 from ..utils.cache import get_cache_dir
 from .base import Base
 
@@ -9,9 +12,13 @@ def init() -> None:
     db_path: Path = get_cache_dir() / "primitive.sqlite3"
 
     # Drop DB existing database if it exists
+    # if db_path.exists():
+    # logger.warning(f"[*] Deleting existing SQLite database at {db_path}")
+    # db_path.unlink()
     if db_path.exists():
-
+        return
 
+    logger.info(f"[*] Initializing SQLite database at {db_path}")
     engine = create_engine(f"sqlite:///{db_path}", echo=False)
     Base.metadata.create_all(engine)
 
primitive/hardware/actions.py
CHANGED
@@ -592,12 +592,17 @@ class Hardware(BaseAction):
         pass
 
     @guard
-    def _sync_children(self):
+    def _sync_children(self, hardware: Optional[Dict[str, str]] = None):
        # get the existing children if any from the hardware details
        # get the latest children from the node
        # compare the two and update the node with the latest children
        # remove any children from remote that are not in the latest children
-
+       if not hardware:
+           hardware = self.primitive.hardware.get_own_hardware_details()
+           if not hardware:
+               logger.error("No hardware found.")
+               return
+
        remote_children = hardware.get("children", [])
        local_children = self.primitive.hardware._list_local_children()
 
primitive/monitor/actions.py
CHANGED
@@ -1,13 +1,15 @@
-from primitive.utils.actions import BaseAction
-from loguru import logger
-from primitive.__about__ import __version__
-from ..utils.exceptions import P_CLI_100
 import sys
-import psutil
-from ..db import sqlite
-from ..db.models import JobRun
 from time import sleep
 
+import psutil
+from loguru import logger
+
+from primitive.__about__ import __version__
+from primitive.db import sqlite
+from primitive.db.models import JobRun
+from primitive.utils.actions import BaseAction
+from primitive.utils.exceptions import P_CLI_100
+
 
 class Monitor(BaseAction):
     def start(self):
@@ -22,9 +24,6 @@ class Monitor(BaseAction):
         logger.info("[*] primitive monitor")
         logger.info(f"[*] Version: {__version__}")
 
-        # Initialize the database
-        sqlite.init()
-
         try:
             # hey stupid:
             # do not set is_available to True here, it will mess up the reservation logic
@@ -36,34 +35,39 @@
             logger.exception(f"Error checking in hardware: {exception}")
             sys.exit(1)
 
+        # Initialize the database
+        sqlite.init()
+
         try:
-
-
-            self.primitive.hardware._sync_children()
+            active_reservation_id = None
+            active_reservation_pk = None
 
-
-
+            while True:
+                # FIRST, check for jobs in the database that are running
+                db_job_runs = JobRun.objects.all()
+                for job_run in db_job_runs:
+                    if job_run.pid is None:
+                        pid_sleep_amount = 0.1
+                        logger.debug(
+                            f"Job run {job_run.job_run_id} has no PID. Agent has not started."
+                        )
+                        logger.debug(
+                            f"Sleeping {pid_sleep_amount} seconds before checking again..."
+                        )
+                        sleep(pid_sleep_amount)
+                        continue
 
-                # No procs in the database => nothing to monitor
-                if len(procs) == 0:
-                    sleep_amount = 5
                     logger.debug(
-                        f"
+                        f"Checking process PID {job_run.pid} for JobRun {job_run.job_run_id}..."
                     )
-                    sleep(sleep_amount)
-                    continue
 
-
-
-
-
-
-
-                # For each process, check status and kill if cancelled
-                for proc in procs:
-                    logger.debug(f"Checking process {proc.pid}...")
+                    status = self.primitive.jobs.get_job_status(job_run.job_run_id)
+                    if status is None or status.data is None:
+                        logger.error(
+                            f"Error fetching status of <JobRun {job_run.job_run_id}>."
+                        )
+                        continue
 
-                    status = self.primitive.jobs.get_job_status(proc.job_run_id)
                     status_value = status.data["jobRun"]["status"]
                     conclusion_value = status.data["jobRun"]["conclusion"]
 
@@ -71,7 +75,7 @@
                     logger.debug(f"- Conclusion: {conclusion_value}")
 
                     try:
-                        parent = psutil.Process(
+                        parent = psutil.Process(job_run.pid)
                     except psutil.NoSuchProcess:
                         logger.debug("Process not found")
                         continue
@@ -87,7 +91,100 @@
                         logger.debug(f"Killing parent process {parent.pid}...")
                         parent.kill()
 
-
+                if status != "completed":
+                    sleep(1)
+                    continue
+
+                # Second, check for active reservations
+                hardware = self.primitive.hardware.get_own_hardware_details()
+                if hardware["activeReservation"]:
+                    if (
+                        hardware["activeReservation"]["id"] != active_reservation_id
+                        or hardware["activeReservation"]["pk"] != active_reservation_pk
+                    ):
+                        logger.info("New reservation for this hardware.")
+                        active_reservation_id = hardware["activeReservation"]["id"]
+                        active_reservation_pk = hardware["activeReservation"]["pk"]
+                        logger.debug("Active Reservation:")
+                        logger.debug(f"Node ID: {active_reservation_id}")
+                        logger.debug(f"PK: {active_reservation_pk}")
+
+                        logger.debug("Running pre provisioning steps for reservation.")
+                        self.primitive.provisioning.add_reservation_authorized_keys(
+                            reservation_id=active_reservation_id
+                        )
+
+                    if not active_reservation_id:
+                        self.primitive.hardware.check_in_http(
+                            is_available=True, is_online=True
+                        )
+                        logger.debug("Syncing children...")
+                        self.primitive.hardware._sync_children(hardware=hardware)
+
+                        sleep_amount = 5
+                        logger.debug(
+                            f"No active reservation found... [sleeping {sleep_amount} seconds]"
+                        )
+                        sleep(sleep_amount)
+                        continue
+                else:
+                    if (
+                        hardware["activeReservation"] is None
+                        and active_reservation_id is not None
+                        # and hardware["isAvailable"] NOTE: this condition was causing the CLI to get into a loop searching for job runs
+                    ):
+                        logger.debug("Previous Reservation is Complete:")
+                        logger.debug(f"Node ID: {active_reservation_id}")
+                        logger.debug(f"PK: {active_reservation_pk}")
+                        logger.debug(
+                            "Running cleanup provisioning steps for reservation."
+                        )
+                        self.primitive.provisioning.remove_reservation_authorized_keys(
+                            reservation_id=active_reservation_id
+                        )
+                        active_reservation_id = None
+                        active_reservation_pk = None
+
+                # Third, see if the active reservation has any pending job runs
+                job_runs_for_reservation = self.primitive.jobs.get_job_runs(
+                    status="pending", first=1, reservation_id=active_reservation_id
+                )
+
+                if (
+                    job_runs_for_reservation is None
+                    or job_runs_for_reservation.data is None
+                ):
+                    logger.error("Error fetching job runs.")
+                    sleep_amount = 5
+                    logger.debug(
+                        f"Error fetching job runs... [sleeping {sleep_amount} seconds]"
+                    )
+                    sleep(sleep_amount)
+                    continue
+
+                pending_job_runs = [
+                    edge["node"]
+                    for edge in job_runs_for_reservation.data["jobRuns"]["edges"]
+                ]
+
+                if not pending_job_runs:
+                    self.primitive.hardware.check_in_http(
+                        is_available=False, is_online=True
+                    )
+                    sleep_amount = 5
+                    logger.debug(
+                        f"Waiting for Job Runs... [sleeping {sleep_amount} seconds]"
+                    )
+                    sleep(sleep_amount)
+                    continue
+
+                # If we did find a pending job run, check if it exists in the database
+                # and create it if it doesn't.
+                # This will trigger the agent to start the job run.
+                job_run = pending_job_runs[0]
+                if not JobRun.objects.filter_by(job_run_id=job_run["id"]).exists():
+                    JobRun.objects.create(job_run_id=job_run["id"], pid=None)
+                    logger.debug(f"Creating job run in database: {job_run['id']}")
 
         except KeyboardInterrupt:
             logger.info("[*] Stopping primitive monitor...")
primitive/utils/daemons.py
CHANGED
{primitive-0.2.11.dist-info → primitive-0.2.13.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: primitive
-Version: 0.2.
+Version: 0.2.13
 Project-URL: Documentation, https://github.com//primitivecorp/primitive-cli#readme
 Project-URL: Issues, https://github.com//primitivecorp/primitive-cli/issues
 Project-URL: Source, https://github.com//primitivecorp/primitive-cli
{primitive-0.2.11.dist-info → primitive-0.2.13.dist-info}/RECORD
CHANGED
@@ -1,26 +1,26 @@
-primitive/__about__.py,sha256=
+primitive/__about__.py,sha256=bGWt1PkABewYSRyq23zRxqOW2ES-H58OSO6OmpHi7ac,130
 primitive/__init__.py,sha256=bwKdgggKNVssJFVPfKSxqFMz4IxSr54WWbmiZqTMPNI,106
 primitive/cli.py,sha256=g7EtHI9MATAB0qQu5w-WzbXtxz_8zu8z5E7sETmMkKU,2509
 primitive/client.py,sha256=h8WZVnQylVe0vbpuyC8YZHl2JyITSPC-1HbUcmrE5pc,3623
 primitive/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-primitive/agent/actions.py,sha256=
-primitive/agent/commands.py,sha256
-primitive/agent/runner.py,sha256=
-primitive/agent/uploader.py,sha256=
+primitive/agent/actions.py,sha256=PzFOgxuRrhbUGpUygnTZVru58Fv87GI8tgJYkZB1LjI,3773
+primitive/agent/commands.py,sha256=cK7d3OcN5Z65gQWVZFQ-Y9ddw9Pes4f9OVBpeMsj5sE,255
+primitive/agent/runner.py,sha256=CoRyReO3jPV8B7vILVWdszFD4GVop7HsVEUo1hoRXjo,14556
+primitive/agent/uploader.py,sha256=ZzrzsajNBogwEC7mT6Ejy0h2Jd9axMYGzt9pbCvVMlk,3171
 primitive/auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 primitive/auth/actions.py,sha256=MPsG9LcKcOPwA7gZ9Ewk0PZJhTQvIrGfODdz4GxSzgA,999
 primitive/auth/commands.py,sha256=2z5u5xX64n0yILucx9emtWh3uQXLvs2QQQQIldZGr94,2341
 primitive/auth/graphql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 primitive/auth/graphql/queries.py,sha256=jhrr_VFzHIn8vcVprMIzUx7V4kkWYdR6CKMKPoVFv60,180
 primitive/daemons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-primitive/daemons/actions.py,sha256=
+primitive/daemons/actions.py,sha256=jUE2DSNI5GDbbAZiGaauwfh50UQZf_uDLqpZLEwDq6w,3292
 primitive/daemons/commands.py,sha256=Xt4qFymNrDLdHJhRnEH_4Re-2xX6w1OT-chV9k7dFCs,2670
-primitive/daemons/launch_agents.py,sha256=
-primitive/daemons/launch_service.py,sha256=
+primitive/daemons/launch_agents.py,sha256=VQ-c9PVTOr3JVt2jfLxIPDS1glKyBM2oDAzSUsFP9_A,7455
+primitive/daemons/launch_service.py,sha256=JA2kj1obCzu9sGmLFUMdEVKI-aj018b_tZBor0wVJSQ,7531
 primitive/daemons/ui.py,sha256=Af3OJWJ0jdGlb1nfA5yaGYdhBEqqpM8zP2U2vUQdCbw,1236
 primitive/db/base.py,sha256=mH7f2d_jiyxJSSx9Gk53QBXRa3LiKBsBjkFgvmtH1WA,83
-primitive/db/models.py,sha256=
-primitive/db/sqlite.py,sha256=
+primitive/db/models.py,sha256=GfnJdAq4Tb68CI4BKAuJDZVqioGavveaAHbCPeLNngw,2840
+primitive/db/sqlite.py,sha256=3V9ZxbgME1ThfJp90MPLUxU8b9imgNZM5CHOnA-WkaQ,953
 primitive/exec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 primitive/exec/actions.py,sha256=4d_TCjNDcVFoZ9Zw7ZuBa6hKMv2Xzm7_UX_8wcX1aSk,4124
 primitive/exec/commands.py,sha256=66LO2kkJC-ynNZQpUCXv4Ol15QoacdSZAHblePDcmLo,510
@@ -42,7 +42,7 @@ primitive/graphql/relay.py,sha256=bmij2AjdpURQ6GGVCxwWhauF-r_SxuAU2oJ4sDbLxpI,72
 primitive/graphql/sdk.py,sha256=KhVWDZms_eMBgt6ftSJitRALguagy-nmrj4IC2taeXY,1535
 primitive/graphql/utility_fragments.py,sha256=uIjwILC4QtWNyO5vu77VjQf_p0jvP3A9q_6zRq91zqs,303
 primitive/hardware/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-primitive/hardware/actions.py,sha256=
+primitive/hardware/actions.py,sha256=d5KwuSsceOhDH9rgOL7YTCpQPhqT2inRTiZnROtiDic,26076
 primitive/hardware/android.py,sha256=tu7pBPxWFrIwb_mm5CEdFFf1_veNDOKjOCQg13i_Lh4,2758
 primitive/hardware/commands.py,sha256=ixMPhDOpsU-eONxmimqKVynus-Eaq2XPKEK017WM_rM,3229
 primitive/hardware/ui.py,sha256=12rucuZ2s-w5R4bKyxON5dEbrdDnVf5sbj3K_nbdo44,2473
@@ -57,7 +57,7 @@ primitive/jobs/graphql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
 primitive/jobs/graphql/fragments.py,sha256=1_ZttT7dx36KDC3DClJz9M8LMpsPwXySBygHSiUEcGg,619
 primitive/jobs/graphql/mutations.py,sha256=8ASvCmwQh7cMeeiykOdYaYVryG8FRIuVF6v_J8JJZuw,219
 primitive/jobs/graphql/queries.py,sha256=BrU_GnLjK0bTAmWsLSmGEUea7EM8MqTKxN1Qp6sSjwc,1597
-primitive/monitor/actions.py,sha256=
+primitive/monitor/actions.py,sha256=GUQrwuan82pOJ5gI2FvQYzgDoP4fs28PdcI_fg_aXRs,8692
 primitive/monitor/commands.py,sha256=dZsD8WKGU4OYO_AlKawfeRNVTMN0xJ-DFRkmKTS464s,258
 primitive/organizations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 primitive/organizations/actions.py,sha256=Tgp_rox0jcvfhQ-LmcWc9vkPdeJu5Bk6U1rNuT9oDnw,1088
@@ -90,14 +90,14 @@ primitive/utils/auth.py,sha256=uBIZNPF2CpbaPV2UMi6eWVUKghV6WIm-pG3-UM29bNs,1465
 primitive/utils/cache.py,sha256=FHGmVWYLJFQOazpXXcEwI0YJEZbdkgG39nOLdOv6VNk,1575
 primitive/utils/chunk_size.py,sha256=PAuVuirUTA9oRXyjo1c6MWxo31WVBRkWMuWw-AS58Bw,2914
 primitive/utils/config.py,sha256=DlFM5Nglo22WPtbpZSVtH7NX-PTMaKYlcrUE7GPRG4c,1058
-primitive/utils/daemons.py,sha256=
+primitive/utils/daemons.py,sha256=mSoSHitiGfS4KYAEK9sKsiv_YcACHKgY3qISnDpUUIE,1086
 primitive/utils/exceptions.py,sha256=DrYHTcCAJGC7cCUwOx_FmdlVLWRdpzvDvpLb82heppE,311
 primitive/utils/memory_size.py,sha256=4xfha21kW82nFvOTtDFx9Jk2ZQoEhkfXii-PGNTpIUk,3058
 primitive/utils/printer.py,sha256=f1XUpqi5dkTL3GWvYRUGlSwtj2IxU1q745T4Fxo7Tn4,370
 primitive/utils/shell.py,sha256=jWzb7ky7p987dJas6ZvarK3IJNZ5cwBXcryRWb9Uh6U,2072
 primitive/utils/text.py,sha256=XiESMnlhjQ534xE2hMNf08WehE1SKaYFRNih0MmnK0k,829
-primitive-0.2.
-primitive-0.2.
-primitive-0.2.
-primitive-0.2.
-primitive-0.2.
+primitive-0.2.13.dist-info/METADATA,sha256=Zq5jWlNGydETK3dKcPdCw0yUZsnJkwCsxuRq1LC4fnM,3733
+primitive-0.2.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+primitive-0.2.13.dist-info/entry_points.txt,sha256=p1K8DMCWka5FqLlqP1sPek5Uovy9jq8u51gUsP-z334,48
+primitive-0.2.13.dist-info/licenses/LICENSE.txt,sha256=B8kmQMJ2sxYygjCLBk770uacaMci4mPSoJJ8WoDBY_c,1098
+primitive-0.2.13.dist-info/RECORD,,
{primitive-0.2.11.dist-info → primitive-0.2.13.dist-info}/WHEEL
File without changes
{primitive-0.2.11.dist-info → primitive-0.2.13.dist-info}/entry_points.txt
File without changes
{primitive-0.2.11.dist-info → primitive-0.2.13.dist-info}/licenses/LICENSE.txt
File without changes