sensorium-mcp 3.0.3 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dashboard/routes/data.d.ts.map +1 -1
- package/dist/dashboard/routes/data.js +2 -1
- package/dist/dashboard/routes/data.js.map +1 -1
- package/dist/dashboard/routes/threads.js +1 -1
- package/dist/dashboard/routes/threads.js.map +1 -1
- package/dist/dashboard/routes.d.ts.map +1 -1
- package/dist/dashboard/routes.js +1 -3
- package/dist/dashboard/routes.js.map +1 -1
- package/dist/data/memory/migration-runner.d.ts +1 -1
- package/dist/data/memory/migration-runner.d.ts.map +1 -1
- package/dist/data/memory/migration-runner.js +59 -3
- package/dist/data/memory/migration-runner.js.map +1 -1
- package/dist/data/memory/schema-ddl.d.ts +1 -1
- package/dist/data/memory/schema-ddl.d.ts.map +1 -1
- package/dist/data/memory/schema-ddl.js +2 -1
- package/dist/data/memory/schema-ddl.js.map +1 -1
- package/dist/data/memory/thread-registry.js +1 -1
- package/dist/data/memory/thread-registry.js.map +1 -1
- package/dist/http-server.d.ts.map +1 -1
- package/dist/http-server.js +1 -9
- package/dist/http-server.js.map +1 -1
- package/dist/index.js +3 -6
- package/dist/index.js.map +1 -1
- package/dist/server/factory.js +1 -1
- package/dist/server/factory.js.map +1 -1
- package/dist/services/agent-spawn.service.d.ts +7 -1
- package/dist/services/agent-spawn.service.d.ts.map +1 -1
- package/dist/services/agent-spawn.service.js +69 -45
- package/dist/services/agent-spawn.service.js.map +1 -1
- package/dist/services/consolidation.service.d.ts.map +1 -1
- package/dist/services/consolidation.service.js +49 -35
- package/dist/services/consolidation.service.js.map +1 -1
- package/dist/services/keeper.service.d.ts +21 -0
- package/dist/services/keeper.service.d.ts.map +1 -0
- package/dist/services/keeper.service.js +195 -0
- package/dist/services/keeper.service.js.map +1 -0
- package/dist/services/maintenance-signal.d.ts +2 -0
- package/dist/services/maintenance-signal.d.ts.map +1 -1
- package/dist/services/maintenance-signal.js +7 -1
- package/dist/services/maintenance-signal.js.map +1 -1
- package/dist/services/process.service.d.ts +19 -2
- package/dist/services/process.service.d.ts.map +1 -1
- package/dist/services/process.service.js +104 -10
- package/dist/services/process.service.js.map +1 -1
- package/dist/services/reconnect-snapshot.service.d.ts.map +1 -1
- package/dist/services/reconnect-snapshot.service.js +20 -3
- package/dist/services/reconnect-snapshot.service.js.map +1 -1
- package/dist/services/thread-lifecycle.service.d.ts +5 -0
- package/dist/services/thread-lifecycle.service.d.ts.map +1 -1
- package/dist/services/thread-lifecycle.service.js +33 -8
- package/dist/services/thread-lifecycle.service.js.map +1 -1
- package/dist/services/worker-cleanup.service.d.ts +14 -1
- package/dist/services/worker-cleanup.service.d.ts.map +1 -1
- package/dist/services/worker-cleanup.service.js +48 -27
- package/dist/services/worker-cleanup.service.js.map +1 -1
- package/dist/sessions.d.ts +0 -5
- package/dist/sessions.d.ts.map +1 -1
- package/dist/sessions.js +0 -7
- package/dist/sessions.js.map +1 -1
- package/dist/stdio-server.d.ts.map +1 -1
- package/dist/stdio-server.js +1 -7
- package/dist/stdio-server.js.map +1 -1
- package/dist/tools/delegate-tool.d.ts.map +1 -1
- package/dist/tools/delegate-tool.js +2 -2
- package/dist/tools/delegate-tool.js.map +1 -1
- package/dist/tools/session-tools.js +1 -1
- package/dist/tools/session-tools.js.map +1 -1
- package/dist/tools/start-session-tool.d.ts.map +1 -1
- package/dist/tools/start-session-tool.js +8 -9
- package/dist/tools/start-session-tool.js.map +1 -1
- package/dist/tools/wait/message-processing.d.ts.map +1 -1
- package/dist/tools/wait/message-processing.js +28 -0
- package/dist/tools/wait/message-processing.js.map +1 -1
- package/dist/tools/wait/poll-loop.js +1 -1
- package/dist/tools/wait/poll-loop.js.map +1 -1
- package/package.json +1 -1
- package/dist/tools/thread-lifecycle.d.ts +0 -6
- package/dist/tools/thread-lifecycle.d.ts.map +0 -1
- package/dist/tools/thread-lifecycle.js +0 -6
- package/dist/tools/thread-lifecycle.js.map +0 -1
- package/supervisor/config.go +0 -253
- package/supervisor/config_test.go +0 -78
- package/supervisor/go.mod +0 -15
- package/supervisor/go.sum +0 -20
- package/supervisor/health.go +0 -433
- package/supervisor/health_test.go +0 -93
- package/supervisor/keeper.go +0 -309
- package/supervisor/keeper_test.go +0 -27
- package/supervisor/lock.go +0 -57
- package/supervisor/lock_test.go +0 -54
- package/supervisor/log.go +0 -195
- package/supervisor/log_test.go +0 -125
- package/supervisor/main.go +0 -461
- package/supervisor/main_test.go +0 -130
- package/supervisor/notify.go +0 -53
- package/supervisor/process.go +0 -294
- package/supervisor/process_test.go +0 -108
- package/supervisor/process_unix.go +0 -14
- package/supervisor/process_windows.go +0 -15
- package/supervisor/secrets.go +0 -95
- package/supervisor/secrets_securevault_test.go +0 -98
- package/supervisor/secrets_test.go +0 -119
- package/supervisor/self_update.go +0 -282
- package/supervisor/self_update_test.go +0 -177
- package/supervisor/service_restart_stub.go +0 -9
- package/supervisor/service_restart_windows.go +0 -63
- package/supervisor/service_stub.go +0 -15
- package/supervisor/service_windows.go +0 -194
- package/supervisor/update_state.go +0 -264
- package/supervisor/update_state_test.go +0 -306
- package/supervisor/updater.go +0 -613
- package/supervisor/updater_test.go +0 -64
|
@@ -1,194 +0,0 @@
|
|
|
1
|
-
//go:build windows
|
|
2
|
-
|
|
3
|
-
package main
|
|
4
|
-
|
|
5
|
-
import (
|
|
6
|
-
"fmt"
|
|
7
|
-
"os"
|
|
8
|
-
"path/filepath"
|
|
9
|
-
"time"
|
|
10
|
-
|
|
11
|
-
"golang.org/x/sys/windows/svc"
|
|
12
|
-
"golang.org/x/sys/windows/svc/mgr"
|
|
13
|
-
)
|
|
14
|
-
|
|
15
|
-
// Identity under which the supervisor is registered as a Windows service.
const (
	// serviceName is the key used for every OpenService/CreateService/Run call in this file.
	serviceName = "SensoriumSupervisor"
	// serviceDisplay is stored as the service's DisplayName at install time.
	serviceDisplay = "Sensorium Supervisor"
	// serviceDesc is stored as the service's Description at install time.
	serviceDesc = "Keeps the sensorium-mcp server and agent threads running."
)

// supervisorService is the stateless handler type handed to svc.Run.
type supervisorService struct{}
|
|
20
|
-
|
|
21
|
-
func (s *supervisorService) Execute(args []string, r <-chan svc.ChangeRequest, status chan<- svc.Status) (bool, uint32) {
|
|
22
|
-
status <- svc.Status{State: svc.StartPending}
|
|
23
|
-
|
|
24
|
-
done := make(chan error, 1)
|
|
25
|
-
go func() {
|
|
26
|
-
done <- runSupervisor(true)
|
|
27
|
-
}()
|
|
28
|
-
|
|
29
|
-
status <- svc.Status{State: svc.Running, Accepts: svc.AcceptStop | svc.AcceptShutdown}
|
|
30
|
-
|
|
31
|
-
for {
|
|
32
|
-
select {
|
|
33
|
-
case c := <-r:
|
|
34
|
-
switch c.Cmd {
|
|
35
|
-
case svc.Interrogate:
|
|
36
|
-
status <- c.CurrentStatus
|
|
37
|
-
case svc.Stop, svc.Shutdown:
|
|
38
|
-
status <- svc.Status{State: svc.StopPending}
|
|
39
|
-
stopSupervisor()
|
|
40
|
-
select {
|
|
41
|
-
case err := <-done:
|
|
42
|
-
if err != nil {
|
|
43
|
-
fmt.Fprintf(os.Stderr, "Service shutdown with error: %v\n", err)
|
|
44
|
-
}
|
|
45
|
-
case <-time.After(15 * time.Second):
|
|
46
|
-
}
|
|
47
|
-
return false, 0
|
|
48
|
-
}
|
|
49
|
-
case err := <-done:
|
|
50
|
-
if err != nil {
|
|
51
|
-
fmt.Fprintf(os.Stderr, "Service failed: %v\n", err)
|
|
52
|
-
}
|
|
53
|
-
return false, 0
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
func runAsService() error {
|
|
59
|
-
return svc.Run(serviceName, &supervisorService{})
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
func withServiceManager(fn func(*mgr.Mgr) error) error {
|
|
63
|
-
m, err := mgr.Connect()
|
|
64
|
-
if err != nil {
|
|
65
|
-
return err
|
|
66
|
-
}
|
|
67
|
-
defer m.Disconnect()
|
|
68
|
-
return fn(m)
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
func installService(exePath, serviceUser, servicePassword string) error {
|
|
72
|
-
return withServiceManager(func(m *mgr.Mgr) error {
|
|
73
|
-
s, err := m.OpenService(serviceName)
|
|
74
|
-
if err == nil {
|
|
75
|
-
s.Close()
|
|
76
|
-
return fmt.Errorf("install failed: service %q already exists", serviceName)
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
cfg := mgr.Config{
|
|
80
|
-
DisplayName: serviceDisplay,
|
|
81
|
-
Description: serviceDesc,
|
|
82
|
-
StartType: mgr.StartAutomatic,
|
|
83
|
-
DelayedAutoStart: true,
|
|
84
|
-
}
|
|
85
|
-
if serviceUser != "" {
|
|
86
|
-
cfg.ServiceStartName = serviceUser
|
|
87
|
-
cfg.Password = servicePassword
|
|
88
|
-
if servicePassword == "" {
|
|
89
|
-
fmt.Printf("Installing service as passwordless identity %q\n", serviceUser)
|
|
90
|
-
} else {
|
|
91
|
-
fmt.Printf("Installing service as user %q\n", serviceUser)
|
|
92
|
-
}
|
|
93
|
-
} else {
|
|
94
|
-
fmt.Println("Installing service as LocalSystem (default). Use -service-user to run as a specific user account.")
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
s, err = m.CreateService(serviceName, exePath, cfg)
|
|
98
|
-
if err != nil {
|
|
99
|
-
return fmt.Errorf("install failed: create service: %w", err)
|
|
100
|
-
}
|
|
101
|
-
defer s.Close()
|
|
102
|
-
|
|
103
|
-
fmt.Printf("Service %q installed successfully.\n", serviceName)
|
|
104
|
-
fmt.Printf("Start it with: %s start\n", filepath.Base(exePath))
|
|
105
|
-
return nil
|
|
106
|
-
})
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
func uninstallService() error {
|
|
110
|
-
return withServiceManager(func(m *mgr.Mgr) error {
|
|
111
|
-
s, err := m.OpenService(serviceName)
|
|
112
|
-
if err != nil {
|
|
113
|
-
return fmt.Errorf("uninstall failed: service %q not found: %w", serviceName, err)
|
|
114
|
-
}
|
|
115
|
-
defer s.Close()
|
|
116
|
-
|
|
117
|
-
if err := s.Delete(); err != nil {
|
|
118
|
-
return fmt.Errorf("uninstall failed: delete service: %w", err)
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
fmt.Printf("Service %q uninstalled.\n", serviceName)
|
|
122
|
-
return nil
|
|
123
|
-
})
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
func startService() error {
|
|
127
|
-
return withServiceManager(func(m *mgr.Mgr) error {
|
|
128
|
-
s, err := m.OpenService(serviceName)
|
|
129
|
-
if err != nil {
|
|
130
|
-
return fmt.Errorf("start failed: service %q not found: %w", serviceName, err)
|
|
131
|
-
}
|
|
132
|
-
defer s.Close()
|
|
133
|
-
|
|
134
|
-
if err := s.Start(); err != nil {
|
|
135
|
-
return fmt.Errorf("start failed: %w", err)
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
fmt.Printf("Service %q started.\n", serviceName)
|
|
139
|
-
return nil
|
|
140
|
-
})
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
func stopService() error {
|
|
144
|
-
return withServiceManager(func(m *mgr.Mgr) error {
|
|
145
|
-
s, err := m.OpenService(serviceName)
|
|
146
|
-
if err != nil {
|
|
147
|
-
return fmt.Errorf("stop failed: service %q not found: %w", serviceName, err)
|
|
148
|
-
}
|
|
149
|
-
defer s.Close()
|
|
150
|
-
|
|
151
|
-
if _, err := s.Control(svc.Stop); err != nil {
|
|
152
|
-
return fmt.Errorf("stop failed: %w", err)
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
fmt.Printf("Service %q stopping.\n", serviceName)
|
|
156
|
-
return nil
|
|
157
|
-
})
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
func serviceStatus() error {
|
|
161
|
-
return withServiceManager(func(m *mgr.Mgr) error {
|
|
162
|
-
s, err := m.OpenService(serviceName)
|
|
163
|
-
if err != nil {
|
|
164
|
-
return fmt.Errorf("status failed: service %q not found: %w", serviceName, err)
|
|
165
|
-
}
|
|
166
|
-
defer s.Close()
|
|
167
|
-
|
|
168
|
-
st, err := s.Query()
|
|
169
|
-
if err != nil {
|
|
170
|
-
return fmt.Errorf("status failed: query service: %w", err)
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
states := map[svc.State]string{
|
|
174
|
-
svc.Stopped: "Stopped",
|
|
175
|
-
svc.StartPending: "StartPending",
|
|
176
|
-
svc.StopPending: "StopPending",
|
|
177
|
-
svc.Running: "Running",
|
|
178
|
-
svc.ContinuePending: "ContinuePending",
|
|
179
|
-
svc.PausePending: "PausePending",
|
|
180
|
-
svc.Paused: "Paused",
|
|
181
|
-
}
|
|
182
|
-
state, ok := states[st.State]
|
|
183
|
-
if !ok {
|
|
184
|
-
state = fmt.Sprintf("Unknown(%d)", st.State)
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
fmt.Printf("Service %q: %s\n", serviceName, state)
|
|
188
|
-
return nil
|
|
189
|
-
})
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
func isWindowsService() (bool, error) {
|
|
193
|
-
return svc.IsWindowsService()
|
|
194
|
-
}
|
|
@@ -1,264 +0,0 @@
|
|
|
1
|
-
package main
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"encoding/json"
|
|
5
|
-
"errors"
|
|
6
|
-
"fmt"
|
|
7
|
-
"os"
|
|
8
|
-
"path/filepath"
|
|
9
|
-
"strings"
|
|
10
|
-
"time"
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
// Update scopes: which component a persisted update state or lock refers to.
const (
	updateScopeMCP        = "mcp"
	updateScopeSupervisor = "supervisor"
)

// A coordinator lock older than this is treated as stale metadata for
// diagnostics, but age never overrides live-owner safety.
const updateCoordinatorLockMaxAge = 10 * time.Minute

// Update phases as stored in UpdateState.Phase.
const (
	updatePhaseIdle       = "idle"
	updatePhaseStaged     = "staged"
	updatePhaseApplying   = "applying"
	updatePhaseRestarting = "restarting"
	updatePhaseVerifying  = "verifying"
	updatePhaseRollback   = "rollback"
	updatePhaseFailed     = "failed"
)
|
|
29
|
-
|
|
30
|
-
// UpdateState is the JSON document persisted by UpdateStateStore. It records
// which scope is being updated, the phase it has reached, the versions
// involved, when the record was written, and the most recent error text.
type UpdateState struct {
	// Scope is one of the updateScope* values; startup recovery treats an
	// empty scope as the MCP scope.
	Scope string `json:"scope"`
	// Phase is one of the updatePhase* values; Load treats an empty phase as idle.
	Phase string `json:"phase"`
	// TargetVersion is the version the update is moving to.
	TargetVersion string `json:"targetVersion"`
	// PreviousVersion is the version recorded as being in place before the update.
	PreviousVersion string `json:"previousVersion"`
	// UpdatedAt is when the record was written; Save fills it in when zero.
	UpdatedAt time.Time `json:"updatedAt"`
	// LastError carries the most recent failure description, if any.
	LastError string `json:"lastError"`
}
|
|
38
|
-
|
|
39
|
-
type UpdateStateStore struct {
|
|
40
|
-
path string
|
|
41
|
-
log *Logger
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
func NewUpdateStateStore(path string, log *Logger) *UpdateStateStore {
|
|
45
|
-
return &UpdateStateStore{path: path, log: log}
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
func (s *UpdateStateStore) Load() (UpdateState, error) {
|
|
49
|
-
data, err := os.ReadFile(s.path)
|
|
50
|
-
if err != nil {
|
|
51
|
-
if errors.Is(err, os.ErrNotExist) {
|
|
52
|
-
return UpdateState{Phase: updatePhaseIdle, UpdatedAt: time.Now().UTC()}, nil
|
|
53
|
-
}
|
|
54
|
-
return UpdateState{}, err
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
var state UpdateState
|
|
58
|
-
if err := json.Unmarshal(data, &state); err != nil {
|
|
59
|
-
return UpdateState{}, err
|
|
60
|
-
}
|
|
61
|
-
if state.Phase == "" {
|
|
62
|
-
state.Phase = updatePhaseIdle
|
|
63
|
-
}
|
|
64
|
-
return state, nil
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
func (s *UpdateStateStore) Save(state UpdateState) error {
|
|
68
|
-
if state.UpdatedAt.IsZero() {
|
|
69
|
-
state.UpdatedAt = time.Now().UTC()
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
if err := os.MkdirAll(filepath.Dir(s.path), 0755); err != nil {
|
|
73
|
-
return fmt.Errorf("create update state dir: %w", err)
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
data, err := json.Marshal(state)
|
|
77
|
-
if err != nil {
|
|
78
|
-
return fmt.Errorf("marshal update state: %w", err)
|
|
79
|
-
}
|
|
80
|
-
if err := atomicWrite(s.path, data); err != nil {
|
|
81
|
-
return fmt.Errorf("write update state: %w", err)
|
|
82
|
-
}
|
|
83
|
-
return nil
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
func (s *UpdateStateStore) Transition(scope, phase, targetVersion, previousVersion, lastError string) {
|
|
87
|
-
state := UpdateState{
|
|
88
|
-
Scope: scope,
|
|
89
|
-
Phase: phase,
|
|
90
|
-
TargetVersion: targetVersion,
|
|
91
|
-
PreviousVersion: previousVersion,
|
|
92
|
-
UpdatedAt: time.Now().UTC(),
|
|
93
|
-
LastError: lastError,
|
|
94
|
-
}
|
|
95
|
-
if err := s.Save(state); err != nil {
|
|
96
|
-
s.log.Warn("Failed to persist update state (scope=%s phase=%s): %v", scope, phase, err)
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
// updateLockOwner is the JSON payload written into the coordinator lock file.
// It identifies the holder so a later contender can decide whether the lock
// is still live.
type updateLockOwner struct {
	// Scope is the update scope of the process that took the lock.
	Scope string `json:"scope"`
	// PID is the holder's process id; it is checked for liveness on contention.
	PID int `json:"pid"`
	// UpdatedAt is when the lock was written; it is used to report lock age.
	UpdatedAt time.Time `json:"updatedAt"`
}
|
|
105
|
-
|
|
106
|
-
type UpdateCoordinatorLock struct {
|
|
107
|
-
path string
|
|
108
|
-
scope string
|
|
109
|
-
log *Logger
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
// updateLockFile is the subset of file behavior that writeUpdateLockMetadata
// needs: write the payload, then close.
type updateLockFile interface {
	Write([]byte) (int, error)
	Close() error
}

// openUpdateLockFile creates lockPath for writing and fails if it already
// exists (O_CREATE|O_EXCL). It is a package-level variable rather than a
// plain function, so it can be reassigned.
var openUpdateLockFile = func(lockPath string) (updateLockFile, error) {
	const exclusiveCreate = os.O_CREATE | os.O_EXCL | os.O_WRONLY
	return os.OpenFile(lockPath, exclusiveCreate, 0644)
}
|
|
120
|
-
|
|
121
|
-
func writeUpdateLockMetadata(lockPath string, payload []byte) error {
|
|
122
|
-
f, err := openUpdateLockFile(lockPath)
|
|
123
|
-
if err != nil {
|
|
124
|
-
return err
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
if _, err := f.Write(payload); err != nil {
|
|
128
|
-
closeErr := f.Close()
|
|
129
|
-
_ = os.Remove(lockPath)
|
|
130
|
-
if closeErr != nil {
|
|
131
|
-
return errors.Join(fmt.Errorf("write lock metadata: %w", err), fmt.Errorf("close lock file after write failure: %w", closeErr))
|
|
132
|
-
}
|
|
133
|
-
return fmt.Errorf("write lock metadata: %w", err)
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
if err := f.Close(); err != nil {
|
|
137
|
-
_ = os.Remove(lockPath)
|
|
138
|
-
return fmt.Errorf("close lock metadata file: %w", err)
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
return nil
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
func AcquireUpdateCoordinatorLock(lockPath, scope string, log *Logger) (*UpdateCoordinatorLock, bool) {
|
|
145
|
-
owner := updateLockOwner{Scope: scope, PID: os.Getpid(), UpdatedAt: time.Now().UTC()}
|
|
146
|
-
payload, _ := json.Marshal(owner)
|
|
147
|
-
|
|
148
|
-
err := writeUpdateLockMetadata(lockPath, payload)
|
|
149
|
-
if err == nil {
|
|
150
|
-
log.Debug("Update coordinator lock acquired by %s", scope)
|
|
151
|
-
return &UpdateCoordinatorLock{path: lockPath, scope: scope, log: log}, true
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
if !errors.Is(err, os.ErrExist) {
|
|
155
|
-
log.Warn("Failed to acquire update coordinator lock %s: %v", lockPath, err)
|
|
156
|
-
return nil, false
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
data, readErr := os.ReadFile(lockPath)
|
|
160
|
-
if readErr != nil {
|
|
161
|
-
log.Warn("Update coordinator lock exists but could not be read (%s): %v", lockPath, readErr)
|
|
162
|
-
return nil, false
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
var holder updateLockOwner
|
|
166
|
-
if json.Unmarshal(data, &holder) == nil {
|
|
167
|
-
lockAge := time.Since(holder.UpdatedAt)
|
|
168
|
-
alive := holder.PID > 0 && IsProcessAlive(holder.PID)
|
|
169
|
-
staleByAge := !holder.UpdatedAt.IsZero() && lockAge > updateCoordinatorLockMaxAge
|
|
170
|
-
|
|
171
|
-
if alive {
|
|
172
|
-
holderScope := holder.Scope
|
|
173
|
-
if holderScope == "" {
|
|
174
|
-
holderScope = "unknown"
|
|
175
|
-
}
|
|
176
|
-
ageNote := ""
|
|
177
|
-
if staleByAge {
|
|
178
|
-
ageNote = fmt.Sprintf("; stale metadata age %v exceeds %v", lockAge.Round(time.Second), updateCoordinatorLockMaxAge)
|
|
179
|
-
}
|
|
180
|
-
log.Info("Skipping %s update apply: coordinator lock held by %s (PID %d, age %v%s)", scope, holderScope, holder.PID, lockAge.Round(time.Second), ageNote)
|
|
181
|
-
return nil, false
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
holderScope := holder.Scope
|
|
185
|
-
if holderScope == "" {
|
|
186
|
-
holderScope = "unknown"
|
|
187
|
-
}
|
|
188
|
-
reason := "owner PID not alive"
|
|
189
|
-
if staleByAge {
|
|
190
|
-
reason = fmt.Sprintf("owner PID not alive (metadata age %v exceeds %v)", lockAge.Round(time.Second), updateCoordinatorLockMaxAge)
|
|
191
|
-
}
|
|
192
|
-
log.Warn("Reclaiming update coordinator lock for %s: previous owner=%s pid=%d (%s)", scope, holderScope, holder.PID, reason)
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
_ = os.Remove(lockPath)
|
|
196
|
-
err = writeUpdateLockMetadata(lockPath, payload)
|
|
197
|
-
if err != nil {
|
|
198
|
-
log.Warn("Failed to reclaim update coordinator lock %s: %v", lockPath, err)
|
|
199
|
-
return nil, false
|
|
200
|
-
}
|
|
201
|
-
log.Warn("Reclaimed stale update coordinator lock for %s", scope)
|
|
202
|
-
return &UpdateCoordinatorLock{path: lockPath, scope: scope, log: log}, true
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
func (l *UpdateCoordinatorLock) Release() {
|
|
206
|
-
if l == nil {
|
|
207
|
-
return
|
|
208
|
-
}
|
|
209
|
-
_ = os.Remove(l.path)
|
|
210
|
-
l.log.Debug("Update coordinator lock released by %s", l.scope)
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
func recoverPersistedUpdateStateOnStartup(cfg Config, log *Logger) {
|
|
214
|
-
store := NewUpdateStateStore(cfg.Paths.UpdateState, log)
|
|
215
|
-
state, err := store.Load()
|
|
216
|
-
if err != nil {
|
|
217
|
-
log.Warn("Failed to load persisted update state for startup recovery: %v", err)
|
|
218
|
-
return
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
scope := state.Scope
|
|
222
|
-
if scope == "" {
|
|
223
|
-
scope = updateScopeMCP
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
if scope == updateScopeSupervisor {
|
|
227
|
-
currentVersion := readTrimmedFile(cfg.Paths.SupervisorVersion)
|
|
228
|
-
targetVersion := strings.TrimSpace(state.TargetVersion)
|
|
229
|
-
if targetVersion != "" && currentVersion == targetVersion && state.Phase != updatePhaseIdle {
|
|
230
|
-
log.Info("Startup recovery: supervisor update %s already applied locally; transitioning state to idle", targetVersion)
|
|
231
|
-
store.Transition(updateScopeSupervisor, updatePhaseIdle, targetVersion, state.PreviousVersion, "")
|
|
232
|
-
return
|
|
233
|
-
}
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
if state.Phase == "" || state.Phase == updatePhaseIdle || state.Phase == updatePhaseFailed {
|
|
237
|
-
return
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
if !isRecoverableStartupPhase(state.Phase) {
|
|
241
|
-
return
|
|
242
|
-
}
|
|
243
|
-
|
|
244
|
-
reason := fmt.Sprintf("startup recovery: stale non-idle update state detected (%s/%s)", scope, state.Phase)
|
|
245
|
-
log.Warn("%s", reason)
|
|
246
|
-
store.Transition(scope, updatePhaseFailed, state.TargetVersion, state.PreviousVersion, reason)
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
func isRecoverableStartupPhase(phase string) bool {
|
|
250
|
-
switch phase {
|
|
251
|
-
case updatePhaseApplying, updatePhaseRestarting, updatePhaseVerifying, updatePhaseStaged:
|
|
252
|
-
return true
|
|
253
|
-
default:
|
|
254
|
-
return false
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
// readTrimmedFile returns the contents of path with leading and trailing
// whitespace removed. Any read error yields the empty string, so a missing
// file and an empty file are indistinguishable to the caller.
func readTrimmedFile(path string) string {
	contents, readErr := os.ReadFile(path)
	if readErr == nil {
		return strings.TrimSpace(string(contents))
	}
	return ""
}
|