create-ekka-desktop-app 0.3.5 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -35,6 +35,34 @@ interface DemoState {
|
|
|
35
35
|
error: string | null;
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
/**
 * DocGen state persisted across tab switches by mirroring it to localStorage.
 */
interface DocGenPersistedState {
  /** ID of the recorded workflow run, or null when none is recorded. */
  runId: string | null;
  /** Folder recorded alongside the run, or null when none is recorded. */
  folder: string | null;
}

/** localStorage key under which the DocGen state is stored as JSON. */
const DOCGEN_STORAGE_KEY = 'ekka.docgen.state';

/**
 * Runtime check that a parsed JSON value has the DocGenPersistedState shape.
 *
 * @param value - Any value, typically the result of `JSON.parse`.
 * @returns true only when `value` is a non-null object whose `runId` and
 *   `folder` are each a string or null.
 */
function isDocGenPersistedState(value: unknown): value is DocGenPersistedState {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  const isStringOrNull = (field: unknown): boolean =>
    field === null || typeof field === 'string';
  return isStringOrNull(candidate.runId) && isStringOrNull(candidate.folder);
}

/**
 * Read the persisted DocGen state from localStorage.
 *
 * @returns The stored state when it exists and has the expected shape;
 *   otherwise `{ runId: null, folder: null }`. Never throws.
 */
function loadDocGenState(): DocGenPersistedState {
  try {
    const saved = localStorage.getItem(DOCGEN_STORAGE_KEY);
    if (saved) {
      // JSON.parse yields arbitrary data; validate it instead of casting so a
      // malformed entry cannot leak wrongly typed values into component state.
      const parsed: unknown = JSON.parse(saved);
      if (isDocGenPersistedState(parsed)) {
        // Copy only the known fields so unexpected extra keys are dropped.
        return { runId: parsed.runId, folder: parsed.folder };
      }
    }
  } catch {
    // Best-effort: any failure while reading or parsing falls back to the default.
  }
  return { runId: null, folder: null };
}

/**
 * Write the DocGen state to localStorage as JSON.
 *
 * @param state - The state to store under DOCGEN_STORAGE_KEY.
 */
function saveDocGenState(state: DocGenPersistedState): void {
  try {
    localStorage.setItem(DOCGEN_STORAGE_KEY, JSON.stringify(state));
  } catch {
    // Best-effort: storage failures are deliberately ignored.
  }
}
|
|
65
|
+
|
|
38
66
|
export function DemoApp(): ReactElement {
|
|
39
67
|
const [selectedPage, setSelectedPage] = useState<Page>('path-permissions');
|
|
40
68
|
const [darkMode, setDarkMode] = useState<boolean>(() => {
|
|
@@ -51,6 +79,14 @@ export function DemoApp(): ReactElement {
|
|
|
51
79
|
error: null,
|
|
52
80
|
});
|
|
53
81
|
|
|
82
|
+
// DocGen state - persisted to localStorage
|
|
83
|
+
const [docGenState, setDocGenState] = useState<DocGenPersistedState>(loadDocGenState);
|
|
84
|
+
|
|
85
|
+
// Apply a DocGen state update to React state, then mirror it to localStorage.
function handleDocGenStateChange(newState: DocGenPersistedState): void {
  setDocGenState(newState);
  saveDocGenState(newState);
}
|
|
89
|
+
|
|
54
90
|
useEffect(() => {
|
|
55
91
|
void initializeApp();
|
|
56
92
|
}, []);
|
|
@@ -251,7 +287,13 @@ export function DemoApp(): ReactElement {
|
|
|
251
287
|
{state.error && <div style={errorStyle}>{state.error}</div>}
|
|
252
288
|
{selectedPage === 'path-permissions' && <PathPermissionsPage darkMode={darkMode} />}
|
|
253
289
|
{selectedPage === 'vault' && <VaultPage darkMode={darkMode} />}
|
|
254
|
-
{selectedPage === 'doc-gen' &&
|
|
290
|
+
{selectedPage === 'doc-gen' && (
|
|
291
|
+
<DocGenPage
|
|
292
|
+
darkMode={darkMode}
|
|
293
|
+
persistedState={docGenState}
|
|
294
|
+
onStateChange={handleDocGenStateChange}
|
|
295
|
+
/>
|
|
296
|
+
)}
|
|
255
297
|
{selectedPage === 'runner' && <RunnerPage darkMode={darkMode} />}
|
|
256
298
|
{selectedPage === 'audit-log' && <AuditLogPage darkMode={darkMode} />}
|
|
257
299
|
{selectedPage === 'system' && <SystemPage darkMode={darkMode} />}
|
|
@@ -14,8 +14,15 @@ import {
|
|
|
14
14
|
} from '../../ekka/ops/workflowRuns';
|
|
15
15
|
import * as debugOps from '../../ekka/ops/debug';
|
|
16
16
|
|
|
17
|
+
/** Run/folder pair exchanged with the parent so it can be persisted. */
interface DocGenPersistedState {
  /** Folder value, or null when none is recorded. */
  folder: string | null;
  /** Workflow run ID, or null when none is recorded. */
  runId: string | null;
}

/** Props accepted by DocGenPage. */
interface DocGenPageProps {
  /** Whether the dark colour palette is used. */
  darkMode: boolean;
  /** Optional previously persisted run/folder pair used for the initial state. */
  persistedState?: DocGenPersistedState;
  /** Optional callback invoked with the new run/folder pair. */
  onStateChange?: (state: DocGenPersistedState) => void;
}
|
|
20
27
|
|
|
21
28
|
type GenerationStatus = 'idle' | 'queued' | 'running' | 'completed' | 'failed';
|
|
@@ -27,16 +34,22 @@ const PROMPT_CONFIG = {
|
|
|
27
34
|
prompt_version: '1',
|
|
28
35
|
} as const;
|
|
29
36
|
|
|
30
|
-
export function DocGenPage({ darkMode }: DocGenPageProps): ReactElement {
|
|
31
|
-
const [selectedFolder, setSelectedFolder] = useState<string | null>(null);
|
|
37
|
+
export function DocGenPage({ darkMode, persistedState, onStateChange }: DocGenPageProps): ReactElement {
|
|
38
|
+
const [selectedFolder, setSelectedFolder] = useState<string | null>(persistedState?.folder ?? null);
|
|
32
39
|
const [status, setStatus] = useState<GenerationStatus>('idle');
|
|
33
|
-
const [workflowRunId, setWorkflowRunId] = useState<string | null>(null);
|
|
40
|
+
const [workflowRunId, setWorkflowRunId] = useState<string | null>(persistedState?.runId ?? null);
|
|
34
41
|
const [workflowRun, setWorkflowRun] = useState<WorkflowRun | null>(null);
|
|
35
42
|
const [error, setError] = useState<string | null>(null);
|
|
36
43
|
const [copySuccess, setCopySuccess] = useState(false);
|
|
37
44
|
const [isDevMode, setIsDevMode] = useState(false);
|
|
38
45
|
const [pathCopySuccess, setPathCopySuccess] = useState(false);
|
|
39
46
|
const pollingRef = useRef<number | null>(null);
|
|
47
|
+
const hasResumedRef = useRef(false);
|
|
48
|
+
|
|
49
|
+
// Hand the current run/folder pair to the parent for persistence.
// Does nothing when the parent did not supply an onStateChange callback.
function updatePersistedState(runId: string | null, folder: string | null): void {
  if (onStateChange) {
    onStateChange({ runId, folder });
  }
}
|
|
40
53
|
|
|
41
54
|
const colors = {
|
|
42
55
|
text: darkMode ? '#ffffff' : '#1d1d1f',
|
|
@@ -296,6 +309,45 @@ export function DocGenPage({ darkMode }: DocGenPageProps): ReactElement {
|
|
|
296
309
|
};
|
|
297
310
|
}, []);
|
|
298
311
|
|
|
312
|
+
// On mount, pick up the run recorded in the persisted state: fetch its current
// status once, then either show the final outcome or continue polling.
useEffect(() => {
  const runId = persistedState?.runId;
  // Skip when a resume was already attempted or no run was persisted.
  if (hasResumedRef.current || !runId) return;

  hasResumedRef.current = true;

  const resumeRun = async (): Promise<void> => {
    try {
      const run = await getWorkflowRun(runId);
      setWorkflowRun(run);

      switch (run.status) {
        case 'completed':
          setStatus('completed');
          break;
        case 'failed':
          setStatus('failed');
          setError(run.error || 'Workflow failed');
          break;
        default:
          // Still in flight: any reported progress counts as running,
          // otherwise the run is treated as queued. Polling resumes either way.
          setStatus(run.status === 'running' || run.progress > 0 ? 'running' : 'queued');
          startPolling(runId);
      }
    } catch (err) {
      // The run may have been deleted or expired - drop it from the persisted state.
      console.warn('[DocGen] Failed to resume run:', err);
      updatePersistedState(null, selectedFolder);
      setWorkflowRunId(null);
      setStatus('idle');
    }
  };

  void resumeRun();
}, [persistedState?.runId]);
|
|
350
|
+
|
|
299
351
|
// Handle folder selection
|
|
300
352
|
const handleSelectFolder = async () => {
|
|
301
353
|
setError(null);
|
|
@@ -308,6 +360,8 @@ export function DocGenPage({ darkMode }: DocGenPageProps): ReactElement {
|
|
|
308
360
|
setStatus('idle');
|
|
309
361
|
setWorkflowRunId(null);
|
|
310
362
|
setWorkflowRun(null);
|
|
363
|
+
// Persist folder, clear run
|
|
364
|
+
updatePersistedState(null, selected);
|
|
311
365
|
}
|
|
312
366
|
} catch (err) {
|
|
313
367
|
const message = err instanceof Error ? err.message : String(err);
|
|
@@ -342,6 +396,8 @@ export function DocGenPage({ darkMode }: DocGenPageProps): ReactElement {
|
|
|
342
396
|
});
|
|
343
397
|
|
|
344
398
|
setWorkflowRunId(response.id);
|
|
399
|
+
// Persist run ID for tab switch recovery
|
|
400
|
+
updatePersistedState(response.id, selectedFolder);
|
|
345
401
|
|
|
346
402
|
// Start polling
|
|
347
403
|
startPolling(response.id);
|
|
@@ -763,8 +763,7 @@ fn handle_bootstrap_node_session(payload: &Value, state: &EngineState) -> Engine
|
|
|
763
763
|
Err(e) => return EngineResponse::err("INTERNAL_ERROR", &e.to_string()),
|
|
764
764
|
};
|
|
765
765
|
|
|
766
|
-
//
|
|
767
|
-
// Do NOT fall back to user auth or Ed25519 flow
|
|
766
|
+
// Get node auth token - try auto-auth if not available
|
|
768
767
|
let node_token = match state.get_node_auth_token() {
|
|
769
768
|
Some(token) => {
|
|
770
769
|
tracing::info!(
|
|
@@ -776,14 +775,55 @@ fn handle_bootstrap_node_session(payload: &Value, state: &EngineState) -> Engine
|
|
|
776
775
|
token
|
|
777
776
|
}
|
|
778
777
|
None => {
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
778
|
+
// Token missing - try auto-auth from vault (single-flight)
|
|
779
|
+
// Check prerequisites BEFORE acquiring lock
|
|
780
|
+
if !node_credentials::has_credentials() {
|
|
781
|
+
tracing::error!(
|
|
782
|
+
op = "node_session.no_credentials",
|
|
783
|
+
"Node credentials not configured"
|
|
784
|
+
);
|
|
785
|
+
return EngineResponse::err(
|
|
786
|
+
"NODE_CREDENTIALS_MISSING",
|
|
787
|
+
"Node credentials not configured. Complete setup first.",
|
|
788
|
+
);
|
|
789
|
+
}
|
|
790
|
+
|
|
791
|
+
// Get engine URL from baked config (same source as everywhere else)
|
|
792
|
+
let engine_url = config::engine_url();
|
|
793
|
+
|
|
794
|
+
// Now acquire single-flight lock (after all prerequisite checks)
|
|
795
|
+
if !state.node_auth_state.try_start() {
|
|
796
|
+
return EngineResponse::err("NODE_AUTH_IN_PROGRESS", "Authentication in progress, please wait");
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
// From here, ALL paths must call set_authenticated() or set_failed()
|
|
800
|
+
tracing::info!(
|
|
801
|
+
op = "node_session.auto_auth",
|
|
802
|
+
"Auto-authenticating node after setup"
|
|
786
803
|
);
|
|
804
|
+
|
|
805
|
+
match node_credentials::authenticate_node(engine_url) {
|
|
806
|
+
Ok(token) => {
|
|
807
|
+
state.node_auth_token.set(token.clone());
|
|
808
|
+
state.node_auth_state.set_authenticated();
|
|
809
|
+
tracing::info!(
|
|
810
|
+
op = "node_session.auto_auth_success",
|
|
811
|
+
node_id = %token.node_id,
|
|
812
|
+
"Node auto-authenticated successfully"
|
|
813
|
+
);
|
|
814
|
+
token
|
|
815
|
+
}
|
|
816
|
+
Err(e) => {
|
|
817
|
+
let error_msg = format!("Node authentication failed: {}", e);
|
|
818
|
+
tracing::error!(
|
|
819
|
+
op = "node_session.auto_auth_failed",
|
|
820
|
+
error = %e,
|
|
821
|
+
"Node auto-authentication failed"
|
|
822
|
+
);
|
|
823
|
+
state.node_auth_state.set_failed(error_msg.clone());
|
|
824
|
+
return EngineResponse::err("NODE_NOT_AUTHENTICATED", &error_msg);
|
|
825
|
+
}
|
|
826
|
+
}
|
|
787
827
|
}
|
|
788
828
|
};
|
|
789
829
|
|
|
@@ -1,26 +1,26 @@
|
|
|
1
1
|
//! Desktop Node Session Runner
|
|
2
2
|
//!
|
|
3
|
-
//! Runner loop
|
|
3
|
+
//! Runner loop using node_id + node_secret authentication (NOT Ed25519).
|
|
4
4
|
//!
|
|
5
5
|
//! ## Architecture
|
|
6
6
|
//!
|
|
7
|
-
//! -
|
|
8
|
-
//! - Runner uses
|
|
9
|
-
//! -
|
|
10
|
-
//! - Tenant/workspace comes from
|
|
7
|
+
//! - Uses node_credentials (vault) for authentication
|
|
8
|
+
//! - Runner uses JWT token for all engine calls
|
|
9
|
+
//! - Token refreshed automatically via node_secret auth when expired
|
|
10
|
+
//! - Tenant/workspace comes from token (EKKA decides scope)
|
|
11
11
|
//!
|
|
12
12
|
//! ## Security
|
|
13
13
|
//!
|
|
14
|
-
//! - NO
|
|
14
|
+
//! - NO Ed25519 keys required
|
|
15
15
|
//! - NO environment variable credentials
|
|
16
|
-
//! -
|
|
16
|
+
//! - Tokens held in memory only
|
|
17
|
+
//! - node_secret never logged
|
|
17
18
|
|
|
18
19
|
#![allow(dead_code)] // API types and fields may not all be used yet
|
|
19
20
|
|
|
20
21
|
use crate::config;
|
|
21
|
-
use crate::node_auth::{
|
|
22
|
-
|
|
23
|
-
};
|
|
22
|
+
use crate::node_auth::{NodeSession, NodeSessionHolder, NodeSessionRunnerConfig};
|
|
23
|
+
use crate::node_credentials::authenticate_node;
|
|
24
24
|
use crate::state::RunnerState;
|
|
25
25
|
// Use ekka_runner_local for enhanced executor with debug bundle support
|
|
26
26
|
use ekka_runner_local::dispatch::{classify_error, dispatch_task};
|
|
@@ -189,10 +189,8 @@ impl NodeSessionRunner {
|
|
|
189
189
|
|
|
190
190
|
/// Get current valid session, refreshing if needed
|
|
191
191
|
///
|
|
192
|
+
/// Uses node_id + node_secret authentication (NOT Ed25519 keys).
|
|
192
193
|
/// IMPORTANT: Uses spawn_blocking to avoid Tokio runtime panic.
|
|
193
|
-
/// The refresh_node_session function uses reqwest::blocking::Client internally,
|
|
194
|
-
/// which creates its own runtime. Calling it directly in async context causes:
|
|
195
|
-
/// "Cannot drop a runtime in a context where blocking is not allowed"
|
|
196
194
|
async fn get_session(&self) -> Result<NodeSession, String> {
|
|
197
195
|
// Check if we have a valid session
|
|
198
196
|
if let Some(session) = self.session_holder.get_valid() {
|
|
@@ -200,29 +198,44 @@ impl NodeSessionRunner {
|
|
|
200
198
|
}
|
|
201
199
|
|
|
202
200
|
// Need to refresh - use spawn_blocking to avoid runtime panic
|
|
203
|
-
info!(
|
|
201
|
+
info!(
|
|
202
|
+
op = "node_runner.refresh_session.start",
|
|
203
|
+
method = "node_secret",
|
|
204
|
+
"Refreshing node session via node_secret auth"
|
|
205
|
+
);
|
|
204
206
|
|
|
205
|
-
let home_path = self.home_path.clone();
|
|
206
|
-
let node_id = self.node_id;
|
|
207
207
|
let engine_url = self.engine_url.clone();
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
refresh_node_session(
|
|
212
|
-
&home_path,
|
|
213
|
-
&node_id,
|
|
214
|
-
&engine_url,
|
|
215
|
-
device_fingerprint.as_deref(),
|
|
216
|
-
)
|
|
208
|
+
|
|
209
|
+
let auth_token = tokio::task::spawn_blocking(move || {
|
|
210
|
+
authenticate_node(&engine_url)
|
|
217
211
|
})
|
|
218
212
|
.await
|
|
219
213
|
.map_err(|e| format!("Session refresh task failed: {}", e))?
|
|
220
214
|
.map_err(|e| {
|
|
221
|
-
error!(
|
|
215
|
+
error!(
|
|
216
|
+
op = "node_runner.refresh_session.failed",
|
|
217
|
+
method = "node_secret",
|
|
218
|
+
error = %e,
|
|
219
|
+
"Session refresh via node_secret failed"
|
|
220
|
+
);
|
|
222
221
|
format!("Session refresh failed: {}", e)
|
|
223
222
|
})?;
|
|
224
223
|
|
|
225
|
-
|
|
224
|
+
// Convert NodeAuthToken to NodeSession
|
|
225
|
+
let session = NodeSession {
|
|
226
|
+
token: auth_token.token,
|
|
227
|
+
session_id: auth_token.session_id,
|
|
228
|
+
tenant_id: auth_token.tenant_id,
|
|
229
|
+
workspace_id: auth_token.workspace_id,
|
|
230
|
+
expires_at: auth_token.expires_at,
|
|
231
|
+
};
|
|
232
|
+
|
|
233
|
+
info!(
|
|
234
|
+
op = "node_runner.refresh_session.ok",
|
|
235
|
+
method = "node_secret",
|
|
236
|
+
session_id = %session.session_id,
|
|
237
|
+
"Session refreshed successfully via node_secret"
|
|
238
|
+
);
|
|
226
239
|
self.session_holder.set(session.clone());
|
|
227
240
|
Ok(session)
|
|
228
241
|
}
|
|
@@ -526,9 +539,7 @@ impl NodeSessionRunner {
|
|
|
526
539
|
engine_url: self.engine_url.clone(),
|
|
527
540
|
runner_id: self.runner_id.clone(),
|
|
528
541
|
session_holder: self.session_holder.clone(),
|
|
529
|
-
home_path: self.home_path.clone(),
|
|
530
542
|
node_id: self.node_id,
|
|
531
|
-
device_fingerprint: self.device_fingerprint.clone(),
|
|
532
543
|
};
|
|
533
544
|
|
|
534
545
|
let heartbeat_fn: Arc<
|
|
@@ -643,9 +654,7 @@ struct NodeSessionRunnerHeartbeat {
|
|
|
643
654
|
engine_url: String,
|
|
644
655
|
runner_id: String,
|
|
645
656
|
session_holder: Arc<NodeSessionHolder>,
|
|
646
|
-
|
|
647
|
-
node_id: Uuid,
|
|
648
|
-
device_fingerprint: Option<String>,
|
|
657
|
+
node_id: Uuid, // Kept for headers only
|
|
649
658
|
}
|
|
650
659
|
|
|
651
660
|
impl NodeSessionRunnerHeartbeat {
|
|
@@ -654,31 +663,45 @@ impl NodeSessionRunnerHeartbeat {
|
|
|
654
663
|
let session = if let Some(s) = self.session_holder.get_valid() {
|
|
655
664
|
s
|
|
656
665
|
} else {
|
|
657
|
-
// Try to refresh
|
|
658
|
-
|
|
659
|
-
|
|
666
|
+
// Try to refresh using node_secret auth (NOT Ed25519)
|
|
667
|
+
info!(
|
|
668
|
+
op = "node_runner.heartbeat.refresh_session.start",
|
|
669
|
+
method = "node_secret",
|
|
670
|
+
"Refreshing session for heartbeat via node_secret"
|
|
671
|
+
);
|
|
660
672
|
|
|
661
|
-
let home_path = self.home_path.clone();
|
|
662
|
-
let node_id = self.node_id;
|
|
663
673
|
let engine_url = self.engine_url.clone();
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
refresh_node_session(
|
|
668
|
-
&home_path,
|
|
669
|
-
&node_id,
|
|
670
|
-
&engine_url,
|
|
671
|
-
device_fingerprint.as_deref(),
|
|
672
|
-
)
|
|
674
|
+
|
|
675
|
+
let auth_token = tokio::task::spawn_blocking(move || {
|
|
676
|
+
authenticate_node(&engine_url)
|
|
673
677
|
})
|
|
674
678
|
.await
|
|
675
679
|
.map_err(|e| format!("Session refresh task failed: {}", e))?
|
|
676
680
|
.map_err(|e| {
|
|
677
|
-
error!(
|
|
681
|
+
error!(
|
|
682
|
+
op = "node_runner.heartbeat.refresh_session.failed",
|
|
683
|
+
method = "node_secret",
|
|
684
|
+
error = %e,
|
|
685
|
+
"Session refresh for heartbeat failed"
|
|
686
|
+
);
|
|
678
687
|
format!("Session refresh failed: {}", e)
|
|
679
688
|
})?;
|
|
680
689
|
|
|
681
|
-
|
|
690
|
+
// Convert NodeAuthToken to NodeSession
|
|
691
|
+
let session = NodeSession {
|
|
692
|
+
token: auth_token.token,
|
|
693
|
+
session_id: auth_token.session_id,
|
|
694
|
+
tenant_id: auth_token.tenant_id,
|
|
695
|
+
workspace_id: auth_token.workspace_id,
|
|
696
|
+
expires_at: auth_token.expires_at,
|
|
697
|
+
};
|
|
698
|
+
|
|
699
|
+
info!(
|
|
700
|
+
op = "node_runner.heartbeat.refresh_session.ok",
|
|
701
|
+
method = "node_secret",
|
|
702
|
+
session_id = %session.session_id,
|
|
703
|
+
"Session refreshed for heartbeat via node_secret"
|
|
704
|
+
);
|
|
682
705
|
self.session_holder.set(session.clone());
|
|
683
706
|
session
|
|
684
707
|
};
|
|
@@ -742,10 +765,15 @@ impl NodeSessionRunnerHeartbeat {
|
|
|
742
765
|
// Public API
|
|
743
766
|
// =============================================================================
|
|
744
767
|
|
|
768
|
+
/// Max consecutive errors before entering backoff mode
|
|
769
|
+
const MAX_CONSECUTIVE_ERRORS: u32 = 3;
|
|
770
|
+
/// Max backoff delay in seconds
|
|
771
|
+
const MAX_BACKOFF_SECS: u64 = 60;
|
|
772
|
+
|
|
745
773
|
/// Start the node session runner loop
|
|
746
774
|
///
|
|
747
|
-
///
|
|
748
|
-
///
|
|
775
|
+
/// Uses node_id + node_secret auth for session refresh (NOT Ed25519).
|
|
776
|
+
/// Includes backoff on repeated failures to prevent poll spam.
|
|
749
777
|
pub async fn run_node_session_runner_loop(
|
|
750
778
|
config: NodeSessionRunnerConfig,
|
|
751
779
|
session_holder: Arc<NodeSessionHolder>,
|
|
@@ -764,9 +792,12 @@ pub async fn run_node_session_runner_loop(
|
|
|
764
792
|
op = "node_runner.start",
|
|
765
793
|
runner_id = %runner.runner_id,
|
|
766
794
|
node_id = %runner.node_id,
|
|
767
|
-
|
|
795
|
+
auth_method = "node_secret",
|
|
796
|
+
"Node session runner starting (uses node_secret auth)"
|
|
768
797
|
);
|
|
769
798
|
|
|
799
|
+
let mut consecutive_errors: u32 = 0;
|
|
800
|
+
|
|
770
801
|
loop {
|
|
771
802
|
// Check for shutdown signal
|
|
772
803
|
if *shutdown_rx.borrow() {
|
|
@@ -777,6 +808,8 @@ pub async fn run_node_session_runner_loop(
|
|
|
777
808
|
|
|
778
809
|
match runner.poll_tasks().await {
|
|
779
810
|
Ok(tasks) => {
|
|
811
|
+
// Reset error count on success
|
|
812
|
+
consecutive_errors = 0;
|
|
780
813
|
cb.on_poll();
|
|
781
814
|
|
|
782
815
|
if tasks.is_empty() {
|
|
@@ -811,9 +844,29 @@ pub async fn run_node_session_runner_loop(
|
|
|
811
844
|
}
|
|
812
845
|
}
|
|
813
846
|
Err(e) => {
|
|
814
|
-
|
|
847
|
+
consecutive_errors += 1;
|
|
848
|
+
|
|
849
|
+
// Calculate backoff: exponential up to MAX_BACKOFF_SECS
|
|
850
|
+
let backoff_secs = if consecutive_errors >= MAX_CONSECUTIVE_ERRORS {
|
|
851
|
+
std::cmp::min(
|
|
852
|
+
POLL_INTERVAL_SECS * (1 << (consecutive_errors - MAX_CONSECUTIVE_ERRORS)),
|
|
853
|
+
MAX_BACKOFF_SECS,
|
|
854
|
+
)
|
|
855
|
+
} else {
|
|
856
|
+
POLL_INTERVAL_SECS
|
|
857
|
+
};
|
|
858
|
+
|
|
859
|
+
error!(
|
|
860
|
+
op = "node_runner.poll.error",
|
|
861
|
+
error = %e,
|
|
862
|
+
consecutive_errors = consecutive_errors,
|
|
863
|
+
backoff_secs = backoff_secs,
|
|
864
|
+
"Poll failed"
|
|
865
|
+
);
|
|
815
866
|
cb.on_error(&e);
|
|
816
|
-
|
|
867
|
+
|
|
868
|
+
// Wait with backoff
|
|
869
|
+
tokio::time::sleep(Duration::from_secs(backoff_secs)).await;
|
|
817
870
|
}
|
|
818
871
|
}
|
|
819
872
|
|