embark-ai 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +351 -0
- package/botSupervisor.js +237 -0
- package/mc-server/bot/bot.js +1415 -0
- package/mc-server/bot/damagePipeline.js +402 -0
- package/mc-server/bot/engine.js +212 -0
- package/mc-server/bot/entityLiveness.js +121 -0
- package/mc-server/bot/env.js +38 -0
- package/mc-server/bot/environmentPerception.js +384 -0
- package/mc-server/bot/fatalDesyncRecovery.js +42 -0
- package/mc-server/bot/goalRegistry.js +49 -0
- package/mc-server/bot/healthIntegrityWatchdog.js +59 -0
- package/mc-server/bot/llm.js +232 -0
- package/mc-server/bot/locomotionRecovery.js +190 -0
- package/mc-server/bot/logger.js +63 -0
- package/mc-server/bot/memory.js +59 -0
- package/mc-server/bot/movementController.js +110 -0
- package/mc-server/bot/package.json +14 -0
- package/mc-server/bot/positionGuard.js +75 -0
- package/mc-server/bot/recoveryEngine.js +315 -0
- package/mc-server/bot/safeMineflayer.js +129 -0
- package/mc-server/bot/state.js +105 -0
- package/mc-server/bot/tasks.js +939 -0
- package/mc-server/server.properties +74 -0
- package/package.json +44 -0
- package/tui.js +1099 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
// movementController.js — Single owner of bot.pathfinder.
// All pathfinder writes route through this module; every transition is logged.
// Priority model: CRITICAL(4) > HIGH(3) > NORMAL(2) > LOW(1) > PASSIVE(0)

const NAVIGATE_TIMEOUT_MS = 15000

const PRIORITY = Object.freeze({ CRITICAL: 4, HIGH: 3, NORMAL: 2, LOW: 1, PASSIVE: 0 })

/**
 * Build the movement controller that arbitrates all pathfinder access.
 *
 * @param {object} bot - Mineflayer bot with the pathfinder plugin loaded.
 * @param {object} goals - mineflayer-pathfinder goals namespace (GoalNear, GoalFollow).
 * @param {Function} makeMovements - factory returning a fresh Movements config.
 * @param {object} log - structured logger with debug/warn/error(event, data).
 * @returns {object} controller API: { PRIORITY, navigate, follow, stop, forceStop,
 *   release, setThinkingTimeout, restoreThinkingTimeout, isActive, getOwner }.
 */
function createMovementController(bot, goals, makeMovements, log) {
  let owner = null // { source, priority, type, startedAt }
  let savedThinkingTimeout = null

  // Take ownership when `priority` beats or ties the current owner's priority.
  function _acquire(source, priority, type) {
    if (owner && priority < owner.priority) {
      log.warn('movement_blocked', { source, priority, blockedBy: owner.source, blockedPriority: owner.priority })
      return false
    }
    if (owner) {
      log.debug('movement_preempted', { by: source, was: owner.source, heldMs: Date.now() - owner.startedAt })
    }
    owner = { source, priority, type, startedAt: Date.now() }
    log.debug('movement_acquired', { source, priority, type })
    return true
  }

  // Release ownership — a no-op unless `source` is the current owner, so a
  // preempted caller's late release cannot clobber the preemptor's claim.
  function release(source) {
    if (!owner || owner.source !== source) return
    log.debug('movement_released', { source, heldMs: Date.now() - owner.startedAt })
    owner = null
  }

  // stop: stop pathfinder and release ownership if caller is current owner.
  function stop(source) {
    try { bot.pathfinder.stop() } catch {}
    release(source)
  }

  // forceStop: stop pathfinder and clear ownership regardless of who holds it.
  // Use when a higher-level interrupt (cancel, error, evade) supersedes movement.
  function forceStop(source) {
    try { bot.pathfinder.stop() } catch {}
    if (owner) log.debug('movement_force_stopped', { by: source, was: owner.source, heldMs: Date.now() - owner.startedAt })
    owner = null
  }

  // navigate: async goto — acquires ownership, drives pathfinder.goto, releases on settle.
  // Rejects if blocked by a higher-priority owner or if pathfinding times out.
  // `timeoutMs` is an overridable bound (defaults to NAVIGATE_TIMEOUT_MS) so callers
  // can tighten or relax the per-trip deadline.
  async function navigate(x, y, z, range = 3, priority = PRIORITY.NORMAL, source = 'task', timeoutMs = NAVIGATE_TIMEOUT_MS) {
    if (!_acquire(source, priority, 'navigate')) {
      throw new Error(`movement_blocked: ${source} (p${priority}) blocked by ${owner?.source} (p${owner?.priority})`)
    }

    let timer
    try {
      // BUGFIX: everything after _acquire now runs inside try/finally. Previously
      // a synchronous throw from makeMovements()/setMovements()/goal construction
      // happened BEFORE the try block, so ownership was never released and every
      // later movement request was blocked forever.
      bot.pathfinder.setMovements(makeMovements())
      const pathPromise = bot.pathfinder.goto(new goals.GoalNear(x, y, z, range))
      const timeoutPromise = new Promise((_, reject) => {
        timer = setTimeout(() => {
          try { bot.pathfinder.stop() } catch {}
          reject(new Error(`pathfinding timeout to (${x},${y},${z})`))
        }, timeoutMs)
      })
      await Promise.race([pathPromise, timeoutPromise])
    } finally {
      clearTimeout(timer)
      release(source)
    }
  }

  // follow: fire-and-forget dynamic goal — acquires ownership and sets GoalFollow.
  // Ownership persists until stop/forceStop/another follow call (no async release).
  function follow(entity, range = 2, priority = PRIORITY.LOW, source = 'follow') {
    if (!_acquire(source, priority, 'follow')) return false
    try {
      bot.pathfinder.setMovements(makeMovements())
      bot.pathfinder.setGoal(new goals.GoalFollow(entity, range), true)
    } catch (e) {
      log.error('movement_follow_error', { source, message: e.message })
      release(source)
      return false
    }
    return true
  }

  // setThinkingTimeout / restoreThinkingTimeout: let callers temporarily tighten
  // pathfinder planning budget (explore = 2s, escape = 5s). Saves original on first call.
  function setThinkingTimeout(ms) {
    if (savedThinkingTimeout === null) {
      savedThinkingTimeout = bot.pathfinder.thinkingTimeout ?? 1500
    }
    try { bot.pathfinder.thinkingTimeout = ms } catch {}
    log.debug('movement_thinking_timeout_set', { ms })
  }

  function restoreThinkingTimeout() {
    if (savedThinkingTimeout === null) return
    try { bot.pathfinder.thinkingTimeout = savedThinkingTimeout } catch {}
    log.debug('movement_thinking_timeout_restored', { ms: savedThinkingTimeout })
    savedThinkingTimeout = null
  }

  // True while the pathfinder is actively moving or still holds a goal.
  function isActive() {
    return !!(bot.pathfinder?.isMoving() || bot.pathfinder?.goal != null)
  }

  // Snapshot of the current owner (a copy — callers cannot mutate our state).
  function getOwner() { return owner ? { ...owner } : null }

  return { PRIORITY, navigate, follow, stop, forceStop, release, setThinkingTimeout, restoreThinkingTimeout, isActive, getOwner }
}

// CommonJS export, guarded so the factory also loads under ESM-based test harnesses.
if (typeof module !== 'undefined') module.exports = createMovementController
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "mc-agent-bot",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Minecraft autonomous agent bot",
|
|
5
|
+
"main": "bot.js",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"start": "node bot.js"
|
|
8
|
+
},
|
|
9
|
+
"dependencies": {
|
|
10
|
+
"mineflayer": "^4.37.0",
|
|
11
|
+
"mineflayer-pathfinder": "^2.4.5",
|
|
12
|
+
"mineflayer-pvp": "^1.3.2"
|
|
13
|
+
}
|
|
14
|
+
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// positionGuard.js — Repair bot.entity.position in place when a hit corrupts it to NaN.
//
// A single knockback can make prismarine-physics integrate a bad velocity into a
// NaN/null position (a long-standing Mineflayer quirk). The server's view of the bot
// is fine, but the *client* is then stuck with a corrupt position for several seconds:
// the bot can't pathfind or react, so it stands still until either a server position
// packet arrives or fatalDesyncRecovery reconnects (~6s of "freeze when you hit her").
//
// This guard runs every physics tick (~50ms): if the live position has any non-finite
// component, it overwrites it with the last cached valid position (from entityLiveness)
// and zeroes any non-finite velocity component. Physics then continues from there with
// finite values; the next server position packet corrects any small drift. Net effect:
// the ~6s freeze becomes a ~50ms invisible blip.
//
// Because the restore makes entityLiveness see a valid position again, isDesynced()
// never fires and fatalDesyncRecovery never reconnects in the common case. As a backstop
// for the rare scenario where the corruption keeps reappearing despite restores, after
// RECONNECT_AFTER_TICKS of continuous restoring we ask the recovery arbiter to reconnect.

const RECONNECT_AFTER_TICKS = 100 // ~5s of nonstop restoring ⇒ something deeper is wrong

// True for a real finite number (rejects NaN, ±Infinity, and non-numbers).
function finite(n) { return typeof n === 'number' && Number.isFinite(n) }

/**
 * Install the per-tick position/velocity repair hook.
 *
 * @param {object} bot - Mineflayer bot; listens on its 'physicsTick' event.
 * @param {object} liveness - entityLiveness module; getCachedPos() returns the last
 *   known-valid {x, y, z} or a falsy value when nothing valid was recorded yet.
 * @param {object} log - structured logger with debug/warn/error(event, data).
 * @param {object} [recoveryEngine] - recovery arbiter; report('DESYNC', …) is called
 *   if the corruption persists past RECONNECT_AFTER_TICKS.
 * @returns {{getRestoreCount: () => number}} diagnostics accessor.
 */
function createPositionGuard(bot, liveness, log, recoveryEngine) {
  let restoreCount = 0
  let consecutiveBad = 0
  let reconnectRequested = false

  bot.on('physicsTick', () => {
    const e = bot.entity
    if (!e) return
    const p = e.position
    const v = e.velocity

    const pBad = !p || !finite(p.x) || !finite(p.y) || !finite(p.z)
    const vBad = !!v && (!finite(v.x) || !finite(v.y) || !finite(v.z))

    if (!pBad && !vBad) {
      consecutiveBad = 0
      reconnectRequested = false
      return
    }

    consecutiveBad++

    // BUGFIX: repair velocity BEFORE the cached-position early-return below.
    // Zeroing a NaN velocity needs no cached data; previously, a bad position
    // with no cache yet (pre-spawn) returned early and skipped the velocity
    // repair entirely, letting physics keep integrating NaN.
    if (vBad) {
      if (!finite(v.x)) v.x = 0
      if (!finite(v.y)) v.y = 0
      if (!finite(v.z)) v.z = 0
    }

    if (pBad) {
      const cached = liveness.getCachedPos()
      if (!cached) return // nothing valid recorded yet (pre-spawn) — leave it; desync path will handle
      if (!finite(p.x)) p.x = cached.x
      if (!finite(p.y)) p.y = cached.y
      if (!finite(p.z)) p.z = cached.z
    }

    restoreCount++
    // Log the first restore, then sample every 20th to avoid log spam.
    if (restoreCount === 1 || restoreCount % 20 === 0) {
      log.warn('position_guard_restored', {
        restoreCount, consecutiveBad, pBad, vBad,
        restoredTo: pBad ? liveness.getCachedPos() : null,
      })
    }

    if (consecutiveBad > RECONNECT_AFTER_TICKS && !reconnectRequested) {
      reconnectRequested = true
      log.error('position_guard_giving_up', { consecutiveBad, restoreCount })
      try { recoveryEngine?.report('DESYNC', { source: 'position_guard_unrecoverable', consecutiveBad }) } catch {}
    }
  })

  return { getRestoreCount: () => restoreCount }
}

// CommonJS export, guarded so the factory also loads under ESM-based test harnesses.
if (typeof module !== 'undefined') module.exports = createPositionGuard
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
// recoveryEngine.js — The single recovery arbiter.
|
|
2
|
+
//
|
|
3
|
+
// Recovery used to be multi-headed (diagnosis#2 §2.1): freeze-forensics,
|
|
4
|
+
// fatalDesyncRecovery, the health-integrity watchdog, the task watchdog, the
|
|
5
|
+
// movement-timeout streak, and the activity watchdog all independently detected
|
|
6
|
+
// overlapping symptoms AND independently acted — and they raced (the log shows two
|
|
7
|
+
// of them firing on the same incident seconds apart). Now there is one arbiter:
|
|
8
|
+
// watchdogs DETECT and call report(symptom, context); this module DECIDES and ACTS.
|
|
9
|
+
// Nothing else in the codebase calls bot.quit() — reconnection is owned here.
|
|
10
|
+
//
|
|
11
|
+
// SYMPTOMS (what watchdogs observe) → CLASSES (escalation ladders):
|
|
12
|
+
// DESYNC → DESYNC position structurally invalid; reconnect only
|
|
13
|
+
// STUCK → STUCK physically wedged; jiggle / blind-survival / escape
|
|
14
|
+
// MOVEMENT_TIMEOUT_STREAK → MOVEMENT pathfinder timeout streak
|
|
15
|
+
// TASK_HUNG → TASK task watchdog tripped
|
|
16
|
+
// ENTITY_ORPHAN → ENTITY orphaned task state
|
|
17
|
+
// COMBAT_STALL → COMBAT combat-specific recovery
|
|
18
|
+
// CRITICAL_HP → CRITICAL_HP HP draining, damage pipeline blind, position VALID
|
|
19
|
+
// IDLE → IDLE activity watchdog
|
|
20
|
+
//
|
|
21
|
+
// PRIORITY (highest first): DESYNC > STUCK > MOVEMENT_TIMEOUT_STREAK > TASK_HUNG >
|
|
22
|
+
// ENTITY_ORPHAN > COMBAT_STALL > CRITICAL_HP > IDLE. If a higher-priority class is
|
|
23
|
+
// mid-recovery (acted within its own cooldown), lower-priority reports are
|
|
24
|
+
// suppressed so the watchdogs can't pile on during an in-flight recovery.
|
|
25
|
+
//
|
|
26
|
+
// STUCK vs DESYNC (diagnosis#2 §1.3): being physically wedged in terrain and being
|
|
27
|
+
// server-state desynced are different failure modes with different remedies.
|
|
28
|
+
// STUCK — live position is valid but not progressing: jiggle / blind-survival / escape.
|
|
29
|
+
// DESYNC — live position is structurally invalid (NaN/null component): NO local maneuver
|
|
30
|
+
// can help; the only action at any level is to reconnect.
|
|
31
|
+
//
|
|
32
|
+
// Per-class failure counts decay after DECAY_MS of silence so transient issues don't
|
|
33
|
+
// permanently poison the escalation level.
|
|
34
|
+
|
|
35
|
+
// Recovery classes — each value names one escalation ladder.
const CLASSES = Object.freeze({
  TASK: 'TASK', STUCK: 'STUCK', DESYNC: 'DESYNC', MOVEMENT: 'MOVEMENT',
  CRITICAL_HP: 'CRITICAL_HP', IDLE: 'IDLE', ENTITY: 'ENTITY', COMBAT: 'COMBAT',
})

// Raw observations that watchdogs are allowed to report.
const SYMPTOMS = Object.freeze({
  DESYNC: 'DESYNC', STUCK: 'STUCK',
  MOVEMENT_TIMEOUT_STREAK: 'MOVEMENT_TIMEOUT_STREAK', TASK_HUNG: 'TASK_HUNG',
  ENTITY_ORPHAN: 'ENTITY_ORPHAN', COMBAT_STALL: 'COMBAT_STALL',
  CRITICAL_HP: 'CRITICAL_HP', IDLE: 'IDLE',
})

// Which ladder owns each symptom.
const SYMPTOM_TO_CLASS = {
  DESYNC: CLASSES.DESYNC,
  STUCK: CLASSES.STUCK,
  MOVEMENT_TIMEOUT_STREAK: CLASSES.MOVEMENT,
  TASK_HUNG: CLASSES.TASK,
  ENTITY_ORPHAN: CLASSES.ENTITY,
  COMBAT_STALL: CLASSES.COMBAT,
  CRITICAL_HP: CLASSES.CRITICAL_HP,
  IDLE: CLASSES.IDLE,
}

// Symptom ordering for cross-class suppression — highest priority first.
const PRIORITY = [
  'DESYNC', 'STUCK', 'MOVEMENT_TIMEOUT_STREAK', 'TASK_HUNG',
  'ENTITY_ORPHAN', 'COMBAT_STALL', 'CRITICAL_HP', 'IDLE',
]

// Minimum quiet time between two attempts of the same class.
const COOLDOWN_MS = {
  TASK: 8000,
  STUCK: 5000,
  DESYNC: 3000,
  MOVEMENT: 10000,
  CRITICAL_HP: 8000, // matches healthIntegrityWatchdog's TRIGGER_COOLDOWN_MS
  IDLE: 120000,
  ENTITY: 10000,
  COMBAT: 5000,
}

// Silence longer than this resets a class's failure count and level.
const DECAY_MS = {
  TASK: 120000,
  STUCK: 60000,
  DESYNC: 60000,
  MOVEMENT: 120000,
  CRITICAL_HP: 60000,
  IDLE: 600000,
  ENTITY: 60000,
  COMBAT: 30000,
}

// Ceiling of each class's escalation ladder.
const MAX_LEVEL = {
  TASK: 4,
  STUCK: 3,
  DESYNC: 1,
  MOVEMENT: 3,
  CRITICAL_HP: 2,
  IDLE: 2,
  ENTITY: 2,
  COMBAT: 2,
}
|
|
105
|
+
|
|
106
|
+
/**
 * Create the recovery arbiter — the single place that DECIDES and ACTS on
 * watchdog reports. Watchdogs only ever call report(symptom, context); this
 * module maps the symptom to a class ladder, applies cross-class priority
 * suppression and per-class cooldown/decay, escalates, and executes the remedy.
 * It is also the sole owner of bot.quit().
 *
 * @param {object} deps
 * @param {object} deps.bot - Mineflayer bot (chat/quit).
 * @param {object} deps.movement - movement controller (forceStop).
 * @param {object} deps.state - shared bot state (goal, idleTicks).
 * @param {object} deps.log - structured logger (debug/warn/error).
 * @param {Function} deps.getTaskContext - () => context including taskBusy.
 * @param {Function} deps.cancelTask - cancel the current task.
 * @param {Function} deps.replaceTask - swap in a new task.
 * @param {Function} deps.runTask - start a named task.
 * @param {Function} deps.taskBlindSurvival - raw-control survival task.
 * @param {Function} deps.taskEscape - escape task.
 * @param {Function} deps.taskExplore - explore task.
 * @param {Function} [deps.writeExitReason] - persists exit reason for the supervisor.
 * @returns {{CLASSES, SYMPTOMS, report, reset, getState}}
 */
function createRecoveryEngine(deps) {
  const { bot, movement, state, log } = deps
  const { getTaskContext, cancelTask, replaceTask, runTask } = deps
  const { taskBlindSurvival, taskEscape, taskExplore } = deps
  const { writeExitReason } = deps

  // Per-class state: failures, current escalation level, last attempt timestamp
  const cs = {}
  for (const cls of Object.values(CLASSES)) {
    cs[cls] = { failures: 0, level: 0, lastAttemptAt: 0 }
  }

  // Once we've committed to a reconnect the client is going away — ignore all
  // further reports so nothing tries to act on a disconnecting bot.
  let quitPending = false

  // ── Public entrypoint ───────────────────────────────────────────────────────
  // Watchdogs call this and nothing else.
  function report(symptom, context = {}) {
    if (quitPending) return

    const cls = SYMPTOM_TO_CLASS[symptom]
    if (!cls) {
      log.warn('recovery_unknown_symptom', { symptom, ...context })
      return
    }

    // Cross-class priority suppression: if a higher-priority class acted within its
    // own cooldown, it owns the situation right now — drop this lower-priority report.
    const now = Date.now()
    const myIdx = PRIORITY.indexOf(symptom)
    for (let i = 0; i < myIdx; i++) {
      const higherCls = SYMPTOM_TO_CLASS[PRIORITY[i]]
      const hs = cs[higherCls]
      if (hs.failures > 0 && (now - hs.lastAttemptAt) < COOLDOWN_MS[higherCls]) {
        log.debug('recovery_suppressed', { symptom, class: cls, suppressedBy: higherCls, ...context })
        return
      }
    }

    _recover(cls, { symptom, ...context })
  }

  // ── Internal escalation ladder ──────────────────────────────────────────────
  // Same-symptom dedup falls out of the per-class cooldown below.
  function _recover(cls, context = {}) {
    const s = cs[cls]
    const now = Date.now()
    const elapsed = now - s.lastAttemptAt

    if (elapsed < COOLDOWN_MS[cls]) {
      log.debug('recovery_cooldown', { class: cls, remainingMs: COOLDOWN_MS[cls] - elapsed, ...context })
      return
    }

    // Decay: long silence between failures resets escalation
    if (s.failures > 0 && elapsed > DECAY_MS[cls]) {
      log.debug('recovery_decay_reset', { class: cls, hadLevel: s.level, hadFailures: s.failures })
      s.failures = 0
      s.level = 0
    }

    s.failures++
    s.lastAttemptAt = now

    const prevLevel = s.level
    s.level = Math.min(s.failures, MAX_LEVEL[cls])

    log.warn('recovery_attempt', {
      class: cls,
      level: s.level,
      failures: s.failures,
      ...(context.symptom && { symptom: context.symptom }),
      ...(context.source && { source: context.source }),
      ...(context.goalName && { goalName: context.goalName }),
      ...(context.reason && { reason: context.reason }),
    })

    if (s.level > prevLevel && prevLevel > 0) {
      log.warn('recovery_escalated', { class: cls, from: prevLevel, to: s.level, ...context })
    }

    _execute(cls, s.level, context)
  }

  // Clear a class's ladder after a confirmed-good outcome. lastAttemptAt is
  // intentionally left alone so the cooldown still applies to the next report.
  function reset(cls) {
    const s = cs[cls]
    if (!s) return
    if (s.failures > 0) {
      log.debug('recovery_class_reset', { class: cls, failures: s.failures, level: s.level })
    }
    s.failures = 0
    s.level = 0
  }

  // Shared ladder tail: give cancellation ~300ms to settle, then run the escape
  // task — but only if nothing else has claimed the bot in the meantime.
  // (This setTimeout block was previously duplicated verbatim at four call sites.)
  function _escapeWhenIdle() {
    setTimeout(() => {
      if (!getTaskContext().taskBusy && state.goal === 'idle') {
        runTask('escaping', taskEscape)
      }
    }, 300)
  }

  // Map (class, level) → concrete remedy.
  function _execute(cls, level, context) {
    switch (cls) {
      case CLASSES.TASK:
        if (level <= 2) {
          cancelTask()
        } else if (level === 3) {
          cancelTask()
          movement.forceStop('recovery')
          _escapeWhenIdle()
        } else {
          _quitSafely(cls, level, context)
        }
        break

      case CLASSES.STUCK:
        if (level === 1) {
          cancelTask()
        } else if (level === 2) {
          cancelTask()
          replaceTask('blind_survival', taskBlindSurvival, { silent: true })
        } else {
          _quitSafely(cls, level, context)
        }
        break

      case CLASSES.DESYNC:
        // Position is structurally invalid — no control-state maneuver can fix it.
        // Reconnect immediately, at any level.
        _quitSafely(cls, level, context)
        break

      case CLASSES.MOVEMENT:
        if (level === 1) {
          movement.forceStop('recovery_movement')
          _escapeWhenIdle()
        } else if (level === 2) {
          cancelTask()
          movement.forceStop('recovery_movement')
          _escapeWhenIdle()
        } else {
          _quitSafely(cls, level, context)
        }
        break

      case CLASSES.CRITICAL_HP:
        // HP draining and the normal damage pipeline can't react (NaN window) BUT the
        // position is valid — so raw-control-state survival can still help. Sprint away
        // at L1; escalate to a full escape task at L2.
        if (level === 1) {
          replaceTask('blind_survival', taskBlindSurvival, { silent: true })
        } else {
          cancelTask()
          movement.forceStop('recovery_critical_hp')
          _escapeWhenIdle()
        }
        break

      case CLASSES.IDLE:
        if (level === 1) {
          state.idleTicks = 999 // trigger autonomous decision on next state tick
        } else {
          if (!getTaskContext().taskBusy) runTask('exploring', taskExplore)
        }
        break

      case CLASSES.ENTITY:
        cancelTask()
        if (level > 1) movement.forceStop('recovery_entity')
        break

      case CLASSES.COMBAT:
        cancelTask()
        break
    }
  }

  // The single owner of bot.quit(). Writes the exit reason up front (so botSupervisor
  // applies the right backoff even if bot.on('end') doesn't run cleanly), announces
  // in-game, then quits after a short defer.
  function _quitSafely(cls, level, context) {
    if (quitPending) return
    quitPending = true
    const isDesync = cls === CLASSES.DESYNC
    log.error('recovery_reconnect', { class: cls, level, ...context })
    try { if (writeExitReason) writeExitReason(isDesync ? 'entity_desync' : 'stuck_reconnect') } catch {}
    try { bot.chat(isDesync ? 'Entity desync — reconnecting.' : 'Stuck — reconnecting.') } catch {}
    setTimeout(() => {
      try { bot.quit() } catch { process.exit(0) }
    }, 500)
  }

  // Diagnostic snapshot of every class ladder (copies — callers can't mutate).
  function getState() {
    return Object.fromEntries(
      Object.entries(cs).map(([cls, s]) => [cls, { ...s }])
    )
  }

  return { CLASSES, SYMPTOMS, report, reset, getState }
}

// CommonJS export, guarded so the factory also loads under ESM-based test harnesses.
if (typeof module !== 'undefined') module.exports = createRecoveryEngine
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
// safeMineflayer.js — Bounded wrappers for Mineflayer async API calls.
|
|
2
|
+
//
|
|
3
|
+
// Every Mineflayer API call that returns a Promise can hang indefinitely
|
|
4
|
+
// if the server stops responding mid-operation (confirmed: bot.dig took 38s
|
|
5
|
+
// on a sand block — events.jsonl 06:48:34–06:49:14). These wrappers enforce
|
|
6
|
+
// per-call timeouts via Promise.race, mirroring navNear (bot.js:914-955).
|
|
7
|
+
//
|
|
8
|
+
// On timeout: rejects with a clear Error and emits a structured log event
|
|
9
|
+
// so events.jsonl shows exactly which call stalled and for how long.
|
|
10
|
+
//
|
|
11
|
+
// Cleanup (pathfinder.stop, clearControlStates) is NOT done here — the
|
|
12
|
+
// calling task's existing catch/finally handlers already own that path.
|
|
13
|
+
|
|
14
|
+
const log = require('./logger')
|
|
15
|
+
|
|
16
|
+
// Wrap an in-flight Mineflayer promise with a wall-clock bound: if it has not
// settled within timeoutMs, log `eventName` (plus meta) and reject with a
// descriptive Error; otherwise settle exactly as the wrapped promise does.
// The timer is always cleared once the race settles, either way.
function withTimeout(promise, timeoutMs, eventName, meta = {}) {
  let pendingTimer
  const watchdog = new Promise((_, reject) => {
    pendingTimer = setTimeout(() => {
      log.warn(eventName, { timeoutMs, ...meta })
      reject(new Error(`${eventName}: timed out after ${timeoutMs}ms`))
    }, timeoutMs)
  })
  const bounded = Promise.race([promise, watchdog])
  return bounded.finally(() => clearTimeout(pendingTimer))
}
|
|
26
|
+
|
|
27
|
+
// Bounded bot.dig(): rejects (and logs 'safe_dig_timeout') if the dig has not
// finished within timeoutMs. Block name and coordinates go into the log meta.
function safeDig(bot, block, timeoutMs = 8000) {
  const where = block?.position
  const meta = {
    block: block?.name,
    pos: where ? { x: where.x, y: where.y, z: where.z } : null,
  }
  return withTimeout(bot.dig(block), timeoutMs, 'safe_dig_timeout', meta)
}
|
|
35
|
+
|
|
36
|
+
// Bounded bot.craft(): crafting goes through the server-side window protocol
// and can stall the same way digging does; logs 'safe_craft_timeout' on expiry.
function safeCraft(bot, recipe, count, table, timeoutMs = 5000) {
  const crafting = bot.craft(recipe, count, table)
  return withTimeout(crafting, timeoutMs, 'safe_craft_timeout', { count, hasTable: !!table })
}
|
|
42
|
+
|
|
43
|
+
// Bounded bot.equip(): logs the item name and destination slot on timeout.
function safeEquip(bot, item, destination, timeoutMs = 3000) {
  const meta = { item: item?.name, destination }
  return withTimeout(bot.equip(item, destination), timeoutMs, 'safe_equip_timeout', meta)
}
|
|
49
|
+
|
|
50
|
+
// Bounded bot.consume(): eating is a server round-trip and can stall like any
// other Mineflayer call; logs 'safe_consume_timeout' on expiry.
function safeConsume(bot, timeoutMs = 5000) {
  const eating = bot.consume()
  return withTimeout(eating, timeoutMs, 'safe_consume_timeout', {})
}
|
|
53
|
+
|
|
54
|
+
// Bounded bot.placeBlock(): logs the reference block's name and coordinates on
// timeout ('safe_place_timeout').
function safePlaceBlock(bot, referenceBlock, faceVector, timeoutMs = 5000) {
  const at = referenceBlock?.position
  const meta = {
    ref: referenceBlock?.name,
    pos: at ? { x: at.x, y: at.y, z: at.z } : null,
  }
  return withTimeout(bot.placeBlock(referenceBlock, faceVector), timeoutMs, 'safe_place_timeout', meta)
}
|
|
62
|
+
|
|
63
|
+
// safeAttack — bounded bot.pvp.attack() wrapper.
|
|
64
|
+
//
|
|
65
|
+
// bot.pvp.attack() is fire-and-forget: the only escape is waiting for
|
|
66
|
+
// 'stoppedAttacking', which never fires if the target dies/desyncs/leaves
|
|
67
|
+
// range without the pvp plugin noticing. Without a bound, the calling task
|
|
68
|
+
// hangs for the full 30 s timeout (diagnosis#2 §1.4).
|
|
69
|
+
//
|
|
70
|
+
// Resolution order (whichever comes first):
|
|
71
|
+
// 1. bot event 'stoppedAttacking'
|
|
72
|
+
// 2. maxMs wall-clock timeout
|
|
73
|
+
// 3. ~500 ms poll: target invalid / position gone / out of engage range / HP ≤ 0
|
|
74
|
+
//
|
|
75
|
+
// On early-out (timeout or poll): bot.pvp.stop() is called before resolving
|
|
76
|
+
// so the pvp plugin doesn't keep trying to attack a gone target.
|
|
77
|
+
//
|
|
78
|
+
// Always resolves (never rejects) — attack tasks should not crash on a
|
|
79
|
+
// target disappearing mid-fight; the task checks target.isValid after return.
|
|
80
|
+
const SAFE_ATTACK_POLL_MS = 500
const SAFE_ATTACK_ENGAGE_R = 6 // blocks — reasonable melee + bow range

function safeAttack(bot, target, { maxMs = 10_000, label = 'target' } = {}) {
  return new Promise(resolve => {
    let settled = false

    const onStopped = () => finish('stopped')

    // Resolve exactly once: tear down every timer/listener, and on any
    // early-out (anything but a clean 'stopped') tell the pvp plugin to quit
    // chasing the gone target before resolving.
    const finish = (reason) => {
      if (settled) return
      settled = true
      clearInterval(pollTimer)
      clearTimeout(wallTimer)
      bot.removeListener('stoppedAttacking', onStopped)

      if (reason === 'stopped') {
        log.debug('safe_attack_done', { label, reason })
      } else {
        log.warn('safe_attack_timeout', { label, reason, maxMs })
        try { bot.pvp.stop() } catch {}
      }
      resolve()
    }

    // ~500ms liveness poll on the target entity.
    const checkTarget = () => {
      if (!target || !target.isValid) return finish('target_invalid')
      if (!target.position) return finish('target_no_position')
      if ((target.health ?? 1) <= 0) return finish('target_dead')
      const here = bot.entity?.position
      if (here && target.position.distanceTo(here) > SAFE_ATTACK_ENGAGE_R * 3) {
        return finish('target_out_of_range')
      }
    }

    bot.on('stoppedAttacking', onStopped)
    const pollTimer = setInterval(checkTarget, SAFE_ATTACK_POLL_MS)
    const wallTimer = setTimeout(() => finish('timeout'), maxMs)

    try {
      bot.pvp.attack(target)
    } catch (err) {
      log.warn('safe_attack_start_error', { label, message: err.message })
      finish('start_error')
    }
  })
}
|
|
128
|
+
|
|
129
|
+
module.exports = { safeDig, safeCraft, safeEquip, safeConsume, safePlaceBlock, safeAttack }
|