@geekbeer/minion 4.4.0 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Add `chat_runs` — the durability backbone for detached chat execution.
3
+ *
4
+ * A "run" is one Claude invocation owned by the chat-run-manager, NOT by the
5
+ * HTTP request that started it. The SSE connection is a mere subscriber: when
6
+ * it drops, the run keeps going. This table lets a reconnecting client (new
7
+ * tab, refresh, dropped network) discover the in-flight run for a workspace and
8
+ * resume tailing its event log from the last seen sequence number.
9
+ *
10
+ * Columns:
11
+ * - run_id: manager-minted UUID (also the NDJSON event-log filename)
12
+ * - session_id: resolved Claude CLI session id (null until reported)
13
+ * - pending_session_id: local pending id used to persist the user message
14
+ * before the CLI session id is known
15
+ * - workspace_id: scope (null = minion-wide bucket)
16
+ * - status: running | done | error | aborted | interrupted
17
+ * - last_seq: highest event sequence appended (reconnect cursor hint)
18
+ *
19
+ * On boot, any row still marked `running` is stale (its in-process owner died
20
+ * with the previous server) and is swept to `interrupted` by the store.
21
+ */
22
+
23
+ module.exports = {
24
+ version: 20260607000000,
25
+ name: 'chat_runs',
26
+
27
+ up(db, { tableExists }) {
28
+ if (tableExists(db, 'chat_runs')) return
29
+
30
+ db.exec(`
31
+ CREATE TABLE chat_runs (
32
+ run_id TEXT PRIMARY KEY,
33
+ session_id TEXT DEFAULT NULL,
34
+ pending_session_id TEXT DEFAULT NULL,
35
+ workspace_id TEXT DEFAULT NULL,
36
+ status TEXT NOT NULL DEFAULT 'running',
37
+ started_at INTEGER NOT NULL,
38
+ ended_at INTEGER DEFAULT NULL,
39
+ last_seq INTEGER NOT NULL DEFAULT 0
40
+ );
41
+
42
+ CREATE INDEX IF NOT EXISTS idx_chat_runs_workspace
43
+ ON chat_runs(workspace_id, started_at DESC);
44
+ CREATE INDEX IF NOT EXISTS idx_chat_runs_status
45
+ ON chat_runs(status);
46
+ `)
47
+ },
48
+ }
@@ -0,0 +1,22 @@
1
+ /**
2
+ * page_recipes.ready_selector — SPA-aware wait support (v4.7.0).
3
+ *
4
+ * The cold path now asks the LLM for a `ready_selector`: a CSS selector for an
5
+ * element that only exists once the page's primary content has rendered. On hot
6
+ * replays the extractor waits for it before reading the DOM, so client-rendered
7
+ * (SPA) pages are captured after hydration instead of as an empty shell.
8
+ *
9
+ * Nullable: recipes learned before this column existed simply fall back to
10
+ * DOM-settle detection until they are regenerated.
11
+ */
12
+
13
+ module.exports = {
14
+ version: 20260607120000,
15
+ name: 'page_recipes_ready_selector',
16
+
17
+ up(db, { tableExists, hasColumn }) {
18
+ if (!tableExists(db, 'page_recipes')) return
19
+ if (hasColumn(db, 'page_recipes', 'ready_selector')) return
20
+ db.exec('ALTER TABLE page_recipes ADD COLUMN ready_selector TEXT;')
21
+ },
22
+ }
@@ -0,0 +1,406 @@
1
+ /**
2
+ * Chat Run Manager — detached execution backbone for chat.
3
+ *
4
+ * THE PROBLEM THIS SOLVES
5
+ * -----------------------
6
+ * Historically a chat message spawned the LLM as a child of the HTTP request
7
+ * handler and streamed its stdout straight into the SSE response. A
8
+ * `res.on('close')` handler then killed that child. So ANY break in the
9
+ * browser → HQ → minion connection chain (tab close, navigation, reverse-proxy
10
+ * idle timeout, network blip) silently SIGTERM'd the work mid-task.
11
+ *
12
+ * THE MODEL
13
+ * ---------
14
+ * A "run" is one LLM invocation owned by THIS manager, not by any HTTP request.
15
+ * The manager spawns the LLM (via a caller-supplied `invoke` executor),
16
+ * records every wire event into an append-only NDJSON log + an in-memory
17
+ * buffer, and notifies subscribers. The SSE endpoints are mere SUBSCRIBERS:
18
+ * when a connection drops they unsubscribe — the run keeps going. A
19
+ * reconnecting client (new tab / refresh / dropped network) finds the in-flight
20
+ * run via `getActiveRunId()` and resumes tailing from the last sequence number
21
+ * it saw. Only an explicit `/api/chat/abort` kills the process.
22
+ *
23
+ * SEPARATION OF CONCERNS
24
+ * ----------------------
25
+ * The manager is LLM-agnostic. The route supplies an `invoke(emit, activeRef)`
26
+ * executor that does the actual plugin.stream() / legacy spawn and:
27
+ * - calls emit(wireEvent) for each delta / text / tool / result event
28
+ * - sets activeRef.current = <child process> so abort can reach it
29
+ * - returns { fullResponse, resolvedSessionId, turnCount } (or throws)
30
+ * The manager owns everything durable: run id, event log, pub/sub, session
31
+ * persistence/rekey, the terminal `done`/`error` event, abort, and eviction.
32
+ *
33
+ * DURABILITY vs MEMORY
34
+ * --------------------
35
+ * While a run is live (and during a post-completion TTL) its events live in
36
+ * memory and memory is authoritative for replay — the common reconnect (refresh
37
+ * during an active run) never touches disk. The NDJSON log is the fallback for
38
+ * reconnects that arrive after the run was evicted from memory. Phase 1 does NOT
39
+ * survive a minion process restart (the in-process owner dies with it); on boot
40
+ * any run still flagged `running` is swept to `interrupted` so clients stop
41
+ * waiting. (A future phase could relaunch under tmux like board tasks.)
42
+ */
43
+
44
+ const crypto = require('crypto')
45
+ const fs = require('fs')
46
+ const path = require('path')
47
+
48
+ const { DATA_DIR } = require('./platform')
49
+ const { config } = require('../config')
50
+ const chatStore = require('../stores/chat-store')
51
+
52
// How long (10 minutes) a finished run stays in the in-memory registry before
// it is evicted, so late reconnects can still be replayed from memory.
const RUN_TTL_MS = 1000 * 60 * 10
// Event types that end a subscriber's stream.
const TERMINAL_TYPES = new Set(['done', 'error', 'aborted'])
// Age limit (24 hours) used by the boot-time prune of NDJSON logs / run rows.
const LOG_MAX_AGE_MS = 1000 * 60 * 60 * 24

/**
 * Runs currently held in memory, keyed by run id.
 * @type {Map<string, RunState>}
 */
const registry = new Map()
61
+
62
+ /**
63
+ * @typedef {Object} RunState
64
+ * @property {string} runId
65
+ * @property {string|null} sessionId resume target (null for new sessions)
66
+ * @property {string|null} pendingSessionId local id the user message was stored under
67
+ * @property {string|null} workspaceId
68
+ * @property {string} status running | done | error | aborted
69
+ * @property {Array<object>} events in-memory mirror of logged events (seq-stamped)
70
+ * @property {number} seq highest sequence assigned
71
+ * @property {Set<function>} subscribers live listeners
72
+ * @property {{ current: any }} activeRef handle to the child process for abort
73
+ * @property {string} logPath
74
+ * @property {import('fs').WriteStream|null} logStream
75
+ * @property {boolean} aborting
76
+ */
77
+
78
/**
 * Pick the directory that holds the per-run NDJSON logs.
 *
 * Uses `<DATA_DIR>/chat-runs` when DATA_DIR passes a write-access check, and
 * `<HOME_DIR>/.minion/chat-runs` otherwise (mirrors the db module's fallback).
 *
 * @returns {string} path of the chat-runs directory (not created here)
 */
function resolveRunsDir() {
  let parent
  try {
    // accessSync throws when DATA_DIR is missing or not writable.
    fs.accessSync(DATA_DIR, fs.constants.W_OK)
    parent = DATA_DIR
  } catch {
    parent = path.join(config.HOME_DIR, '.minion')
  }
  return path.join(parent, 'chat-runs')
}
89
+
90
+ const RUNS_DIR = resolveRunsDir()
91
+
92
/**
 * Path of the NDJSON event log for a run: `<RUNS_DIR>/<runId>.ndjson`.
 * @param {string} runId
 * @returns {string}
 */
function logPathFor(runId) {
  const fileName = `${runId}.ndjson`
  return path.join(RUNS_DIR, fileName)
}
95
+
96
/**
 * Boot-time housekeeping, in this order:
 *   1. create the runs directory,
 *   2. ask the store to flag leftover `running` rows as interrupted,
 *   3. ask the store to prune rows older than LOG_MAX_AGE_MS,
 *   4. delete old NDJSON logs from disk.
 * Steps 1-3 are independent: a failure is logged and the next step still runs.
 */
function init() {
  const steps = [
    {
      run: () => fs.mkdirSync(RUNS_DIR, { recursive: true }),
      report: err => console.error(`[ChatRun] failed to create runs dir: ${err.message}`),
    },
    {
      run: () => {
        const swept = chatStore.markRunningInterrupted()
        if (swept) console.log(`[ChatRun] swept ${swept} interrupted run(s) from previous boot`)
      },
      report: err => console.error(`[ChatRun] interrupted sweep failed: ${err.message}`),
    },
    {
      run: () => chatStore.pruneRuns(LOG_MAX_AGE_MS),
      report: err => console.error(`[ChatRun] run prune failed: ${err.message}`),
    },
  ]

  for (const step of steps) {
    try {
      step.run()
    } catch (err) {
      step.report(err)
    }
  }

  pruneOldLogs()
}
119
+
120
/**
 * Delete `.ndjson` files in RUNS_DIR whose mtime is older than
 * LOG_MAX_AGE_MS. Best-effort: an unreadable directory ends the sweep and a
 * failure on one file is ignored so the remaining files are still checked.
 */
function pruneOldLogs() {
  let entries
  try {
    entries = fs.readdirSync(RUNS_DIR)
  } catch {
    return
  }

  const threshold = Date.now() - LOG_MAX_AGE_MS
  entries
    .filter(name => name.endsWith('.ndjson'))
    .map(name => path.join(RUNS_DIR, name))
    .forEach(file => {
      try {
        const { mtimeMs } = fs.statSync(file)
        if (mtimeMs < threshold) fs.unlinkSync(file)
      } catch { /* ignore */ }
    })
}
136
+
137
/**
 * Start a detached run. Registers the run, opens its NDJSON log, records it in
 * the store, kicks off the executor in the background and returns the run id
 * immediately — without waiting for the executor and regardless of who (if
 * anyone) is subscribed.
 *
 * The executor's outcome is routed to finalize() exactly once: with the result
 * when it resolves, or with an Error when it throws/rejects. A rejection
 * reason that is not an Error (including `undefined`) is wrapped in one so a
 * failed run can never be finalized as a success.
 *
 * @param {Object} params
 * @param {string|null} params.sessionId
 * @param {string|null} params.pendingSessionId
 * @param {string|null} params.workspaceId
 * @param {(emit: (event: object) => void, activeRef: { current: any }) => Promise<{ fullResponse: string, resolvedSessionId: string|null, turnCount: number }>} params.invoke
 * @returns {string} runId
 */
function start({ sessionId = null, pendingSessionId = null, workspaceId = null, invoke }) {
  const runId = crypto.randomUUID()
  /** @type {RunState} */
  const run = {
    runId,
    sessionId,
    pendingSessionId,
    workspaceId,
    status: 'running',
    events: [],
    seq: 0,
    subscribers: new Set(),
    activeRef: { current: null },
    logPath: logPathFor(runId),
    logStream: null,
    aborting: false,
  }
  registry.set(runId, run)

  // Log and store failures are reported but never prevent the run from starting.
  try {
    run.logStream = fs.createWriteStream(run.logPath, { flags: 'w' })
    run.logStream.on('error', err => console.error(`[ChatRun] log write error (${runId}): ${err.message}`))
  } catch (err) {
    console.error(`[ChatRun] failed to open log for ${runId}: ${err.message}`)
  }

  try {
    chatStore.createRun({ runId, sessionId, pendingSessionId, workspaceId })
  } catch (err) {
    console.error(`[ChatRun] createRun failed: ${err.message}`)
  }

  const emit = event => appendEvent(run, event)

  // Fire-and-forget. The executor owns the child process; we own durability.
  // Promise.resolve().then(...) turns a synchronous throw from invoke into a
  // rejection. The two-argument then() keeps the success and failure paths
  // separate, so a failure inside finalize() is only logged by the trailing
  // catch instead of triggering a second finalize() for the same run.
  Promise.resolve()
    .then(() => invoke(emit, run.activeRef))
    .then(
      result => finalize(run, result, null),
      reason => finalize(run, null, reason instanceof Error ? reason : new Error(String(reason))),
    )
    .catch(err => console.error(`[ChatRun] finalize failed (${runId}): ${(err && err.message) || err}`))

  console.log(`[ChatRun] started run ${runId} (ws=${workspaceId || 'none'}, resume=${sessionId || 'new'})`)
  return runId
}
191
+
192
/**
 * Append a wire event to a run: assign the next sequence number, mirror the
 * event in memory, write it to the NDJSON log (best-effort) and notify every
 * subscriber. Everything happens synchronously, so a subscriber attached
 * between two calls cannot miss an event.
 *
 * The manager's sequence number always wins: if the incoming event carries its
 * own `seq` field it is overwritten, because replay filters on `seq`.
 *
 * @param {RunState} run
 * @param {object} event wire event from the executor (or a terminal event)
 * @returns {object} the stored, seq-stamped copy of the event
 */
function appendEvent(run, event) {
  run.seq += 1
  // Spread after `seq` so it stays the first key of the serialized line, then
  // re-assign so an event-supplied `seq` can never replace the stamp.
  const stamped = { seq: run.seq, ...event }
  stamped.seq = run.seq
  run.events.push(stamped)
  if (run.logStream && !run.logStream.destroyed) {
    try { run.logStream.write(JSON.stringify(stamped) + '\n') } catch { /* best-effort */ }
  }
  // A throwing subscriber is logged and must not starve the others.
  for (const fn of run.subscribers) {
    try { fn(stamped) } catch (err) { console.error(`[ChatRun] subscriber error: ${err.message}`) }
  }
  return stamped
}
209
+
210
/**
 * Wrap up a run after its executor settled. Runs whether the executor
 * resolved (`result` set, `err` null) or threw (`result` null, `err` set):
 *   1. rekey the pending session to the resolved session id (new sessions only),
 *   2. persist the assistant response, if there is one,
 *   3. emit an `error` event when `err` is set,
 *   4. emit the `done` event and store the final status,
 *   5. close the log stream, drop the child handle, schedule eviction.
 * Store failures in any step are logged and do not stop the later steps.
 *
 * @param {RunState} run
 * @param {{ fullResponse?: string, resolvedSessionId?: string|null, turnCount?: number }|null} result
 * @param {Error|*} err rejection reason of the executor, or null on success
 * @returns {Promise<void>}
 */
async function finalize(run, result, err) {
  const resolvedSessionId = (result && result.resolvedSessionId) || run.sessionId || null
  const fullResponse = (result && result.fullResponse) || ''
  const turnCount = (result && result.turnCount) || 0

  // Rekey the pending session to the real CLI session id, and persist the
  // assistant response — even a partial one from an aborted run. These are
  // INDEPENDENT: a rekey failure must never block persisting the reply. If the
  // rekey doesn't succeed, we keep the whole conversation under the pending id
  // (messages stay together) rather than splitting it across two session rows.
  let persistSessionId = resolvedSessionId || run.pendingSessionId
  if (!run.sessionId && resolvedSessionId && run.pendingSessionId && run.pendingSessionId !== resolvedSessionId) {
    try {
      const rekeyed = chatStore.rekeySession(run.pendingSessionId, resolvedSessionId)
      persistSessionId = rekeyed ? resolvedSessionId : run.pendingSessionId
    } catch (e) {
      console.error(`[ChatRun] rekey failed (${run.runId}): ${e.message}`)
      persistSessionId = run.pendingSessionId
    }
  }
  try {
    if (fullResponse && persistSessionId) {
      await chatStore.addMessage(persistSessionId, { role: 'assistant', content: fullResponse }, turnCount, run.workspaceId)
    }
  } catch (e) {
    console.error(`[ChatRun] persist failed (${run.runId}): ${e.message}`)
  }

  if (err) {
    // The reason may be a plain string or object rather than an Error; fall
    // back to its string form so the wire event always carries a message.
    const message = (err && err.message) || String(err)
    console.error(`[ChatRun] run ${run.runId} errored: ${message}`)
    appendEvent(run, { type: 'error', error: message, partial: !!fullResponse })
  }

  // Prefer the stored session's turn count for the done event; fall back to
  // the count reported by this run when the load fails or returns nothing.
  let totalTurnCount = turnCount
  try {
    const session = chatStore.load(run.workspaceId)
    totalTurnCount = (session && session.turn_count) || turnCount
  } catch (e) {
    console.error(`[ChatRun] load for done event failed: ${e.message}`)
  }

  // Status is set before `done` is appended, so anything reacting to that
  // event already sees the final status. An abort wins over an error.
  run.status = run.aborting ? 'aborted' : (err ? 'error' : 'done')
  appendEvent(run, { type: 'done', session_id: resolvedSessionId, turn_count: totalTurnCount })

  try {
    chatStore.updateRun(run.runId, { status: run.status, sessionId: resolvedSessionId, lastSeq: run.seq })
  } catch (e) {
    console.error(`[ChatRun] updateRun failed: ${e.message}`)
  }

  if (run.logStream && !run.logStream.destroyed) {
    try { run.logStream.end() } catch { /* ignore */ }
  }
  run.activeRef.current = null

  console.log(`[ChatRun] run ${run.runId} ${run.status} (${fullResponse.length} chars, ${run.seq} events)`)

  // Keep the run inspectable for late reconnects, then free its memory. The
  // timer is unref'd: eviction only frees memory, so it must not keep the
  // process alive during shutdown.
  const evictionTimer = setTimeout(() => evict(run.runId), RUN_TTL_MS)
  if (evictionTimer && typeof evictionTimer.unref === 'function') evictionTimer.unref()
}
274
+
275
/**
 * Drop a run from the in-memory registry. Does nothing for an unknown id.
 * @param {string} runId
 */
function evict(runId) {
  // Map#delete is already a no-op for a missing key.
  registry.delete(runId)
}
280
+
281
/**
 * Subscribe to a run's events starting after a cursor.
 *
 * - Run not in memory: events are replayed from its NDJSON log and no live
 *   listener is attached.
 * - Run in memory but no longer `running`: buffered events are replayed (they
 *   already contain the terminal event) and no live listener is attached.
 * - Run in memory and `running`: buffered events are replayed, then a live
 *   listener is attached. Replay and attach happen without yielding, so no
 *   event can slip in between.
 *
 * Only events with `seq` greater than the cursor are delivered. The subscriber
 * is expected to close its own transport on a terminal event.
 *
 * @param {string} runId
 * @param {number} fromSeq last sequence the client already has (0 = from start)
 * @param {(event: object) => void} onEvent
 * @returns {() => void} unsubscribe
 */
function subscribe(runId, fromSeq, onEvent) {
  const cursor = Number(fromSeq) || 0
  const replay = events => {
    for (const event of events) {
      if (event.seq > cursor) onEvent(event)
    }
  }

  const live = registry.get(runId)
  if (!live) {
    // Evicted or owned by a previous process: disk is the only source.
    replay(readEventsFromFile(runId))
    return () => {}
  }

  replay(live.events)
  if (live.status !== 'running') {
    // Finished but still within its TTL: nothing further will be emitted.
    return () => {}
  }

  // Forward only what is strictly newer than the replay we just did.
  const watermark = live.seq
  const listener = event => {
    if (event.seq > watermark) onEvent(event)
  }
  live.subscribers.add(listener)
  return () => live.subscribers.delete(listener)
}
320
+
321
/**
 * Read a run's NDJSON event log from disk.
 *
 * Returns an empty array when the run id is not a plain token, when the file
 * cannot be read, or when it holds no usable lines. Blank lines, lines that
 * are not valid JSON, and lines that parse to something other than an object
 * are skipped.
 *
 * @param {string} runId
 * @returns {Array<object>} parsed events in file order
 */
function readEventsFromFile(runId) {
  // The id is used as a file name and may come from a reconnecting client.
  // Restricting it to letters, digits, `_` and `-` (UUIDs fit) keeps path
  // separators and `..` out of the path.
  if (typeof runId !== 'string' || !/^[A-Za-z0-9_-]+$/.test(runId)) return []
  try {
    const raw = fs.readFileSync(logPathFor(runId), 'utf-8')
    const out = []
    for (const line of raw.split('\n')) {
      if (!line.trim()) continue
      try {
        const parsed = JSON.parse(line)
        // Consumers read `.seq` on every entry, so a bare `null`, number or
        // string line would crash them; keep objects only.
        if (parsed && typeof parsed === 'object') out.push(parsed)
      } catch { /* skip malformed line */ }
    }
    return out
  } catch {
    return []
  }
}
334
+
335
/**
 * Abort a run: flag it as aborting, send SIGTERM to its child process, and
 * send SIGKILL 2 seconds later if the run still holds a child with the same
 * pid. Errors from `kill` are ignored (the process may already be gone).
 * Does nothing when the run is unknown or has no child handle.
 *
 * @param {string} runId
 * @returns {boolean} true if a signal was sent
 */
function abort(runId) {
  const run = registry.get(runId)
  const child = run ? run.activeRef.current : null
  if (!child) return false

  run.aborting = true
  const { pid } = child
  console.log(`[ChatRun] aborting run ${runId} (PID ${pid})`)
  try { child.kill('SIGTERM') } catch { /* already dead */ }

  // Escalate only if the handle is still set and still refers to this pid.
  const escalate = () => {
    try {
      const current = run.activeRef.current
      if (current && current.pid === pid) current.kill('SIGKILL')
    } catch { /* already dead */ }
  }
  setTimeout(escalate, 2000)
  return true
}
359
+
360
/**
 * Most recently started running run for a workspace — the reconnect target.
 * Looks in the live registry first and falls back to the durable index when
 * no live run matches. A falsy workspace id is treated as `null` when matching
 * registry entries. Store errors yield `null`.
 *
 * @param {string|null} workspaceId
 * @returns {string|null} runId
 */
function getActiveRunId(workspaceId) {
  const wsKey = workspaceId || null
  // The registry Map iterates in insertion order and runs are inserted when
  // they start, so the LAST match is the most recent one. Returning the first
  // match would hand a reconnecting client the oldest running run.
  let latest = null
  for (const run of registry.values()) {
    if (run.status === 'running' && (run.workspaceId || null) === wsKey) latest = run.runId
  }
  if (latest) return latest

  try {
    const row = chatStore.getActiveRun(workspaceId)
    return row ? row.run_id : null
  } catch {
    return null
  }
}
378
+
379
/**
 * Status snapshot of a run. Served from the in-memory registry when the run is
 * still held there, otherwise from the store. Returns null when the run is
 * unknown or the store lookup throws.
 *
 * NOTE(review): the in-memory branch reports `run.sessionId` as `session_id`,
 * while the store branch reports the row's `session_id` — confirm that both
 * are meant to carry the same value for a finished run.
 *
 * @param {string} runId
 * @returns {{ run_id: string, status: string, last_seq: number, session_id: string|null }|null}
 */
function getRunInfo(runId) {
  const live = registry.get(runId)
  if (live) {
    const { runId: run_id, status, seq: last_seq, sessionId: session_id } = live
    return { run_id, status, last_seq, session_id }
  }

  try {
    const row = chatStore.getRun(runId)
    if (!row) return null
    const { run_id, status, last_seq, session_id } = row
    return { run_id, status, last_seq, session_id }
  } catch {
    return null
  }
}
396
+
397
+ module.exports = {
398
+ init,
399
+ start,
400
+ subscribe,
401
+ abort,
402
+ getActiveRunId,
403
+ getRunInfo,
404
+ // exposed for tests
405
+ _registry: registry,
406
+ }
@@ -16,7 +16,7 @@
16
16
 
17
17
  const { normalizeUrl } = require('./url-normalize')
18
18
  const { computeFingerprint } = require('./fingerprint')
19
- const { renderPage, extractWithSelectors } = require('./playwright-runner')
19
+ const { renderPage, extractWithSelectors, normalizeScroll } = require('./playwright-runner')
20
20
  const { cleanHtml } = require('./html-cleaner')
21
21
  const { generateRecipe } = require('./recipe-generator')
22
22
  const pageRecipeStore = require('../../stores/page-recipe-store')
@@ -33,11 +33,13 @@ function isEmptyResult(data) {
33
33
  })
34
34
  }
35
35
 
36
- async function extract({ url, hint }) {
36
+ async function extract({ url, hint, scroll }) {
37
37
  const { template, canonicalUrl } = normalizeUrl(url)
38
38
 
39
39
  // Always render once up-front so we can compute the fingerprint regardless
40
- // of cache state. Cold path reuses the HTML; hot path discards it.
40
+ // of cache state. Cold path reuses the HTML; hot path discards it. No
41
+ // readySelector is known yet here, so renderPage falls back to DOM-settle
42
+ // detection — enough to let an SPA hydrate before we fingerprint/clean.
41
43
  const rendered = await renderPage(canonicalUrl)
42
44
  const fingerprint = computeFingerprint(rendered.html)
43
45
 
@@ -47,7 +49,11 @@ async function extract({ url, hint }) {
47
49
  })
48
50
 
49
51
  if (cached) {
50
- const data = await extractWithSelectors(canonicalUrl, cached.selectors)
52
+ const scrollCfg = normalizeScroll(scroll, cached.selectors)
53
+ const { data, scrollInfo } = await extractWithSelectors(canonicalUrl, cached.selectors, {
54
+ readySelector: cached.ready_selector,
55
+ scroll: scrollCfg,
56
+ })
51
57
  if (!isEmptyResult(data)) {
52
58
  pageRecipeStore.incrementHit({ urlTemplate: template, domFingerprint: fingerprint })
53
59
  pageRecipeStore.setLastVerified({ urlTemplate: template, domFingerprint: fingerprint })
@@ -62,6 +68,7 @@ async function extract({ url, hint }) {
62
68
  selectors: cached.selectors,
63
69
  data,
64
70
  cleaned: null,
71
+ scrollInfo,
65
72
  })
66
73
  }
67
74
  // Hot replay returned nothing — penalize and fall through to cold.
@@ -76,8 +83,13 @@ async function extract({ url, hint }) {
76
83
  hint,
77
84
  })
78
85
 
79
- // Verify the recipe against this exact page before persisting.
80
- const verifyData = await extractWithSelectors(canonicalUrl, recipe.selectors)
86
+ // Verify the recipe against this exact page before persisting. Now that we
87
+ // have a readySelector, the verify render waits for real content.
88
+ const scrollCfg = normalizeScroll(scroll, recipe.selectors)
89
+ const { data: verifyData, scrollInfo } = await extractWithSelectors(canonicalUrl, recipe.selectors, {
90
+ readySelector: recipe.readySelector,
91
+ scroll: scrollCfg,
92
+ })
81
93
  const verified = !isEmptyResult(verifyData)
82
94
 
83
95
  if (verified) {
@@ -86,6 +98,7 @@ async function extract({ url, hint }) {
86
98
  domFingerprint: fingerprint,
87
99
  selectors: recipe.selectors,
88
100
  pageType: recipe.pageType,
101
+ readySelector: recipe.readySelector,
89
102
  })
90
103
  pageRecipeStore.incrementHit({ urlTemplate: template, domFingerprint: fingerprint })
91
104
  }
@@ -102,10 +115,11 @@ async function extract({ url, hint }) {
102
115
  data: verified ? verifyData : recipe.extracted,
103
116
  cleaned,
104
117
  recipePersisted: verified,
118
+ scrollInfo,
105
119
  })
106
120
  }
107
121
 
108
- function shape({ url, finalUrl, statusCode, recipeMode, urlTemplate, fingerprint, pageType, selectors, data, cleaned, recipePersisted }) {
122
+ function shape({ url, finalUrl, statusCode, recipeMode, urlTemplate, fingerprint, pageType, selectors, data, cleaned, recipePersisted, scrollInfo }) {
109
123
  const out = {
110
124
  experimental: true,
111
125
  url,
@@ -119,6 +133,12 @@ function shape({ url, finalUrl, statusCode, recipeMode, urlTemplate, fingerprint
119
133
  structured: data || {},
120
134
  selectors: selectors || {},
121
135
  }
136
+ if (scrollInfo) {
137
+ out.scrollInfo = scrollInfo
138
+ if (scrollInfo.reachedTarget === false) {
139
+ out.warning = `Scroll stopped before reaching target (reason: ${scrollInfo.stoppedReason}, items: ${scrollInfo.items}). Raise scroll.maxScrolls / scroll.maxMs to collect more.`
140
+ }
141
+ }
122
142
  if (recipeMode === 'cold' && recipePersisted === false) {
123
143
  out.warning = 'Recipe verification failed (selectors returned empty). Result reflects LLM extraction; recipe was not persisted.'
124
144
  }