shennian 0.2.78 → 0.2.84

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,8 @@ import crypto from 'node:crypto'
7
7
  import fs from 'node:fs'
8
8
  import path from 'node:path'
9
9
  import { fileURLToPath } from 'node:url'
10
- import { selectDownloadedAttachment } from './wechat-rpa-download-candidates.mjs'
10
+ import { recognizeWindowsScreenshot } from './wechat-rpa-windows-ocr.mjs'
11
+ import { parseWeChatLayout } from './wechat-rpa-lab/layout/parser.mjs'
11
12
 
12
13
  const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..')
13
14
  const helperBridge = path.join(repoRoot, 'scripts/wechat-rpa-win.mjs')
@@ -17,7 +18,7 @@ const SECTION_LABELS = new Set(['群聊', '联系人', '聊天记录', '公众
17
18
  function printHelp() {
18
19
  console.log(`Usage:
19
20
  node scripts/wechat-rpa-win-visual.mjs --group ABC --recent-limit 5
20
- node scripts/wechat-rpa-win-visual.mjs --group ABC --reply-text "我是 AI" --ocr-url https://shennian.net/integrations/wechat-rpa/ocr --token <token>
21
+ node scripts/wechat-rpa-win-visual.mjs --group ABC --reply-text "我是 AI" --ocr-fixture C:\\tmp\\search-ocr.json
21
22
  node scripts/wechat-rpa-win-visual.mjs --group ABC --file C:\\tmp\\demo.png --file C:\\tmp\\demo.mp4 --ocr-fixture C:\\tmp\\search-ocr.json
22
23
 
23
24
  Options:
@@ -28,16 +29,22 @@ Options:
28
29
  --capture-dir <dir> Directory for screenshots and debug JSON. Default: temp shennian-wechat-rpa-win-visual-*.
29
30
  --download-attachments-dir <dir>
30
31
  Copy clicked inbound attachments into this directory.
32
+ --download-expected-token <token>
33
+ Extra filename/token for cache-only download lookup. Repeatable.
34
+ --download-limit <n> Maximum inbound attachments to try in one run. Defaults to 1.
35
+ --allow-right-click-download
36
+ Allow risky right-click Copy fallback when cache-only lookup misses. Disabled by default.
37
+ --allow-search-open Allow opening a group through the WeChat search panel. Disabled by default on Windows live runs.
31
38
  --no-download-attachments
32
39
  Keep inbound attachments as metadata-only/pending-download.
33
- --ocr-url <url> Shennian OCR endpoint, e.g. /integrations/wechat-rpa/ocr.
34
- --token <token> Bearer token for --ocr-url. Also reads WECHAT_RPA_OCR_TOKEN.
40
+ --ocr-url <url> Deprecated; accepted for compatibility but ignored.
41
+ --token <token> Deprecated; accepted for compatibility but ignored.
35
42
  --ocr-fixture <path> Local OCR fixture JSON for deterministic selection tests/debugging.
36
43
  --helper <path> Override native helper exe passed through to scripts/wechat-rpa-win.mjs.
37
44
  --open-timeout-ms <n> Timeout waiting for title confirmation. Default: 12000.
38
45
  --dry-run Open/read only; do not send reply-text/files.
39
46
 
40
- This is the Windows commercial baseline visual RPA orchestrator: WeChat foreground + screenshot + OCR boxes + native click/paste/press.`)
47
+ This is the Windows commercial baseline visual RPA orchestrator: WeChat foreground + screenshot + local OCR boxes + native click/paste/press.`)
41
48
  }
42
49
 
43
50
  function takeOption(argv, name) {
@@ -70,10 +77,21 @@ function sleep(ms) {
70
77
  return new Promise(resolve => setTimeout(resolve, ms))
71
78
  }
72
79
 
80
+ function psSingleQuoted(value) {
81
+ return String(value).replace(/'/g, "''")
82
+ }
83
+
73
84
  export function normalizeConversationName(value) {
74
- return String(value || '')
85
+ const compact = String(value || '')
75
86
  .replace(/[\s\u200b\u200c\u200d]+/g, '')
76
- .replace(/[((]\d+[))]$/g, '')
87
+ .replace(/夕卜/g, '')
88
+ .replace(/氵则/g, '测')
89
+ .replace(/氵則/g, '测')
90
+ .replace(/讠羊/g, '群')
91
+ .replace(/訁羊/g, '群')
92
+ return compact
93
+ .replace(/[((]\d+[))]\d*$/g, '')
94
+ .replace(/([\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}])[00]{1,3}$/u, '$1')
77
95
  .trim()
78
96
  .toLowerCase()
79
97
  }
@@ -126,7 +144,8 @@ export function findConversationInSearchResults(observations, targetName) {
126
144
  })
127
145
  : []
128
146
 
129
- const candidates = sectionCandidates.length > 0 ? sectionCandidates : exactCandidates
147
+ const fallbackCandidates = exactCandidates.filter(item => isLikelySearchResultCandidate(item.row))
148
+ const candidates = sectionCandidates.length > 0 ? sectionCandidates : fallbackCandidates.length > 0 ? fallbackCandidates : exactCandidates
130
149
  if (candidates.length === 0) return null
131
150
 
132
151
  return candidates
@@ -145,10 +164,65 @@ export function findConversationInSearchResults(observations, targetName) {
145
164
  .sort((a, b) => b.score - a.score)[0].row
146
165
  }
147
166
 
167
+ function isLikelySearchResultCandidate(row) {
168
+ const box = row?.box || row
169
+ if (!box) return false
170
+ const x = Number(box.x || 0)
171
+ const y = Number(box.y || 0)
172
+ const width = Number(box.width || 0)
173
+ const height = Number(box.height || 0)
174
+ const normalized = Math.max(Math.abs(x), Math.abs(y), Math.abs(width), Math.abs(height)) <= 1
175
+ if (normalized) return x >= 0.08 && x <= 0.55 && y >= 0.12 && y <= 0.72
176
+ return x >= 120 && x <= 620 && y >= 120 && y <= 720
177
+ }
178
+
179
+ export function findConversationInLeftList(observations, targetName) {
180
+ const target = normalizeConversationName(targetName)
181
+ if (!target) throw new Error('targetName is required')
182
+ const candidates = (Array.isArray(observations) ? observations : [])
183
+ .filter(row => row?.box && normalizeConversationName(observationText(row)) === target && isLikelyLeftListConversation(row))
184
+ .map(row => ({ row, center: centerOfBox(row.box), confidence: observationConfidence(row) }))
185
+ if (!candidates.length) return null
186
+ return candidates
187
+ .map(item => ({
188
+ ...item,
189
+ score: item.confidence * 100 - Math.abs(item.center.x - 220) * 0.01 - item.center.y * 0.001,
190
+ }))
191
+ .sort((a, b) => b.score - a.score)[0].row
192
+ }
193
+
194
+ function isLikelyLeftListConversation(row) {
195
+ const box = row?.box || row
196
+ if (!box) return false
197
+ const x = Number(box.x || 0)
198
+ const y = Number(box.y || 0)
199
+ const width = Number(box.width || 0)
200
+ const height = Number(box.height || 0)
201
+ const normalized = Math.max(Math.abs(x), Math.abs(y), Math.abs(width), Math.abs(height)) <= 1
202
+ if (normalized) return x >= 0.10 && x <= 0.34 && y >= 0.12 && y <= 0.92
203
+ return x >= 120 && x <= 460 && y >= 120 && y <= 900
204
+ }
205
+
148
206
  export function findTitleConfirmation(observations, targetName) {
149
207
  const target = normalizeConversationName(targetName)
150
208
  const rows = Array.isArray(observations) ? observations : []
151
- return rows.find(row => normalizeConversationName(observationText(row)) === target) || null
209
+ return rows.find(row => normalizeConversationName(observationText(row)) === target && isLikelyMainTitle(row)) || null
210
+ }
211
+
212
+ export function findSendButtonObservation(observations) {
213
+ return (Array.isArray(observations) ? observations : [])
214
+ .filter(row => /^(发送|send)$/i.test(normalizedMessageText(observationText(row))) && isLikelyComposerSendButton(row))
215
+ .sort((left, right) => observationConfidence(right) - observationConfidence(left))[0] || null
216
+ }
217
+
218
+ export function findComposerPendingAttachmentObservation(observations) {
219
+ return (Array.isArray(observations) ? observations : [])
220
+ .filter(row => looksLikeComposerPendingAttachmentText(observationText(row)) && isLikelyComposerPendingAttachment(row))
221
+ .sort((left, right) => observationConfidence(right) - observationConfidence(left))[0] || null
222
+ }
223
+
224
+ export function findComposerDirtyObservation(observations) {
225
+ return findSendButtonObservation(observations) || findComposerPendingAttachmentObservation(observations)
152
226
  }
153
227
 
154
228
  export function isRetryableOcrError(status, body) {
@@ -184,6 +258,7 @@ export function geometryPoint(capturePayload, kind) {
184
258
  search: [0.186, 0.087],
185
259
  // Right-side composer: avoid the left search overlay and click inside the text area.
186
260
  input: [0.58, 0.92],
261
+ fileInput: [0.58, 0.86],
187
262
  send: [0.932, 0.945],
188
263
  }
189
264
  const pair = presets[kind]
@@ -258,10 +333,82 @@ async function capture(region, options, label) {
258
333
  return { result, payload: result?.payload, file, imageSize: readPngSize(file) }
259
334
  }
260
335
 
336
+ async function captureWithRetry(region, options, label, retry = {}) {
337
+ const attempts = Number(retry.attempts || 5)
338
+ const delayMs = Number(retry.delayMs || 700)
339
+ let lastError = null
340
+ for (let attempt = 1; attempt <= attempts; attempt += 1) {
341
+ try {
342
+ return await capture(region, options, label)
343
+ } catch (error) {
344
+ lastError = error
345
+ if (!isRetryableWindowCaptureError(error) || attempt === attempts) break
346
+ await sleep(delayMs)
347
+ }
348
+ }
349
+ throw lastError
350
+ }
351
+
352
+ async function waitForStableWindow(options, label, retry = {}) {
353
+ const attempts = Number(retry.attempts || 6)
354
+ const delayMs = Number(retry.delayMs || 650)
355
+ let previous = null
356
+ let latest = null
357
+ for (let attempt = 1; attempt <= attempts; attempt += 1) {
358
+ latest = await captureWithRetry('window', options, `${label}-stability-${attempt}`, { attempts: 2, delayMs })
359
+ if (previous && areSameWindowBounds(previous.payload, latest.payload)) return latest
360
+ previous = latest
361
+ await sleep(delayMs)
362
+ }
363
+ return latest
364
+ }
365
+
366
+ function areSameWindowBounds(left, right) {
367
+ const a = left?.mainWindow?.bounds || left?.bounds
368
+ const b = right?.mainWindow?.bounds || right?.bounds
369
+ if (!a || !b) return false
370
+ return ['x', 'y', 'width', 'height'].every(key => Math.abs(Number(a[key]) - Number(b[key])) <= 2)
371
+ }
372
+
373
+ export function isRetryableWindowCaptureError(error) {
374
+ return /Cannot locate WeChat main window via (Win32|UI Automation)/i.test(String(error?.message || error || ''))
375
+ }
376
+
377
+ export function assertSearchOpenAllowed(options, targetGroup) {
378
+ if (options?.allowSearchOpen) return true
379
+ throw new Error(
380
+ `Target group '${targetGroup || options?.group || ''}' is not the current title or visible in the left list; ` +
381
+ 'search-panel opening is disabled for Windows safety because it can make WeChat non-enumerable.',
382
+ )
383
+ }
384
+
261
385
  async function click(point, options, extra = []) {
262
386
  return runBridge(['click', '--x', String(point.x), '--y', String(point.y), ...extra], options)
263
387
  }
264
388
 
389
+ async function rightClick(point, options, extra = []) {
390
+ return click(point, options, ['--right', ...extra])
391
+ }
392
+
393
+ async function readClipboard(options) {
394
+ const result = await runBridge(['read-clipboard'], options)
395
+ return result?.payload || result
396
+ }
397
+
398
+ function writeDebugArtifact(options, name, value) {
399
+ if (!options?.captureDir) return
400
+ fs.writeFileSync(path.join(options.captureDir, name), `${JSON.stringify(value, null, 2)}\n`)
401
+ }
402
+
403
+ export function prepareCaptureDir(captureDir) {
404
+ fs.mkdirSync(captureDir, { recursive: true })
405
+ for (const entry of fs.readdirSync(captureDir, { withFileTypes: true })) {
406
+ if (!entry.isFile()) continue
407
+ if (!/\.(png|json)$/i.test(entry.name)) continue
408
+ fs.rmSync(path.join(captureDir, entry.name), { force: true })
409
+ }
410
+ }
411
+
265
412
  async function pasteText(text, options) {
266
413
  return runBridge(['paste-text', '--text', text], options)
267
414
  }
@@ -274,59 +421,358 @@ async function press(keys, options) {
274
421
  return runBridge(['press', '--keys', keys], options)
275
422
  }
276
423
 
424
+ async function dismissMenus(options, count = 2) {
425
+ try {
426
+ return await runBridge(['dismiss-menus', '--count', String(count)], options)
427
+ } catch (error) {
428
+ if (process.platform !== 'win32') throw error
429
+ return dismissMenusWithPowerShell(count)
430
+ }
431
+ }
432
+
433
+ async function dismissMenusWithPowerShell(count = 2) {
434
+ const finalCount = Math.max(1, Math.min(5, Number(count) || 2))
435
+ const script = `
436
+ Add-Type @"
437
+ using System;
438
+ using System.Runtime.InteropServices;
439
+ public static class KeyboardDismiss {
440
+ [DllImport("user32.dll")]
441
+ public static extern void keybd_event(byte bVk, byte bScan, uint dwFlags, UIntPtr dwExtraInfo);
442
+ }
443
+
444
+ "@
445
+ for ($i = 0; $i -lt ${finalCount}; $i += 1) {
446
+ [KeyboardDismiss]::keybd_event(0x1B, 0, 0, [UIntPtr]::Zero)
447
+ Start-Sleep -Milliseconds 20
448
+ [KeyboardDismiss]::keybd_event(0x1B, 0, 0x0002, [UIntPtr]::Zero)
449
+ Start-Sleep -Milliseconds 90
450
+ }
451
+ @{ ok = $true; command = "dismiss-menus-powershell"; count = ${finalCount} } | ConvertTo-Json -Compress
452
+ `
453
+ const encoded = Buffer.from(script, 'utf16le').toString('base64')
454
+ const child = spawn('powershell.exe', ['-NoProfile', '-NonInteractive', '-ExecutionPolicy', 'Bypass', '-EncodedCommand', encoded], {
455
+ windowsHide: true,
456
+ stdio: ['ignore', 'pipe', 'pipe'],
457
+ })
458
+ let stdout = ''
459
+ let stderr = ''
460
+ child.stdout.setEncoding('utf8')
461
+ child.stderr.setEncoding('utf8')
462
+ child.stdout.on('data', chunk => { stdout += chunk })
463
+ child.stderr.on('data', chunk => { stderr += chunk })
464
+ const exitCode = await new Promise((resolve, reject) => {
465
+ child.on('error', reject)
466
+ child.on('close', resolve)
467
+ })
468
+ if (exitCode !== 0) {
469
+ throw new Error(stderr.trim() || stdout.trim() || `dismiss-menus PowerShell exited ${exitCode}`)
470
+ }
471
+ try {
472
+ return JSON.parse(stdout.trim())
473
+ } catch {
474
+ return { ok: true, command: 'dismiss-menus-powershell', count: finalCount, stdout: stdout.trim() }
475
+ }
476
+ }
477
+
478
+ async function verifyPostRunSafety(options, artifacts) {
479
+ try {
480
+ const safetyCapture = await captureWithRetry('window', options, 'post-run-safety', { attempts: 4, delayMs: 800 })
481
+ artifacts.push(safetyCapture.file)
482
+ return {
483
+ ok: true,
484
+ artifact: safetyCapture.file,
485
+ bounds: safetyCapture.payload?.mainWindow?.bounds || safetyCapture.payload?.bounds,
486
+ }
487
+ } catch (error) {
488
+ const message = error instanceof Error ? error.message : String(error)
489
+ writeDebugArtifact(options, 'post-run-safety-error.json', {
490
+ ok: false,
491
+ message,
492
+ })
493
+ throw new Error(
494
+ `P0 Windows WeChat safety check failed after the visual flow: ${message}. ` +
495
+ 'This run is not eligible for pass-rate or TODO acceptance even if the business action appeared to succeed.',
496
+ )
497
+ }
498
+ }
499
+
277
500
  async function recognizeScreenshot(capture, options, purpose) {
278
- if (options.ocrFixture) {
279
- const fixture = JSON.parse(fs.readFileSync(options.ocrFixture, 'utf8'))
280
- if (Array.isArray(fixture)) return { ok: true, purpose, observations: fixture }
281
- if (fixture[purpose]) return fixture[purpose]
282
- if (Array.isArray(fixture.observations)) return fixture
283
- if (fixture && typeof fixture === 'object') {
284
- throw new Error(`OCR fixture does not contain purpose '${purpose}'.`)
501
+ const ocr = await recognizeWindowsScreenshot(capture, { ...options, isRetryableOcrError }, purpose)
502
+ return {
503
+ ...ocr,
504
+ layout: buildWindowsCoreLayout(capture, ocr.observations, {
505
+ targetGroup: options.group,
506
+ purpose,
507
+ }),
508
+ }
509
+ }
510
+
511
+ export function buildWindowsCoreLayout(capture, observations, options = {}) {
512
+ const bounds = capture?.payload?.mainWindow?.bounds || capture?.payload?.bounds || capture?.bounds || {}
513
+ const imageSize = capture?.imageSize || options.imageSize || {}
514
+ const width = Number(imageSize.width || bounds.width || 0)
515
+ const height = Number(imageSize.height || bounds.height || 0)
516
+ const windowWidth = Number(bounds.width || width || 1)
517
+ const windowHeight = Number(bounds.height || height || 1)
518
+ const screenshot = {
519
+ path: capture?.file || options.imagePath || '',
520
+ width,
521
+ height,
522
+ scale: width && windowWidth ? width / windowWidth : 1,
523
+ scaleX: width && windowWidth ? width / windowWidth : 1,
524
+ scaleY: height && windowHeight ? height / windowHeight : width && windowWidth ? width / windowWidth : 1,
525
+ }
526
+ const window = {
527
+ x: Number(bounds.x || 0),
528
+ y: Number(bounds.y || 0),
529
+ width: windowWidth,
530
+ height: windowHeight,
531
+ title: '微信',
532
+ ownerName: 'WeChat',
533
+ }
534
+ const ocr = (Array.isArray(observations) ? observations : []).map((item) => {
535
+ if (!item?.box || item.x != null) return item
536
+ return {
537
+ ...item,
538
+ x: item.box.x,
539
+ y: item.box.y,
540
+ width: item.box.width,
541
+ height: item.box.height,
285
542
  }
286
- return fixture
543
+ })
544
+ return parseWeChatLayout({
545
+ ocr,
546
+ screenshot,
547
+ window,
548
+ targetGroup: options.targetGroup || options.group || '',
549
+ })
550
+ }
551
+
552
+ export function summarizeCoreLayout(layout) {
553
+ const currentTitle = layout?.currentTitle || ''
554
+ return {
555
+ currentTitle,
556
+ targetGroupVisible: Boolean(layout?.targetGroupVisible || currentTitle),
557
+ leftConversationCount: Array.isArray(layout?.leftConversationList?.items) ? layout.leftConversationList.items.length : 0,
558
+ searchResultCount: Array.isArray(layout?.searchResults?.sections)
559
+ ? layout.searchResults.sections.reduce((total, section) => total + (Array.isArray(section.items) ? section.items.length : 0), 0)
560
+ : 0,
561
+ messageCount: Array.isArray(layout?.messageArea?.messages) ? layout.messageArea.messages.length : 0,
562
+ attachmentCount: Array.isArray(layout?.attachmentBubbles) ? layout.attachmentBubbles.length : 0,
563
+ inputBox: layout?.inputBox?.rect || null,
564
+ messageArea: layout?.messageArea?.rect || null,
287
565
  }
288
- if (!options.ocrUrl) {
289
- throw new Error('OCR is required for visual flow. Pass --ocr-url/--token or --ocr-fixture.')
566
+ }
567
+
568
+ export function summarizeOcrEvidence(ocr = {}) {
569
+ const observations = Array.isArray(ocr?.observations) ? ocr.observations : []
570
+ return {
571
+ provider: ocr?.provider || '',
572
+ language: ocr?.language || '',
573
+ durationMs: Number(ocr?.durationMs || 0),
574
+ observationCount: observations.length,
575
+ lineCount: observations.filter(item => item?.providerKind === 'line').length,
576
+ wordCount: observations.filter(item => item?.providerKind === 'word').length,
290
577
  }
291
- const token = options.token || process.env.WECHAT_RPA_OCR_TOKEN || ''
292
- if (!token) throw new Error('Missing OCR bearer token. Pass --token or WECHAT_RPA_OCR_TOKEN.')
293
- const imageBase64 = fs.readFileSync(capture.file).toString('base64')
294
- const maxAttempts = Math.max(1, Number(options.ocrRetries || 2) + 1)
295
- let lastError = null
296
- let response = null
297
- for (let attempt = 1; attempt <= maxAttempts; attempt++) {
298
- response = await fetch(options.ocrUrl, {
299
- method: 'POST',
300
- headers: {
301
- 'Content-Type': 'application/json',
302
- Authorization: `Bearer ${token}`,
303
- },
304
- body: JSON.stringify({
305
- imageBase64,
306
- mimeType: 'image/png',
307
- purpose,
308
- conversationName: options.group,
309
- channelId: options.channelId,
310
- }),
311
- signal: AbortSignal.timeout(options.ocrTimeoutMs),
312
- })
313
- if (response.ok) break
314
- const body = await response.text()
315
- lastError = `OCR request failed: ${response.status} ${body}`
316
- if (attempt >= maxAttempts || !isRetryableOcrError(response.status, body)) {
317
- throw new Error(lastError)
578
+ }
579
+
580
+ export async function detectNewOutboundBubble(beforeFile, afterFile) {
581
+ return detectNewOutboundVisualChange(beforeFile, afterFile, {
582
+ kind: 'text-bubble',
583
+ minimumPixels: 250,
584
+ predicate: 'green',
585
+ topRatio: 0.45,
586
+ bottomRatio: 0.88,
587
+ })
588
+ }
589
+
590
+ export async function detectNewOutboundAttachmentBubble(beforeFile, afterFile) {
591
+ return detectNewOutboundVisualChange(beforeFile, afterFile, {
592
+ kind: 'attachment-bubble',
593
+ minimumPixels: 900,
594
+ predicate: 'non-background',
595
+ topRatio: 0.18,
596
+ bottomRatio: 0.84,
597
+ })
598
+ }
599
+
600
+ export function classifyComposerPendingAttachmentVisualMetrics(metrics = {}) {
601
+ const width = Number(metrics.width || 0)
602
+ const height = Number(metrics.height || 0)
603
+ const grayPixels = Number(metrics.grayPixels || 0)
604
+ const sampledPixels = Number(metrics.sampledPixels || 0)
605
+ const grayRatio = sampledPixels > 0 ? grayPixels / sampledPixels : Number(metrics.grayRatio || 0)
606
+ const box = metrics.box || {}
607
+ const boxWidth = Number(box.width || 0)
608
+ const boxHeight = Number(box.height || 0)
609
+ const boxY = Number(box.y || 0)
610
+ return Boolean(
611
+ width > 0
612
+ && height > 0
613
+ && grayPixels >= 1000
614
+ && grayRatio >= 0.08
615
+ && boxWidth >= width * 0.12
616
+ && boxHeight >= 18
617
+ && boxY >= height * 0.88
618
+ )
619
+ }
620
+
621
+ export async function detectComposerPendingAttachmentVisual(filePath) {
622
+ if (!filePath) return { ok: false, reason: 'missing image path' }
623
+ const script = `
624
+ Add-Type -AssemblyName System.Drawing
625
+ $image = [System.Drawing.Bitmap]::FromFile('${psSingleQuoted(filePath)}')
626
+ try {
627
+ $left = [int]($image.Width * 0.34)
628
+ $right = [int]($image.Width * 0.92)
629
+ $top = [int]($image.Height * 0.90)
630
+ $bottom = [int]($image.Height * 0.995)
631
+ $grayPixels = 0
632
+ $sampledPixels = 0
633
+ $minX = $image.Width
634
+ $minY = $image.Height
635
+ $maxX = 0
636
+ $maxY = 0
637
+ for ($y = $top; $y -lt $bottom; $y += 2) {
638
+ for ($x = $left; $x -lt $right; $x += 2) {
639
+ $pixel = $image.GetPixel($x, $y)
640
+ $sampledPixels += 1
641
+ $spread = [Math]::Max([Math]::Abs($pixel.R - $pixel.G), [Math]::Max([Math]::Abs($pixel.G - $pixel.B), [Math]::Abs($pixel.R - $pixel.B)))
642
+ $isCardGray = $pixel.R -ge 228 -and $pixel.R -le 246 -and $pixel.G -ge 228 -and $pixel.G -le 246 -and $pixel.B -ge 228 -and $pixel.B -le 246 -and $spread -le 8
643
+ if ($isCardGray) {
644
+ $grayPixels += 1
645
+ if ($x -lt $minX) { $minX = $x }
646
+ if ($y -lt $minY) { $minY = $y }
647
+ if ($x -gt $maxX) { $maxX = $x }
648
+ if ($y -gt $maxY) { $maxY = $y }
649
+ }
650
+ }
651
+ }
652
+ $grayRatio = 0
653
+ if ($sampledPixels -gt 0) { $grayRatio = $grayPixels / $sampledPixels }
654
+ @{
655
+ kind = "composer-pending-attachment-visual"
656
+ width = $image.Width
657
+ height = $image.Height
658
+ sampledPixels = $sampledPixels
659
+ grayPixels = $grayPixels
660
+ grayRatio = $grayRatio
661
+ scan = @{ left = $left; right = $right; top = $top; bottom = $bottom; step = 2 }
662
+ box = @{ x = $minX; y = $minY; width = [Math]::Max(0, $maxX - $minX); height = [Math]::Max(0, $maxY - $minY) }
663
+ } | ConvertTo-Json -Depth 4 -Compress
664
+ } finally {
665
+ $image.Dispose()
666
+ }
667
+ `
668
+ const encoded = Buffer.from(script, 'utf16le').toString('base64')
669
+ const child = spawn('powershell.exe', ['-NoProfile', '-NonInteractive', '-ExecutionPolicy', 'Bypass', '-EncodedCommand', encoded], {
670
+ windowsHide: true,
671
+ stdio: ['ignore', 'pipe', 'pipe'],
672
+ })
673
+ let stdout = ''
674
+ let stderr = ''
675
+ child.stdout.setEncoding('utf8')
676
+ child.stderr.setEncoding('utf8')
677
+ child.stdout.on('data', chunk => { stdout += chunk })
678
+ child.stderr.on('data', chunk => { stderr += chunk })
679
+ const exitCode = await new Promise((resolve, reject) => {
680
+ child.on('error', reject)
681
+ child.on('close', resolve)
682
+ })
683
+ if (exitCode !== 0) return { ok: false, reason: stderr.trim() || `powershell exited ${exitCode}` }
684
+ try {
685
+ const metrics = JSON.parse(stdout.trim())
686
+ return {
687
+ ...metrics,
688
+ ok: classifyComposerPendingAttachmentVisualMetrics(metrics),
318
689
  }
319
- await sleep(400 * attempt)
690
+ } catch {
691
+ return { ok: false, reason: stdout.trim() || 'invalid composer visual detector JSON' }
320
692
  }
321
- const json = await response.json()
322
- const debugPath = capture.file.replace(/\.png$/i, `-${purpose}.ocr.json`)
323
- fs.writeFileSync(debugPath, `${JSON.stringify(json, null, 2)}\n`)
324
- return json
325
693
  }
326
694
 
327
- function summarizeObservations(observations, limit) {
695
+ export function attachmentVisualBeforeFile(opened = {}, pendingCapture = {}) {
696
+ return pendingCapture?.file || opened?.file || null
697
+ }
698
+
699
+ async function detectNewOutboundVisualChange(beforeFile, afterFile, detector) {
700
+ if (!beforeFile || !afterFile) return { ok: false, reason: 'missing image path', kind: detector.kind }
701
+ const script = `
702
+ Add-Type -AssemblyName System.Drawing
703
+ $before = [System.Drawing.Bitmap]::FromFile('${psSingleQuoted(beforeFile)}')
704
+ $after = [System.Drawing.Bitmap]::FromFile('${psSingleQuoted(afterFile)}')
705
+ try {
706
+ if ($before.Width -ne $after.Width -or $before.Height -ne $after.Height) {
707
+ @{ ok = $false; reason = "image size mismatch"; changedGreenPixels = 0 } | ConvertTo-Json -Compress
708
+ exit 0
709
+ }
710
+ $left = [int]($after.Width * 0.52)
711
+ $right = [int]($after.Width * 0.96)
712
+ $top = [int]($after.Height * ${Number(detector.topRatio)})
713
+ $bottom = [int]($after.Height * ${Number(detector.bottomRatio)})
714
+ $changedPixels = 0
715
+ $minX = $after.Width
716
+ $minY = $after.Height
717
+ $maxX = 0
718
+ $maxY = 0
719
+ for ($y = $top; $y -lt $bottom; $y += 2) {
720
+ for ($x = $left; $x -lt $right; $x += 2) {
721
+ $a = $after.GetPixel($x, $y)
722
+ $b = $before.GetPixel($x, $y)
723
+ $isGreen = $a.G -ge 175 -and $a.R -ge 80 -and $a.R -le 190 -and $a.B -ge 50 -and $a.B -le 180 -and ($a.G - $a.R) -ge 25 -and ($a.G - $a.B) -ge 45
724
+ $isNonBackground = -not ($a.R -ge 245 -and $a.G -ge 245 -and $a.B -ge 245) -and -not ($a.R -ge 235 -and $a.G -ge 235 -and $a.B -ge 235 -and [Math]::Abs($a.R - $a.G) -le 5 -and [Math]::Abs($a.G - $a.B) -le 5)
725
+ $changed = ([Math]::Abs($a.R - $b.R) + [Math]::Abs($a.G - $b.G) + [Math]::Abs($a.B - $b.B)) -ge 45
726
+ $wanted = ${detector.predicate === 'green' ? '$isGreen' : '$isNonBackground'}
727
+ if ($wanted -and $changed) {
728
+ $changedPixels += 1
729
+ if ($x -lt $minX) { $minX = $x }
730
+ if ($y -lt $minY) { $minY = $y }
731
+ if ($x -gt $maxX) { $maxX = $x }
732
+ if ($y -gt $maxY) { $maxY = $y }
733
+ }
734
+ }
735
+ }
736
+ @{
737
+ ok = $changedPixels -ge ${Number(detector.minimumPixels)}
738
+ kind = "${psSingleQuoted(detector.kind)}"
739
+ changedPixels = $changedPixels
740
+ sampledStep = 2
741
+ box = @{ x = $minX; y = $minY; width = [Math]::Max(0, $maxX - $minX); height = [Math]::Max(0, $maxY - $minY) }
742
+ } | ConvertTo-Json -Depth 4 -Compress
743
+ } finally {
744
+ $before.Dispose()
745
+ $after.Dispose()
746
+ }
747
+ `
748
+ const encoded = Buffer.from(script, 'utf16le').toString('base64')
749
+ const child = spawn('powershell.exe', ['-NoProfile', '-NonInteractive', '-ExecutionPolicy', 'Bypass', '-EncodedCommand', encoded], {
750
+ windowsHide: true,
751
+ stdio: ['ignore', 'pipe', 'pipe'],
752
+ })
753
+ let stdout = ''
754
+ let stderr = ''
755
+ child.stdout.setEncoding('utf8')
756
+ child.stderr.setEncoding('utf8')
757
+ child.stdout.on('data', chunk => { stdout += chunk })
758
+ child.stderr.on('data', chunk => { stderr += chunk })
759
+ const exitCode = await new Promise((resolve, reject) => {
760
+ child.on('error', reject)
761
+ child.on('close', resolve)
762
+ })
763
+ if (exitCode !== 0) return { ok: false, reason: stderr.trim() || `powershell exited ${exitCode}` }
764
+ try {
765
+ return JSON.parse(stdout.trim())
766
+ } catch {
767
+ return { ok: false, reason: stdout.trim() || 'invalid bubble detector JSON' }
768
+ }
769
+ }
770
+
771
+ export function summarizeObservations(observations, limit) {
328
772
  return (Array.isArray(observations) ? observations : [])
329
773
  .filter(row => observationText(row))
774
+ .filter(isUsefulSummaryObservation)
775
+ .sort((a, b) => observationBottom(a) - observationBottom(b))
330
776
  .slice(-limit)
331
777
  .map(row => ({
332
778
  text: observationText(row),
@@ -337,6 +783,23 @@ function summarizeObservations(observations, limit) {
337
783
  }))
338
784
  }
339
785
 
786
+ function observationBottom(row) {
787
+ const box = row?.box || row
788
+ return Number(box?.y || 0) + Number(box?.height || 0)
789
+ }
790
+
791
+ function isUsefulSummaryObservation(row) {
792
+ const text = observationText(row)
793
+ const compact = normalizedMessageText(text)
794
+ if (!compact) return false
795
+ if (/^[0-9::.\-]+$/.test(compact)) return false
796
+ if (/^[0-9]{1,2}[a-z\u0400-\u04ff]{1,3}[0-9]{1,2}$/i.test(compact)) return false
797
+ if (/^[0-9].*[0-9]$/.test(compact) && compact.length <= 8) return false
798
+ if (compact.replace(/^[00]+/, '') === normalizedMessageText('微信电脑版')) return false
799
+ if (compact === '0') return false
800
+ return compact.length >= 2 || Boolean(row?.attachment)
801
+ }
802
+
340
803
  function normalizedMessageText(value) {
341
804
  return String(value || '').replace(/\s+/g, '').toLowerCase()
342
805
  }
@@ -346,10 +809,139 @@ export function observationsContainText(observations, text) {
346
809
  if (!target) return true
347
810
  return (Array.isArray(observations) ? observations : []).some(row => {
348
811
  const content = normalizedMessageText(observationText(row))
349
- return content.includes(target) || target.includes(content)
812
+ return isStrongTextMatch(content, target)
350
813
  })
351
814
  }
352
815
 
816
/**
 * Finds the first observation that confirms `text` was sent.
 *
 * A row qualifies when its normalized text is a strong match for the
 * normalized target (`isStrongTextMatch`) and `isLikelyOutboundMessageText`
 * accepts the row.
 *
 * @param {Array<object>} observations - OCR observations; non-arrays are treated as empty.
 * @param {string} text - Text that was sent.
 * @returns {object|null} The matching observation, or null when the target is
 *   empty or nothing matches.
 */
export function findSentTextConfirmation(observations, text) {
  const target = normalizedMessageText(text)
  if (!target) return null
  if (!Array.isArray(observations)) return null

  for (const row of observations) {
    const content = normalizedMessageText(observationText(row))
    if (isStrongTextMatch(content, target) && isLikelyOutboundMessageText(row)) return row
  }
  return null
}
824
+
825
/**
 * Confirms that `text` was sent, first from OCR and then from a visual diff.
 *
 * 1. If the OCR observations contain an outbound match, that wins.
 * 2. Otherwise `detectNewOutboundBubble` compares the before/after capture
 *    files; an `ok` result is wrapped in a synthetic observation.
 * 3. Otherwise the method is 'none' and the visual result is returned so the
 *    caller can inspect it.
 *
 * @param {object} confirmOcr - OCR result exposing `observations`.
 * @param {string} text - Text that was sent.
 * @param {object} [beforeCapture] - Capture taken before sending (`file` is read).
 * @param {object} [afterCapture] - Capture taken after sending (`file` is read).
 * @returns {Promise<object>} `{ method, observation }` or `{ method: 'none', visual }`.
 */
async function confirmSentText(confirmOcr, text, beforeCapture, afterCapture) {
  const ocrHit = findSentTextConfirmation(confirmOcr.observations, text)
  if (ocrHit) return { method: 'ocr-outbound-text', observation: ocrHit }

  const visual = await detectNewOutboundBubble(beforeCapture?.file, afterCapture?.file)
  if (!visual?.ok) return { method: 'none', visual }

  const observation = {
    text,
    role: 'outbound-visual',
    confidence: 0,
    box: visual.box,
    visual,
  }
  return { method: 'visual-new-outbound-bubble', observation }
}
845
+
846
/**
 * Captures the window and refuses to continue when the composer already holds
 * pending content.
 *
 * The capture file is appended to `artifacts`. The composer counts as dirty
 * when `findComposerDirtyObservation` finds an OCR row, or, only if OCR found
 * nothing, when `detectComposerPendingAttachmentVisual` reports `ok`. In the
 * dirty case a JSON artifact with the evidence is written before throwing.
 *
 * @param {object} options - Run options forwarded to the capture/OCR helpers.
 * @param {Array<string>} artifacts - Mutated: receives the capture file path.
 * @returns {Promise<{capture: object, ocr: object}>} The capture and its OCR result.
 * @throws {Error} When the composer is not empty.
 */
async function assertComposerEmptyBeforeSend(options, artifacts) {
  const label = 'composer-before-send'
  const composerCapture = await capture('window', options, label)
  artifacts.push(composerCapture.file)
  const composerOcr = await recognizeScreenshot(composerCapture, options, label)

  const dirtyObservation = findComposerDirtyObservation(composerOcr.observations)
  // The visual detector is only consulted when OCR found nothing.
  let dirtyVisual = null
  if (!dirtyObservation) {
    dirtyVisual = await detectComposerPendingAttachmentVisual(composerCapture.file)
  }

  const isClean = !dirtyObservation && !dirtyVisual?.ok
  if (isClean) return { capture: composerCapture, ocr: composerOcr }

  writeJsonArtifact(options, 'composer-before-send-ocr.json', {
    ocrEvidence: summarizeOcrEvidence(composerOcr),
    dirtyObservation,
    dirtyVisual,
    sample: summarizeObservations(composerOcr.observations, 20),
  })
  throw new Error(
    'WeChat composer is not empty before send; pending text or attachments are already staged. ' +
    `Refusing to paste/send new content. See ${composerCapture.file}`,
  )
}
866
+
867
/**
 * Position heuristic: is this observation's box where an outbound message is
 * expected?
 *
 * A box whose x, y, width and height all have magnitude <= 1 is treated as
 * normalized coordinates (center x >= 0.52, center y within 0.22-0.88).
 * Otherwise fixed pixel thresholds apply (center x >= 700, center y within
 * 180-900).
 *
 * @param {object} row - Observation with a `box`, or a bare box.
 * @returns {boolean} True when the box center falls in the accepted region.
 */
function isLikelyOutboundMessageText(row) {
  const box = row?.box || row
  if (!box) return false

  const [x, y, width, height] = [box.x, box.y, box.width, box.height].map(value => Number(value || 0))
  const centerX = x + width / 2
  const centerY = y + height / 2

  const isNormalized = [x, y, width, height].every(value => Math.abs(value) <= 1)
  const [minX, minY, maxY] = isNormalized ? [0.52, 0.22, 0.88] : [700, 180, 900]
  return centerX >= minX && centerY >= minY && centerY <= maxY
}
880
+
881
/**
 * Position heuristic: is this observation's box in the bottom-right region
 * where the composer's send button is expected?
 *
 * Normalized boxes (all of x, y, width, height with magnitude <= 1) need a
 * center at x >= 0.78 and y >= 0.82; otherwise the pixel thresholds are
 * x >= 900 and y >= 720.
 *
 * @param {object} row - Observation with a `box`, or a bare box.
 * @returns {boolean} True when the box center falls in the accepted region.
 */
function isLikelyComposerSendButton(row) {
  const box = row?.box || row
  if (!box) return false

  const [x, y, width, height] = [box.x, box.y, box.width, box.height].map(value => Number(value || 0))
  const centerX = x + width / 2
  const centerY = y + height / 2

  const isNormalized = [x, y, width, height].every(value => Math.abs(value) <= 1)
  const [minX, minY] = isNormalized ? [0.78, 0.82] : [900, 720]
  return centerX >= minX && centerY >= minY
}
894
+
895
/**
 * Returns the center of an observation's box in normalized coordinates.
 *
 * - Boxes already normalized (x, y, width, height all with magnitude <= 1)
 *   are returned as-is.
 * - Pixel boxes are divided by `imageWidth` / `imageHeight`, read from the row
 *   or the box, when both are positive.
 * - Otherwise a fallback frame of 1800 x 1100 is used.
 *
 * @param {object} row - Observation with a `box`, or a bare box.
 * @returns {{x: number, y: number}|null} Center point, or null without a box.
 */
function normalizedObservationCenter(row) {
  const box = row?.box || row
  if (!box) return null

  const toNumber = value => Number(value || 0)
  const x = toNumber(box.x)
  const y = toNumber(box.y)
  const width = toNumber(box.width)
  const height = toNumber(box.height)
  const centerX = x + width / 2
  const centerY = y + height / 2

  const largestMagnitude = Math.max(Math.abs(x), Math.abs(y), Math.abs(width), Math.abs(height))
  if (largestMagnitude <= 1) return { x: centerX, y: centerY }

  const imageWidth = Number(row?.imageWidth || box.imageWidth || 0)
  const imageHeight = Number(row?.imageHeight || box.imageHeight || 0)
  const hasImageSize = imageWidth > 0 && imageHeight > 0
  const [frameWidth, frameHeight] = hasImageSize ? [imageWidth, imageHeight] : [1800, 1100]
  return { x: centerX / frameWidth, y: centerY / frameHeight }
}
911
+
912
/**
 * Tests whether text contains something shaped like a file name with a known
 * document, archive, image or video extension.
 *
 * The text is normalized with `normalizedMessageText`, then '。' is mapped to
 * '.' and '|' to 'i' before matching.
 *
 * @param {*} value - Raw observation text.
 * @returns {boolean} True when a known extension is found at the end of the
 *   text or at a word boundary.
 */
function looksLikeComposerPendingAttachmentText(value) {
  const extensionAtEnd = /\.(txt|csv|pdf|docx?|xlsx?|pptx?|zip|rar|7z|png|jpe?g|gif|webp|bmp|mp4|mov|avi|mkv|webm)$/i
  const extensionAtBoundary = /\.(txt|csv|pdf|docx?|xlsx?|pptx?|zip|rar|7z|png|jpe?g|gif|webp|bmp|mp4|mov|avi|mkv|webm)\b/i

  const normalized = normalizedMessageText(value)
  const text = normalized.replace(/[.。]/g, '.').replace(/[||]/g, 'i')
  return extensionAtEnd.test(text) || extensionAtBoundary.test(text)
}
919
+
920
/**
 * Position heuristic: does the observation's normalized center fall in the
 * band 0.26 <= x <= 0.86 with y >= 0.78?
 *
 * @param {object} row - Observation passed to `normalizedObservationCenter`.
 * @returns {boolean} False when no center can be computed or it is outside the band.
 */
function isLikelyComposerPendingAttachment(row) {
  const center = normalizedObservationCenter(row)
  if (!center) return false

  const withinHorizontalBand = center.x >= 0.26 && center.x <= 0.86
  const lowEnough = center.y >= 0.78
  return withinHorizontalBand && lowEnough
}
925
+
926
/**
 * Position heuristic: does the observation's box start in the top area to the
 * right of the left column?
 *
 * Unlike the other position checks this uses the box origin, not its center.
 * Normalized boxes need x >= 0.32 and y <= 0.18; pixel boxes need x >= 360
 * and y <= 160.
 *
 * @param {object} row - Observation with a `box`, or a bare box.
 * @returns {boolean} True when the box origin is in the accepted region.
 */
function isLikelyMainTitle(row) {
  const box = row?.box || row
  if (!box) return false

  const [x, y, width, height] = [box.x, box.y, box.width, box.height].map(value => Number(value || 0))
  const isNormalized = [x, y, width, height].every(value => Math.abs(value) <= 1)
  const [minX, maxY] = isNormalized ? [0.32, 0.18] : [360, 160]
  return x >= minX && y <= maxY
}
937
+
938
/**
 * Compares two already-normalized strings.
 *
 * `content` containing `target` always matches. The reverse direction
 * (`target` containing `content`) only matches when `content` is long enough:
 * at least half of the target's length, clamped to the range 4-8 characters.
 *
 * @param {string} content - Normalized observed text.
 * @param {string} target - Normalized expected text.
 * @returns {boolean} True for a strong match; false when either side is empty.
 */
function isStrongTextMatch(content, target) {
  if (!content || !target) return false
  if (content.includes(target)) return true
  if (!target.includes(content)) return false

  const halfOfTarget = Math.ceil(target.length * 0.5)
  const minimumReverseLength = Math.min(8, Math.max(4, halfOfTarget))
  return content.length >= minimumReverseLength
}
944
+
353
945
  function basenameForAnyPlatform(file) {
354
946
  const value = String(file || '')
355
947
  return value.includes('\\') ? path.win32.basename(value) : path.basename(value)
@@ -390,6 +982,29 @@ function mimeTypeFromExt(ext) {
390
982
  return 'application/octet-stream'
391
983
  }
392
984
 
985
/**
 * Builds a remote attachment descriptor from text that contains a file name
 * with a recognised extension.
 *
 * The text is cleaned with `normalizeFileCardText`. The first run of
 * ASCII letters, digits, '.', '_' or '-' that ends in a known extension
 * becomes the name, after `safeFileName`.
 *
 * @param {*} text - Raw observation text.
 * @returns {{type: string, name: string, mimeType: string, availability: string}|null}
 *   Descriptor with `availability: 'remote'`, or null when no file name is found.
 */
function attachmentFromText(text) {
  const fileNamePattern = /[a-z0-9][a-z0-9._-]{2,}\.(txt|pdf|docx?|xlsx?|pptx?|zip|rar|7z|png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm)\b/i
  const [matched] = normalizeFileCardText(text).match(fileNamePattern) || []
  if (!matched) return null

  const name = safeFileName(matched)
  const ext = path.extname(name).toLowerCase()
  const type = attachmentTypeFromExt(ext)
  const mimeType = mimeTypeFromExt(ext)
  return { type, name, mimeType, availability: 'remote' }
}
998
+
999
/**
 * Cleans text before file-name extraction.
 *
 * Steps, in order:
 * 1. NFKC-normalize (full-width forms become their ASCII equivalents).
 * 2. Rewrite `wln` / `w1n` / `win` in any case to `win`.
 * 3. Map the CJK character '一', the em dash and the en dash to '-'.
 * 4. Map '。' and '·' to '.'.
 * 5. Remove all whitespace.
 *
 * @param {*} text - Raw text; falsy values are treated as ''.
 * @returns {string} Cleaned text.
 */
function normalizeFileCardText(text) {
  return String(text || '')
    .normalize('NFKC')
    .replace(/w[l1i]n/gi, 'win')
    // The hyphen is escaped and placed last. A bare '-' between '一' (U+4E00)
    // and '—' (U+2014) would be read as an out-of-order range and make this
    // literal a SyntaxError.
    .replace(/[一—–\-]/g, '-')
    .replace(/[.。·]/g, '.')
    .replace(/\s+/g, '')
}
1007
+
393
1008
  function attachmentTypeFromExt(ext) {
394
1009
  if (IMAGE_EXTENSIONS.has(String(ext || '').toLowerCase())) return 'image'
395
1010
  if (VIDEO_EXTENSIONS.has(String(ext || '').toLowerCase())) return 'video'
@@ -418,73 +1033,241 @@ export function classifyOutboundFile(file) {
418
1033
  }
419
1034
  }
420
1035
 
421
- function candidateDownloadRoots() {
422
- const home = process.env.USERPROFILE || process.env.HOME || ''
423
- return [
424
- process.env.USERPROFILE ? path.win32.join(process.env.USERPROFILE, 'Downloads') : '',
425
- process.env.USERPROFILE ? path.win32.join(process.env.USERPROFILE, 'Documents') : '',
426
- process.env.USERPROFILE ? path.win32.join(process.env.USERPROFILE, 'Desktop') : '',
427
- process.env.USERPROFILE ? path.win32.join(process.env.USERPROFILE, 'Documents', 'WeChat Files') : '',
428
- home && !process.env.USERPROFILE ? path.join(home, 'Downloads') : '',
429
- ].filter((root, index, roots) => root && roots.indexOf(root) === index && fs.existsSync(root))
1036
/**
 * Copies an inbound attachment into `targetDir` and describes the local copy.
 *
 * The source file is read fully into memory and hashed with SHA-256. The
 * target name is `attachment.name` when it ends in an extension, otherwise
 * the source file's base name; `uniqueInboundPath` turns it into the final
 * path. The file is written only when nothing exists at that path yet.
 *
 * @param {{path: string}} source - Located source file.
 * @param {string} targetDir - Destination directory (created if missing).
 * @param {object} [attachment] - Original descriptor; `name`, `type` and
 *   `mimeType` are reused when present.
 * @returns {object} Descriptor with `localPath`, `hash`, `size` and
 *   `availability: 'edge-local'`.
 */
export function copyInboundAttachment(source, targetDir, attachment) {
  const contents = fs.readFileSync(source.path)
  const hash = crypto.createHash('sha256').update(contents).digest('hex')

  const hasNamedExtension = attachment?.name && /\.[\p{L}\p{N}]+$/u.test(attachment.name)
  const preferredName = hasNamedExtension ? attachment.name : path.basename(source.path)
  const filePath = uniqueInboundPath(targetDir, preferredName, hash)

  fs.mkdirSync(targetDir, { recursive: true })
  const alreadyPresent = fs.existsSync(filePath)
  if (!alreadyPresent) fs.writeFileSync(filePath, contents)

  const ext = path.extname(filePath).toLowerCase()
  return {
    type: attachment?.type || attachmentTypeFromExt(ext),
    name: path.basename(filePath),
    mimeType: attachment?.mimeType || mimeTypeFromExt(ext),
    size: contents.byteLength,
    localPath: filePath,
    hash,
    availability: 'edge-local',
  }
}
431
1055
 
432
- function walkFiles(root, maxDepth, limit) {
433
- const result = []
434
- const stack = [{ dir: root, depth: 0 }]
435
- while (stack.length && result.length < limit) {
1056
/**
 * Extracts the observations that look like attachments.
 *
 * Only rows with a `box` and non-empty text are considered. The attachment
 * comes from `row.attachment`, else from a file name in the text, else from a
 * size-only row. Results are ordered top-to-bottom by box bottom edge and
 * trimmed with `slice(-limit)`, i.e. the bottom-most entries are kept.
 *
 * @param {Array<object>} observations - OCR observations; non-arrays are treated as empty.
 * @param {number} limit - Passed to `slice(-limit)`.
 * @returns {Array<object>} Rows of `{ text, role, confidence, box, attachment }`.
 */
export function summarizeAttachmentObservations(observations, limit) {
  const summaries = []
  for (const row of Array.isArray(observations) ? observations : []) {
    if (!(row?.box && observationText(row))) continue

    const attachment = row.attachment || attachmentFromText(observationText(row)) || attachmentFromSizeRow(row)
    if (!attachment) continue

    summaries.push({
      text: observationText(row) || attachment.name,
      role: row.role || 'unknown',
      confidence: observationConfidence(row),
      box: row.box,
      attachment,
    })
  }

  summaries.sort((a, b) => observationBottom(a) - observationBottom(b))
  return summaries.slice(-limit)
}
1074
+
1075
/**
 * Treats a row whose text is only a file size (for example "12.5kb") as a
 * generic attachment.
 *
 * Commas are dropped from the normalized text before matching, and the box
 * must start at x >= 260.
 *
 * @param {object} row - Observation with a `box`, or a bare box.
 * @returns {object|null} Placeholder descriptor named 'wechat-attachment', or
 *   null when the row does not qualify.
 */
function attachmentFromSizeRow(row) {
  const sizeOnlyPattern = /^\d+(?:\.\d+)?(?:b|kb|mb|gb)$/
  const text = normalizedMessageText(observationText(row)).replace(/,/g, '')
  const box = row?.box || row
  const left = Number(box?.x || 0)

  const qualifies = sizeOnlyPattern.test(text) && left >= 260
  if (!qualifies) return null

  return {
    type: 'file',
    name: 'wechat-attachment',
    mimeType: 'application/octet-stream',
    availability: 'remote',
  }
}
1088
+
1089
/**
 * Merges attachment observations into the recent-message list.
 *
 * An attachment message is matched to an existing message when
 * - both carry the same non-empty attachment name, or
 * - both have boxes whose bottom edges are less than 8 units apart.
 *
 * A matched message that has no attachment yet receives the attachment;
 * unmatched attachment messages are appended. The result is ordered by box
 * bottom edge and trimmed with `slice(-limit)`.
 *
 * @param {Array<object>} recentMessages - Existing messages (not mutated).
 * @param {Array<object>} attachmentMessages - Attachment observations to merge in.
 * @param {number} limit - Passed to `slice(-limit)`.
 * @returns {Array<object>} Merged, ordered and trimmed messages.
 */
function mergeRecentMessagesWithAttachments(recentMessages, attachmentMessages, limit) {
  const merged = [...recentMessages]
  for (const message of attachmentMessages) {
    const attachmentName = message.attachment?.name
    const existingIndex = merged.findIndex(item => {
      // Names only count when the incoming one is non-empty. Comparing two
      // missing names (undefined === undefined) would glue a nameless
      // attachment onto the first plain text message.
      if (attachmentName && item.attachment?.name === attachmentName) return true
      return Boolean(item.box && message.box)
        && Math.abs(observationBottom(item) - observationBottom(message)) < 8
    })

    if (existingIndex < 0) {
      merged.push(message)
      continue
    }
    const existing = merged[existingIndex]
    if (!existing.attachment && message.attachment) {
      merged[existingIndex] = { ...existing, attachment: message.attachment }
    }
  }
  return merged
    .sort((a, b) => observationBottom(a) - observationBottom(b))
    .slice(-limit)
}
1105
+
1106
/**
 * Resolves a clipboard payload to the first existing regular file it names.
 *
 * Candidates are taken from `payload.files` / `payload.Files` first, then
 * from each line of `payload.text` / `payload.Text`. Text lines are trimmed,
 * have one surrounding quote stripped from each end, and have a leading
 * `file:///` prefix (any case) removed and percent-decoded.
 *
 * Unusable candidates are skipped rather than thrown on: malformed percent
 * escapes, paths that vanish between checks, and paths the fs layer rejects.
 *
 * @param {object} payload - Clipboard payload.
 * @returns {{path: string, size: number, mtimeMs: number}|null} The first
 *   candidate that is a regular file, or null.
 */
function clipboardSourceFile(payload) {
  // One stat per candidate. `throwIfNoEntry: false` covers the missing-file
  // case and the catch covers everything else.
  const asRegularFile = (candidate) => {
    if (!candidate) return null
    try {
      const stat = fs.statSync(candidate, { throwIfNoEntry: false })
      if (!stat?.isFile()) return null
      return { path: candidate, size: stat.size, mtimeMs: stat.mtimeMs }
    } catch {
      return null
    }
  }

  const files = Array.isArray(payload?.files) ? payload.files : Array.isArray(payload?.Files) ? payload.Files : []
  for (const file of files) {
    const hit = asRegularFile(String(file || '').trim())
    if (hit) return hit
  }

  const text = String(payload?.text || payload?.Text || '')
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trim().replace(/^["']|["']$/g, '')
    let candidate = line
    if (/^file:\/\/\//i.test(line)) {
      try {
        candidate = decodeURIComponent(line.replace(/^file:\/\/\//i, ''))
      } catch {
        // Malformed percent-encoding: this line cannot name a file.
        continue
      }
    }
    const hit = asRegularFile(candidate)
    if (hit) return hit
  }

  return null
}
1126
+
1127
/**
 * Lists the default directories to scan for cached files: `xwechat_files`
 * and `WeChat Files` under the user's `Documents` folder.
 *
 * The home directory is taken from `USERPROFILE`, falling back to `HOME`.
 *
 * @returns {Array<string>} Candidate root paths; empty when no home directory is set.
 */
function defaultWeChatCacheRoots() {
  const home = process.env.USERPROFILE || process.env.HOME
  if (!home) return []
  return ['xwechat_files', 'WeChat Files'].map(folder => path.join(home, 'Documents', folder))
}
1136
+
1137
/**
 * Works out the file extension to look for, trying in order:
 * 1. the extension of `attachment.name`;
 * 2. the first extension found among `options.downloadExpectedTokens`;
 * 3. a small MIME-type mapping (png, jpeg, gif, pdf, anything containing 'mp4').
 *
 * @param {object} [attachment] - Descriptor; `name` and `mimeType` are read.
 * @param {object} [options] - Run options; `downloadExpectedTokens` is read.
 * @returns {string} Lower-case extension including the dot, or '' when unknown.
 */
function extensionForAttachment(attachment = {}, options = {}) {
  const fromName = path.extname(String(attachment.name || '')).toLowerCase()
  if (fromName) return fromName

  const expectedTokens = Array.isArray(options.downloadExpectedTokens) ? options.downloadExpectedTokens : []
  for (const token of expectedTokens) {
    const fromToken = path.extname(String(token || '')).toLowerCase()
    if (fromToken) return fromToken
  }

  const extensionByMime = new Map([
    ['image/png', '.png'],
    ['image/jpeg', '.jpg'],
    ['image/gif', '.gif'],
    ['application/pdf', '.pdf'],
  ])
  const mime = String(attachment.mimeType || '').toLowerCase()
  if (extensionByMime.has(mime)) return extensionByMime.get(mime)
  return mime.includes('mp4') ? '.mp4' : ''
}
1153
+
1154
/**
 * Builds the set of lower-case tokens used to score cached file names.
 *
 * Sources are the message text, the attachment name and every entry of
 * `options.downloadExpectedTokens`, each passed through
 * `normalizedFileNameToken`. For every non-empty normalized value:
 * - each ASCII alphanumeric run of 4+ characters is added, except the
 *   generic words file/image/video/pdf/png/jpg/jpeg/mp4;
 * - the whole value is added when it is 8+ characters long.
 *
 * @param {object} [message] - Message; `text` is read.
 * @param {object} [attachment] - Descriptor; `name` is read.
 * @param {object} [options] - Run options; `downloadExpectedTokens` is read.
 * @returns {Array<string>} Unique tokens in insertion order.
 */
function cacheLookupTokens(message = {}, attachment = {}, options = {}) {
  const genericWord = /^(file|image|video|pdf|png|jpg|jpeg|mp4)$/i
  const expectedTokens = Array.isArray(options.downloadExpectedTokens) ? options.downloadExpectedTokens : []
  const sources = [message.text, attachment.name, ...expectedTokens]

  const tokens = new Set()
  for (const source of sources) {
    const value = normalizedFileNameToken(source)
    if (!value) continue

    const usableParts = value
      .split(/[^a-z0-9]+/i)
      .filter(part => part.length >= 4 && !genericWord.test(part))
    for (const part of usableParts) tokens.add(part.toLowerCase())

    if (value.length >= 8) tokens.add(value.toLowerCase())
  }
  return Array.from(tokens)
}
1171
+
1172
/**
 * Turns each `options.downloadExpectedTokens` entry that has a file extension
 * into a synthetic message with a remote attachment.
 *
 * Each token is reduced to its base name and passed through `safeFileName`;
 * tokens without an extension are dropped.
 *
 * @param {object} [options] - Run options; `downloadExpectedTokens` is read.
 * @returns {Array<object>} Messages with role 'expected-download-token',
 *   confidence 0 and no `box`.
 */
export function expectedDownloadTokenMessages(options = {}) {
  const expectedTokens = Array.isArray(options.downloadExpectedTokens) ? options.downloadExpectedTokens : []
  const messages = []
  for (const token of expectedTokens) {
    const name = safeFileName(basenameForAnyPlatform(token))
    const ext = path.extname(name).toLowerCase()
    if (!ext) continue

    const attachment = {
      type: attachmentTypeFromExt(ext),
      name,
      mimeType: mimeTypeFromExt(ext),
      availability: 'remote',
    }
    messages.push({
      text: name,
      role: 'expected-download-token',
      confidence: 0,
      attachment,
    })
  }
  return messages
}
1192
+
1193
/**
 * Walks the given roots and collects recent, non-empty files.
 *
 * Traversal is depth-first using an explicit stack. A sub-directory is
 * entered when its name matches the known-folder pattern below, or when the
 * directory currently being listed has 'xwechat_files' in its path.
 * Unreadable directories and files that cannot be stat'ed are skipped.
 *
 * @param {Array<string>} roots - Root directories; falsy entries are ignored.
 * @param {object} [options]
 * @param {number} [options.maxCacheFiles=5000] - Stop after this many files.
 * @param {number} [options.maxCacheAgeMs] - Maximum file age; defaults to 14 days.
 * @returns {Array<{path: string, size: number, mtimeMs: number, name: string}>}
 *   Files ordered newest first.
 */
function candidateCacheFiles(roots, options = {}) {
  const maxFiles = Number(options.maxCacheFiles || 5000)
  const maxAgeMs = Number(options.maxCacheAgeMs || 14 * 24 * 60 * 60 * 1000)
  const oldestAllowedMtimeMs = Date.now() - maxAgeMs
  const knownFolderName = /^(cache|temp|rwtemp|filestorage|msgattach|image|video|file|[0-9-]+)$/i

  const pending = roots.filter(Boolean).map(root => path.resolve(root))
  const visited = new Set()
  const found = []

  while (pending.length > 0 && found.length < maxFiles) {
    const dir = pending.pop()
    if (!dir || visited.has(dir)) continue
    visited.add(dir)

    let entries = []
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true })
    } catch {
      // Directory is missing or unreadable.
      continue
    }

    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name)

      if (entry.isDirectory()) {
        const shouldDescend = knownFolderName.test(entry.name) || dir.toLowerCase().includes('xwechat_files')
        if (shouldDescend) pending.push(fullPath)
        continue
      }
      if (!entry.isFile()) continue

      let stat
      try {
        stat = fs.statSync(fullPath)
      } catch {
        // File could not be stat'ed (for example it disappeared mid-scan).
        continue
      }

      const isTooOld = stat.mtimeMs < oldestAllowedMtimeMs
      const isEmpty = stat.size <= 0
      if (isTooOld || isEmpty) continue

      found.push({ path: fullPath, size: stat.size, mtimeMs: stat.mtimeMs, name: entry.name })
      if (found.length >= maxFiles) break
    }
  }

  return found.sort((a, b) => b.mtimeMs - a.mtimeMs)
}
460
1231
 
461
- function snapshotDownloadCandidates() {
462
- const files = new Map()
463
- for (const root of candidateDownloadRoots()) {
464
- for (const file of walkFiles(root, 4, 5000)) {
465
- files.set(file.path, file)
466
- }
467
- }
468
- return files
1232
/**
 * Looks for a cached file that best matches an inbound attachment.
 *
 * Only files with the expected extension are considered. Each one scores the
 * sum, over every lookup token contained in its normalized name, of the token
 * length capped at 20. The highest score wins; ties go to the newest file.
 *
 * @param {object} [message] - Message whose text feeds the lookup tokens.
 * @param {object} [attachment] - Attachment descriptor.
 * @param {object} [options] - Run options; `wechatCacheRoots` overrides the
 *   default roots.
 * @returns {{path: string, size: number, mtimeMs: number}|null} Best match,
 *   or null when the extension, tokens or roots are missing or nothing scores.
 */
export function findCachedInboundAttachment(message = {}, attachment = {}, options = {}) {
  const ext = extensionForAttachment(attachment, options)
  const tokens = cacheLookupTokens(message, attachment, options)
  const roots = options.wechatCacheRoots || defaultWeChatCacheRoots()
  if (!ext || tokens.length === 0 || roots.length === 0) return null

  const scoreFor = (fileName) => {
    const normalizedName = normalizedFileNameToken(fileName)
    let score = 0
    for (const token of tokens) {
      if (normalizedName.includes(token)) score += Math.min(20, token.length)
    }
    return score
  }

  const ranked = []
  for (const file of candidateCacheFiles(roots, options)) {
    if (path.extname(file.name).toLowerCase() !== ext) continue
    const score = scoreFor(file.name)
    if (score > 0) ranked.push({ ...file, score })
  }
  ranked.sort((a, b) => b.score - a.score || b.mtimeMs - a.mtimeMs)

  const [best] = ranked
  return best ? { path: best.path, size: best.size, mtimeMs: best.mtimeMs } : null
}
470
1249
 
471
- export function copyInboundAttachment(source, targetDir, attachment) {
472
- const buffer = fs.readFileSync(source.path)
473
- const hash = crypto.createHash('sha256').update(buffer).digest('hex')
474
- const sourceName = path.basename(source.path)
475
- const preferredName = attachment?.name && /\.[\p{L}\p{N}]+$/u.test(attachment.name) ? attachment.name : sourceName
476
- const filePath = uniqueInboundPath(targetDir, preferredName, hash)
477
- fs.mkdirSync(targetDir, { recursive: true })
478
- if (!fs.existsSync(filePath)) fs.writeFileSync(filePath, buffer)
479
- const ext = path.extname(filePath).toLowerCase()
1250
/**
 * Finds the "Copy" entry among context-menu observations.
 *
 * Rows need a `box` and non-empty text. A row whose normalized, lower-cased
 * text is exactly 复制 / 拷贝 / copy is preferred; otherwise the first row that
 * merely contains one of those words is used.
 *
 * @param {Array<object>} observations - OCR observations; non-arrays are treated as empty.
 * @returns {object|null} The matching observation, or null.
 */
export function findCopyMenuItem(observations) {
  const candidates = []
  for (const row of Array.isArray(observations) ? observations : []) {
    if (!(row?.box && observationText(row))) continue
    candidates.push({ row, text: normalizeText(observationText(row)).toLowerCase() })
  }

  const exact = candidates.find(({ text }) => /^(复制|拷贝|copy)$/i.test(text))
  if (exact?.row) return exact.row

  const loose = candidates.find(({ text }) => /复制|拷贝|\bcopy\b/i.test(text))
  return loose?.row || null
}
1258
+
1259
/**
 * Chooses the screen point to right-click for an attachment message.
 *
 * Normally this is the point `pointFromObservation` returns. When the
 * observation text is only a file size (for example "12.5kb"), the point is
 * shifted right by the box width plus 72, clamped to 48-140, and up by 18.
 * A box whose x and width are both <= 1 is treated as normalized and its
 * width is scaled by `imageSize.width`.
 * NOTE(review): presumably the shift moves the click from the size label onto
 * the file card itself; confirm against the WeChat layout.
 *
 * @param {object} capturePayload - Capture payload forwarded to `pointFromObservation`.
 * @param {object} message - Observation; `box` is read.
 * @param {object} [imageSize] - Capture image size; `width` is read.
 * @returns {{x: number, y: number}} Point to click.
 */
function attachmentRightClickPoint(capturePayload, message, imageSize) {
  const point = pointFromObservation(capturePayload, message, imageSize)
  const box = message?.box || {}

  const sizeOnlyPattern = /^\d+(?:\.\d+)?(?:b|kb|mb|gb)$/
  const text = normalizedMessageText(observationText(message)).replace(/,/g, '')
  if (!sizeOnlyPattern.test(text)) return point

  const boxLooksNormalized = Number(box.x || 0) <= 1 && Number(box.width || 0) <= 1
  const scale = imageSize?.width && boxLooksNormalized ? imageSize.width : 1
  const width = Number(box.width || 0) * scale
  const horizontalShift = Math.max(48, Math.min(140, width + 72))

  return {
    x: Math.round(point.x + horizontalShift),
    y: Math.round(point.y - 18),
  }
}
490
1273
 
@@ -494,66 +1277,197 @@ async function localizeInboundAttachments(recentMessages, messageCapture, option
494
1277
  }
495
1278
  const targetDir = path.resolve(options.downloadAttachmentsDir)
496
1279
  fs.mkdirSync(targetDir, { recursive: true })
497
- let before = snapshotDownloadCandidates()
1280
+ const maxDownloads = downloadAttachmentLimit(options)
498
1281
  const downloads = []
499
1282
  const updated = []
500
- for (const message of recentMessages) {
1283
+ const messages = [...recentMessages]
1284
+ for (const expected of expectedDownloadTokenMessages(options)) {
1285
+ const exists = messages.some(message => message?.attachment?.name === expected.attachment.name)
1286
+ if (!exists) messages.push(expected)
1287
+ }
1288
+ for (const message of messages) {
1289
+ if (downloads.length >= maxDownloads) {
1290
+ updated.push(message)
1291
+ continue
1292
+ }
501
1293
  const attachment = message.attachment
502
- if (!attachment || attachment.localPath || attachment.url || !message.box) {
1294
+ if (!attachment || attachment.localPath || attachment.url) {
503
1295
  updated.push(message)
504
1296
  continue
505
1297
  }
506
- const startedAt = Date.now()
507
- await click(pointFromObservation(messageCapture.payload, message, messageCapture.imageSize), options, ['--no-raise'])
508
- await sleep(2_000)
509
- await press('{ESC}', options).catch(() => {})
510
- const downloaded = selectDownloadedAttachment(before, snapshotDownloadCandidates(), startedAt, attachment)
511
- if (!downloaded) {
1298
+ const cached = findCachedInboundAttachment(message, attachment, options)
1299
+ if (cached) {
1300
+ const localized = copyInboundAttachment(cached, targetDir, attachment)
1301
+ updated.push({ ...message, text: message.text || localized.name, attachment: localized })
1302
+ downloads.push({ text: message.text, ok: true, strategy: 'wechat-cache-scan', sourcePath: cached.path, localPath: localized.localPath, size: localized.size })
1303
+ continue
1304
+ }
1305
+ if (!options.allowRightClickDownload) {
1306
+ const pending = {
1307
+ ...message,
1308
+ attachment: {
1309
+ ...attachment,
1310
+ availability: 'pending-download',
1311
+ providerError: 'No matching local WeChat cache file found; right-click download fallback is disabled on Windows.',
1312
+ },
1313
+ }
1314
+ updated.push(pending)
1315
+ downloads.push({ text: message.text, ok: false, strategy: 'wechat-cache-scan', providerError: pending.attachment.providerError })
1316
+ continue
1317
+ }
1318
+ if (!message.box) {
512
1319
  const pending = {
513
1320
  ...message,
514
1321
  attachment: {
515
1322
  ...attachment,
516
1323
  availability: 'pending-download',
517
- providerError: 'No new downloaded file was observed after clicking attachment bubble',
1324
+ providerError: 'Right-click download fallback requires an OCR box, but cache-only lookup did not find a local file.',
1325
+ },
1326
+ }
1327
+ updated.push(pending)
1328
+ downloads.push({ text: message.text, ok: false, strategy: 'right-click-copy', providerError: pending.attachment.providerError })
1329
+ continue
1330
+ }
1331
+ const targetPoint = attachmentRightClickPoint(messageCapture.payload, message, messageCapture.imageSize)
1332
+ writeDebugArtifact(options, `download-attachment-${downloads.length + 1}-target.json`, {
1333
+ text: message.text,
1334
+ box: message.box,
1335
+ point: targetPoint,
1336
+ strategy: 'right-click-copy',
1337
+ })
1338
+ await rightClick(targetPoint, options, ['--no-raise'])
1339
+ await sleep(350)
1340
+
1341
+ let copied = null
1342
+ let providerError = ''
1343
+ try {
1344
+ const menuCapture = await capture('window', options, `context-menu-${downloads.length + 1}`)
1345
+ const menuOcr = await recognizeScreenshot(menuCapture, options, 'context-menu')
1346
+ writeDebugArtifact(options, `download-attachment-${downloads.length + 1}-menu.json`, menuOcr.observations)
1347
+ const copyItem = findCopyMenuItem(menuOcr.observations)
1348
+ if (!copyItem) {
1349
+ providerError = 'Right-click attachment menu did not expose a Copy item'
1350
+ } else {
1351
+ await click(pointFromObservation(menuCapture.payload, copyItem, menuCapture.imageSize), options, ['--no-raise'])
1352
+ await sleep(350)
1353
+ const clipboard = await readClipboard(options)
1354
+ writeDebugArtifact(options, `download-attachment-${downloads.length + 1}-clipboard.json`, clipboard)
1355
+ copied = clipboardSourceFile(clipboard)
1356
+ if (!copied) providerError = 'Right-click Copy did not put a local file path on the clipboard'
1357
+ }
1358
+ } finally {
1359
+ await dismissMenus(options, 2).catch(() => {})
1360
+ await press('{ESC}', options).catch(() => {})
1361
+ }
1362
+
1363
+ if (!copied) {
1364
+ const pending = {
1365
+ ...message,
1366
+ attachment: {
1367
+ ...attachment,
1368
+ availability: 'pending-download',
1369
+ providerError,
518
1370
  },
519
1371
  }
520
1372
  updated.push(pending)
521
1373
  downloads.push({ text: message.text, ok: false, providerError: pending.attachment.providerError })
522
- before = snapshotDownloadCandidates()
523
1374
  continue
524
1375
  }
525
- const localized = copyInboundAttachment(downloaded, targetDir, attachment)
1376
+ const localized = copyInboundAttachment(copied, targetDir, attachment)
526
1377
  updated.push({ ...message, text: message.text || localized.name, attachment: localized })
527
- downloads.push({ text: message.text, ok: true, sourcePath: downloaded.path, localPath: localized.localPath, size: localized.size })
528
- before = snapshotDownloadCandidates()
1378
+ downloads.push({ text: message.text, ok: true, sourcePath: copied.path, localPath: localized.localPath, size: localized.size })
529
1379
  }
530
1380
  return { recentMessages: updated, downloads }
531
1381
  }
532
1382
 
1383
/**
 * Returns how many inbound attachments one run may try to download.
 *
 * `options.downloadLimit` is read (default 1), but the result is capped at 1,
 * so every input currently yields 1.
 *
 * @param {object} [options] - Run options; `downloadLimit` is read.
 * @returns {number} Always 1 with the current cap.
 */
export function downloadAttachmentLimit(options = {}) {
  const requested = Number(options.downloadLimit ?? 1)
  const isUsable = Number.isFinite(requested) && requested >= 1
  if (!isUsable) return 1
  // Repeated right-click attachment menus can make Windows WeChat lose its
  // enumerable main window, so live runs stay at a single attachment until
  // that path gets its own stability gate.
  return Math.min(1, Math.floor(requested))
}
1391
+
533
1392
/**
 * Returns the files whose names could not be found in the observations.
 *
 * A file counts as confirmed when its base name, compared either as
 * normalized message text or as a normalized file-name token, appears in
 * - the concatenation of all observation texts, or
 * - any single row's text, or
 * - any single row's attachment `filename` / `name`.
 *
 * @param {Array<object>} observations - OCR observations; non-arrays are treated as empty.
 * @param {Array<string>} files - File paths that were sent; falsy is treated as empty.
 * @returns {Array<string>} The subset of `files` that was not confirmed.
 */
export function missingConfirmedFiles(observations, files) {
  const rows = Array.isArray(observations) ? observations : []
  const joinedText = rows.map(row => observationText(row)).join('')
  const combinedText = normalizedMessageText(joinedText)
  const combinedFileToken = normalizedFileNameToken(joinedText)

  const isConfirmed = (file) => {
    const basename = basenameForAnyPlatform(file)
    const normalizedBasename = normalizedMessageText(basename)
    const normalizedFileToken = normalizedFileNameToken(basename)

    // Text split across several rows can still confirm the name once joined.
    if (combinedText.includes(normalizedBasename)) return true
    if (normalizedFileToken && combinedFileToken.includes(normalizedFileToken)) return true

    return rows.some(row => {
      const rowText = observationText(row)
      const rowAttachmentName = row?.attachment?.filename || row?.attachment?.name
      const text = normalizedMessageText(rowText)
      const attachmentName = normalizedMessageText(rowAttachmentName)
      const fileText = normalizedFileNameToken(rowText)
      const fileAttachmentName = normalizedFileNameToken(rowAttachmentName)
      return text.includes(normalizedBasename)
        || attachmentName.includes(normalizedBasename)
        || (normalizedFileToken && fileText.includes(normalizedFileToken))
        || (normalizedFileToken && fileAttachmentName.includes(normalizedFileToken))
    })
  }

  return (files || []).filter(file => !isConfirmed(file))
}
545
1414
 
1415
/**
 * Reduces a file name, or text containing one, to a comparison token.
 *
 * Steps, in order: NFKC-normalize, lower-case, rewrite `wln` / `w1n` / `win`
 * to `win`, strip separators and punctuation (including the CJK character
 * '一'), then drop everything that is not a-z, 0-9 or in U+4E00-U+9FFF.
 *
 * @param {*} value - Raw text; falsy values are treated as ''.
 * @returns {string} Token containing only a-z, 0-9 and CJK ideographs.
 */
function normalizedFileNameToken(value) {
  const folded = String(value || '').normalize('NFKC').toLowerCase()
  const withoutWinTypos = folded.replace(/w[l1i]n/g, 'win')
  const withoutSeparators = withoutWinTypos.replace(/[\s._\-—–一.。·,,::;;/\\|[\]()(){}<>《》"'“”‘’]+/g, '')
  return withoutSeparators.replace(/[^a-z0-9\u4e00-\u9fff]+/g, '')
}
1423
+
546
1424
  async function openConversationBySearch(options, artifacts) {
547
1425
  const initial = await capture('window', options, 'window-before-search')
548
1426
  artifacts.push(initial.file)
549
1427
  try {
550
1428
  const initialTitleOcr = await recognizeScreenshot(initial, options, 'title-confirmation')
551
1429
  if (findTitleConfirmation(initialTitleOcr.observations, options.group)) {
1430
+ initial.openEvidence = {
1431
+ strategy: 'current-title',
1432
+ searchUsed: false,
1433
+ leftListUsed: false,
1434
+ titleConfirmed: true,
1435
+ }
552
1436
  return initial
553
1437
  }
554
1438
  } catch (error) {
555
1439
  process.stderr.write(`Initial title OCR was skipped: ${error instanceof Error ? error.message : String(error)}\n`)
556
1440
  }
1441
+ const initialOcr = await recognizeScreenshot(initial, options, 'left-list')
1442
+ const leftListTarget = findConversationInLeftList(initialOcr.observations, options.group)
1443
+ if (leftListTarget) {
1444
+ const targetPoint = pointFromObservation(initial.payload, leftListTarget, initial.imageSize)
1445
+ writeDebugArtifact(options, 'open-target-left-list.json', {
1446
+ group: options.group,
1447
+ text: observationText(leftListTarget),
1448
+ box: leftListTarget.box,
1449
+ point: targetPoint,
1450
+ })
1451
+ await click(targetPoint, options, ['--no-raise'])
1452
+ await sleep(650)
1453
+ const opened = await waitForStableWindow(options, 'opened-from-left-list', { attempts: 6, delayMs: 800 })
1454
+ artifacts.push(opened.file)
1455
+ const titleOcr = await recognizeScreenshot(opened, options, 'title-confirmation')
1456
+ const titleConfirmed = Boolean(findTitleConfirmation(titleOcr.observations, options.group))
1457
+ if (!titleConfirmed) {
1458
+ if (!options.allowWeakTitle) throw new Error(`Opened left-list conversation title was not confirmed as '${options.group}'. See ${opened.file}`)
1459
+ }
1460
+ opened.openEvidence = {
1461
+ strategy: 'left-list',
1462
+ searchUsed: false,
1463
+ leftListUsed: true,
1464
+ titleConfirmed,
1465
+ targetText: observationText(leftListTarget),
1466
+ }
1467
+ return opened
1468
+ }
1469
+
1470
+ assertSearchOpenAllowed(options, options.group)
557
1471
 
558
1472
  await click(geometryPoint(initial.payload, 'search'), options)
559
1473
  await sleep(180)
@@ -569,6 +1483,12 @@ async function openConversationBySearch(options, artifacts) {
569
1483
  throw new Error(`Could not find target group '${options.group}' under search result section '群聊'. See ${searchCapture.file}`)
570
1484
  }
571
1485
  const targetPoint = pointFromObservation(searchCapture.payload, target, searchCapture.imageSize)
1486
+ writeDebugArtifact(options, 'open-target-search.json', {
1487
+ group: options.group,
1488
+ text: observationText(target),
1489
+ box: target.box,
1490
+ point: targetPoint,
1491
+ })
572
1492
  await click(targetPoint, options, ['--no-raise'])
573
1493
  await sleep(350)
574
1494
  // WeChat 4.x can keep the transient search result panel open after clicking a result.
@@ -576,25 +1496,68 @@ async function openConversationBySearch(options, artifacts) {
576
1496
  await press('{ESC}', options)
577
1497
  await sleep(500)
578
1498
 
579
- const opened = await capture('window', options, 'opened-conversation')
1499
+ const opened = await waitForStableWindow(options, 'opened-conversation', { attempts: 6, delayMs: 800 })
580
1500
  artifacts.push(opened.file)
581
1501
  const titleOcr = await recognizeScreenshot(opened, options, 'title-confirmation')
582
- if (!findTitleConfirmation(titleOcr.observations, options.group)) {
1502
+ const titleConfirmed = Boolean(findTitleConfirmation(titleOcr.observations, options.group))
1503
+ if (!titleConfirmed) {
583
1504
  // Some OCR models miss the top title but the click may still be correct. Keep the debug capture and continue
584
1505
  // only if the caller explicitly opted out of strict confirmation.
585
1506
  if (!options.allowWeakTitle) throw new Error(`Opened conversation title was not confirmed as '${options.group}'. See ${opened.file}`)
586
1507
  }
1508
+ opened.openEvidence = {
1509
+ strategy: 'search',
1510
+ searchUsed: true,
1511
+ leftListUsed: false,
1512
+ titleConfirmed,
1513
+ targetText: observationText(target),
1514
+ }
587
1515
  return opened
588
1516
  }
589
1517
 
1518
+ export function buildActionEvidence({ options = {}, opened = {}, sent = [], downloads = [], postRunSafety = {}, artifacts = [], layout = {} } = {}) {
1519
+ const confirmationMethods = []
1520
+ for (const item of Array.isArray(sent) ? sent : []) {
1521
+ if (item?.confirmationMethod) confirmationMethods.push(item.confirmationMethod)
1522
+ if (Array.isArray(item?.confirmationMethods)) confirmationMethods.push(...item.confirmationMethods)
1523
+ }
1524
+ const downloadStrategies = Array.from(new Set((Array.isArray(downloads) ? downloads : [])
1525
+ .map(item => item?.strategy)
1526
+ .filter(Boolean)))
1527
+ const openEvidence = opened?.openEvidence || {}
1528
+ return {
1529
+ targetGroup: options.group || '',
1530
+ openStrategy: openEvidence.strategy || 'unknown',
1531
+ searchUsed: Boolean(openEvidence.searchUsed),
1532
+ leftListUsed: Boolean(openEvidence.leftListUsed),
1533
+ titleConfirmed: Boolean(openEvidence.titleConfirmed),
1534
+ searchOpenAllowed: Boolean(options.allowSearchOpen),
1535
+ rightClickDownloadAllowed: Boolean(options.allowRightClickDownload),
1536
+ rightClickUsed: downloadStrategies.includes('right-click-copy'),
1537
+ downloadStrategies,
1538
+ sentTypes: (Array.isArray(sent) ? sent : []).map(item => item?.type).filter(Boolean),
1539
+ confirmationMethods: Array.from(new Set(confirmationMethods)),
1540
+ postRunSafetyOk: postRunSafety?.ok === true,
1541
+ postRunSafetyArtifact: postRunSafety?.artifact || '',
1542
+ artifactCount: Array.isArray(artifacts) ? artifacts.length : 0,
1543
+ layout: {
1544
+ currentTitle: layout?.currentTitle || '',
1545
+ messageCount: Number(layout?.messageCount || 0),
1546
+ attachmentCount: Number(layout?.attachmentCount || 0),
1547
+ hasInputBox: Boolean(layout?.inputBox),
1548
+ hasMessageArea: Boolean(layout?.messageArea),
1549
+ },
1550
+ }
1551
+ }
1552
+
590
1553
  export async function runVisualFlow(input) {
591
1554
  const captureDir = path.resolve(input.captureDir || fs.mkdtempSync(path.join(fs.realpathSync(process.env.TEMP || process.env.TMP || '/tmp'), 'shennian-wechat-rpa-win-visual-')))
592
- fs.mkdirSync(captureDir, { recursive: true })
1555
+ prepareCaptureDir(captureDir)
593
1556
  const options = {
594
1557
  ...input,
595
1558
  captureDir,
596
1559
  step: 1,
597
- recentLimit: Number(input.recentLimit || 5),
1560
+ recentLimit: Math.max(Number(input.recentLimit || 5), input.downloadAttachmentsDir && input.downloadAttachments !== false ? 20 : 0),
598
1561
  ocrTimeoutMs: Number(input.ocrTimeoutMs || 45_000),
599
1562
  openTimeoutMs: Number(input.openTimeoutMs || 12_000),
600
1563
  }
@@ -606,10 +1569,18 @@ export async function runVisualFlow(input) {
606
1569
  artifacts.push(messageCapture.file)
607
1570
  const messageOcr = await recognizeScreenshot(messageCapture, options, 'message-read')
608
1571
  let recentMessages = summarizeObservations(messageOcr.observations, options.recentLimit)
1572
+ recentMessages = mergeRecentMessagesWithAttachments(
1573
+ recentMessages,
1574
+ summarizeAttachmentObservations(messageOcr.observations, options.recentLimit),
1575
+ options.recentLimit,
1576
+ )
609
1577
  const localization = await localizeInboundAttachments(recentMessages, messageCapture, options)
610
1578
  recentMessages = localization.recentMessages
611
1579
 
612
1580
  const sent = []
1581
+ if (!options.dryRun && (options.replyText || options.files?.length)) {
1582
+ await assertComposerEmptyBeforeSend(options, artifacts)
1583
+ }
613
1584
  if (!options.dryRun && options.replyText) {
614
1585
  await click(geometryPoint(opened.payload, 'input'), options)
615
1586
  await pasteText(options.replyText, options)
@@ -619,52 +1590,105 @@ export async function runVisualFlow(input) {
619
1590
  const confirmCapture = await capture('window', options, 'after-text-send')
620
1591
  artifacts.push(confirmCapture.file)
621
1592
  const confirmOcr = await recognizeScreenshot(confirmCapture, options, 'send-confirmation')
622
- if (!observationsContainText(confirmOcr.observations, options.replyText)) {
623
- throw new Error(`Sent text was not confirmed by OCR: '${options.replyText}'. See ${confirmCapture.file}`)
1593
+ const confirmation = await confirmSentText(confirmOcr, options.replyText, opened, confirmCapture)
1594
+ if (!confirmation.observation) {
1595
+ throw new Error(`Sent text was not confirmed by OCR or outbound bubble diff: '${options.replyText}'. See ${confirmCapture.file}`)
624
1596
  }
625
- sent.push({ type: 'text', text: options.replyText, observations: summarizeObservations(confirmOcr.observations, options.recentLimit) })
1597
+ sent.push({
1598
+ type: 'text',
1599
+ text: options.replyText,
1600
+ confirmationMethod: confirmation.method,
1601
+ observations: summarizeObservations([confirmation.observation], options.recentLimit),
1602
+ })
626
1603
  }
627
1604
 
628
1605
  if (!options.dryRun && options.files?.length) {
629
1606
  const confirmedFiles = []
630
1607
  const sentAttachments = []
631
1608
  const confirmationObservations = []
1609
+ const confirmationMethods = []
632
1610
  for (const [index, file] of options.files.entries()) {
633
- await click(geometryPoint(opened.payload, 'input'), options)
634
- await pasteFiles([file], options)
635
- await sleep(850)
636
- const pendingCapture = await capture('input', options, `pending-file-${index + 1}`)
637
- artifacts.push(pendingCapture.file)
638
- await click(geometryPoint(opened.payload, 'send'), options)
1611
+ let pendingCapture = null
1612
+ let pendingOcr = null
1613
+ let sendButton = null
1614
+ for (let attempt = 1; attempt <= 2; attempt += 1) {
1615
+ await click(geometryPoint(opened.payload, 'fileInput'), options)
1616
+ await sleep(120)
1617
+ await pasteFiles([file], options)
1618
+ await sleep(950)
1619
+ pendingCapture = await captureWithRetry('window', options, `pending-file-${index + 1}${attempt > 1 ? `-retry-${attempt}` : ''}`, { attempts: 8, delayMs: 900 })
1620
+ artifacts.push(pendingCapture.file)
1621
+ pendingOcr = await recognizeScreenshot(pendingCapture, options, 'send-button')
1622
+ sendButton = findSendButtonObservation(pendingOcr.observations)
1623
+ if (sendButton || missingConfirmedFiles(pendingOcr.observations, [file]).length === 0) break
1624
+ }
1625
+ if (!pendingCapture || !pendingOcr) throw new Error(`File was not captured after paste: ${basenameForAnyPlatform(file)}`)
1626
+ if (!sendButton && missingConfirmedFiles(pendingOcr.observations, [file]).length > 0) {
1627
+ throw new Error(`File did not appear in the WeChat composer after paste: ${basenameForAnyPlatform(file)}. See ${pendingCapture.file}`)
1628
+ }
1629
+ if (sendButton) {
1630
+ await click(pointFromObservation(pendingCapture.payload, sendButton, pendingCapture.imageSize), options)
1631
+ } else {
1632
+ await press('{ENTER}', options)
1633
+ }
639
1634
  await sleep(postPasteDelayMs(file))
640
1635
  const fileConfirmCapture = await capture('window', options, `after-file-${index + 1}-send`)
641
1636
  artifacts.push(fileConfirmCapture.file)
642
1637
  const fileConfirmOcr = await recognizeScreenshot(fileConfirmCapture, options, 'send-confirmation')
643
1638
  const missing = missingConfirmedFiles(fileConfirmOcr.observations, [file])
644
1639
  if (missing.length > 0) {
645
- throw new Error(`Sent file was not confirmed by OCR: ${basenameForAnyPlatform(file)}. See ${fileConfirmCapture.file}`)
1640
+ const visual = await detectNewOutboundAttachmentBubble(attachmentVisualBeforeFile(opened, pendingCapture), fileConfirmCapture.file)
1641
+ if (!visual?.ok) {
1642
+ throw new Error(`Sent file was not confirmed by OCR or outbound attachment diff: ${basenameForAnyPlatform(file)}. See ${fileConfirmCapture.file}`)
1643
+ }
1644
+ confirmationMethods.push('visual-new-outbound-attachment')
1645
+ confirmationObservations.push({
1646
+ text: basenameForAnyPlatform(file),
1647
+ role: 'outbound-visual',
1648
+ confidence: 0,
1649
+ box: visual.box,
1650
+ visual,
1651
+ })
1652
+ } else {
1653
+ confirmationMethods.push('ocr-file-name')
1654
+ confirmationObservations.push(...summarizeObservations(fileConfirmOcr.observations, options.recentLimit))
646
1655
  }
647
1656
  confirmedFiles.push(file)
648
1657
  sentAttachments.push(classifyOutboundFile(file))
649
- confirmationObservations.push(...summarizeObservations(fileConfirmOcr.observations, options.recentLimit))
650
1658
  }
651
1659
  sent.push({
652
1660
  type: 'files',
653
1661
  files: confirmedFiles,
1662
+ confirmationMethods,
654
1663
  attachments: sentAttachments,
655
1664
  observations: confirmationObservations.slice(-options.recentLimit),
656
1665
  })
657
1666
  }
658
1667
 
1668
+ const postRunSafety = await verifyPostRunSafety(options, artifacts)
1669
+
1670
+ const layoutSummary = summarizeCoreLayout(messageOcr.layout)
659
1671
  const summary = {
660
1672
  ok: true,
661
1673
  group: options.group,
662
1674
  captureDir,
1675
+ ocrEvidence: summarizeOcrEvidence(messageOcr),
1676
+ layout: layoutSummary,
663
1677
  recentMessages,
664
1678
  downloads: localization.downloads,
665
1679
  sent,
1680
+ postRunSafety,
666
1681
  artifacts,
667
1682
  }
1683
+ summary.actionEvidence = buildActionEvidence({
1684
+ options,
1685
+ opened,
1686
+ sent,
1687
+ downloads: localization.downloads,
1688
+ postRunSafety,
1689
+ artifacts,
1690
+ layout: layoutSummary,
1691
+ })
668
1692
  fs.writeFileSync(path.join(captureDir, 'summary.json'), `${JSON.stringify(summary, null, 2)}\n`)
669
1693
  return summary
670
1694
  }
@@ -682,6 +1706,10 @@ async function main() {
682
1706
  recentLimit: takeOption(argv, '--recent-limit') || '5',
683
1707
  captureDir: takeOption(argv, '--capture-dir'),
684
1708
  downloadAttachmentsDir: takeOption(argv, '--download-attachments-dir'),
1709
+ downloadExpectedTokens: takeMany(argv, '--download-expected-token'),
1710
+ downloadLimit: takeOption(argv, '--download-limit'),
1711
+ allowRightClickDownload: takeFlag(argv, '--allow-right-click-download'),
1712
+ allowSearchOpen: takeFlag(argv, '--allow-search-open'),
685
1713
  ocrUrl: takeOption(argv, '--ocr-url'),
686
1714
  token: takeOption(argv, '--token'),
687
1715
  ocrFixture: takeOption(argv, '--ocr-fixture'),