shennian 0.2.75 → 0.2.77

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,707 @@
1
+ #!/usr/bin/env node
2
+ // @arch docs/features/wechat-rpa-channel.md
3
+ // @test tests/wechat-rpa-win-visual.test.mjs
4
+
5
+ import { spawn } from 'node:child_process'
6
+ import crypto from 'node:crypto'
7
+ import fs from 'node:fs'
8
+ import path from 'node:path'
9
+ import { fileURLToPath } from 'node:url'
10
+ import { selectDownloadedAttachment } from './wechat-rpa-download-candidates.mjs'
11
+
12
// Repository root: the parent of the directory that contains this script.
const repoRoot = path.resolve(fileURLToPath(import.meta.url), '..', '..')
// Bridge script that every native action (capture/click/paste/press) is routed through.
const helperBridge = path.join(repoRoot, 'scripts/wechat-rpa-win.mjs')

// Header labels that split the search-result panel into sections.
const SECTION_LABELS = new Set([
  '群聊',
  '联系人',
  '聊天记录',
  '公众号',
  '小程序',
  '朋友圈',
  '企业',
  '企业微信',
])
16
+
17
/**
 * Print CLI usage to stdout.
 *
 * Lists every option that `main` parses, including `--channel-id`,
 * `--ocr-timeout-ms` and `--allow-weak-title`, which were accepted but
 * previously undocumented.
 */
function printHelp() {
  console.log(`Usage:
  node scripts/wechat-rpa-win-visual.mjs --group ABC --recent-limit 5
  node scripts/wechat-rpa-win-visual.mjs --group ABC --reply-text "我是 AI" --ocr-url https://shennian.net/integrations/wechat-rpa/ocr --token <token>
  node scripts/wechat-rpa-win-visual.mjs --group ABC --file C:\\tmp\\demo.png --file C:\\tmp\\demo.mp4 --ocr-fixture C:\\tmp\\search-ocr.json

Options:
  --group <name>            Required target group/conversation name.
  --reply-text <text>       Text to send after opening the group.
  --file <path>             File/image/video to send. Repeatable.
  --recent-limit <n>        Number of OCR message observations to include in summary. Default: 5.
  --capture-dir <dir>       Directory for screenshots and debug JSON. Default: temp shennian-wechat-rpa-win-visual-*.
  --download-attachments-dir <dir>
                            Copy clicked inbound attachments into this directory.
  --no-download-attachments
                            Keep inbound attachments as metadata-only/pending-download.
  --ocr-url <url>           Shennian OCR endpoint, e.g. /integrations/wechat-rpa/ocr.
  --token <token>           Bearer token for --ocr-url. Also reads WECHAT_RPA_OCR_TOKEN.
  --ocr-fixture <path>      Local OCR fixture JSON for deterministic selection tests/debugging.
  --ocr-timeout-ms <n>      Timeout for a single OCR request. Default: 45000.
  --channel-id <id>         Channel id forwarded to the OCR endpoint.
  --helper <path>           Override native helper exe passed through to scripts/wechat-rpa-win.mjs.
  --open-timeout-ms <n>     Timeout waiting for title confirmation. Default: 12000.
  --allow-weak-title        Continue even when OCR cannot confirm the opened conversation title.
  --dry-run                 Open/read only; do not send reply-text/files.

This is the Windows commercial baseline visual RPA orchestrator: WeChat foreground + screenshot + OCR boxes + native click/paste/press.`)
}
42
+
43
/**
 * Remove `name` and its value from `argv` (mutating it) and return the value.
 *
 * @param {string[]} argv - Remaining CLI arguments.
 * @param {string} name - Option name, e.g. `--group`.
 * @returns {string|null} The value, or null when the option is absent.
 * @throws {Error} When the option has no value or is followed by another `--` option.
 */
function takeOption(argv, name) {
  const position = argv.indexOf(name)
  if (position === -1) return null

  const candidate = argv[position + 1]
  const hasValue = Boolean(candidate) && !candidate.startsWith('--')
  if (!hasValue) throw new Error(`Missing value for ${name}`)

  argv.splice(position, 2)
  return candidate
}
51
+
52
/**
 * Collect every value of a repeatable option, removing each occurrence from `argv`.
 *
 * @param {string[]} argv - Remaining CLI arguments (mutated).
 * @param {string} name - Option name, e.g. `--file`.
 * @returns {string[]} Values in the order they appeared; empty when absent.
 */
function takeMany(argv, name) {
  const collected = []
  let next = takeOption(argv, name)
  while (next !== null) {
    collected.push(next)
    next = takeOption(argv, name)
  }
  return collected
}
61
+
62
/**
 * Remove the first occurrence of a boolean flag from `argv` (mutating it).
 *
 * @param {string[]} argv - Remaining CLI arguments.
 * @param {string} name - Flag name, e.g. `--dry-run`.
 * @returns {boolean} True when the flag was present.
 */
function takeFlag(argv, name) {
  const position = argv.indexOf(name)
  const present = position >= 0
  if (present) argv.splice(position, 1)
  return present
}
68
+
69
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms)
  })
}
72
+
73
/**
 * Reduce a conversation name to a comparison key.
 *
 * Removes all whitespace and zero-width characters, strips a trailing member
 * count such as `(12)` or its full-width form, and lowercases the rest.
 * The original character classes listed the ASCII parenthesis twice, so the
 * full-width form was never stripped; it is spelled here with escapes.
 *
 * @param {unknown} value - Raw name; falsy values become an empty string.
 * @returns {string} Normalised key.
 */
export function normalizeConversationName(value) {
  return String(value || '')
    .replace(/[\s\u200b\u200c\u200d]+/g, '')
    // Anchored at the end, so a single match is all that can occur.
    .replace(/[(\uFF08]\d+[)\uFF09]$/, '')
    .toLowerCase()
}
80
+
81
/**
 * Collapse every run of whitespace to a single space and trim the ends.
 *
 * @param {unknown} value - Raw text; falsy values become an empty string.
 * @returns {string}
 */
function normalizeText(value) {
  const raw = String(value || '')
  const collapsed = raw.replace(/\s+/g, ' ')
  return collapsed.trim()
}
84
+
85
/**
 * Compute the centre point of a box; missing or falsy fields count as 0.
 *
 * @param {{x?: number, y?: number, width?: number, height?: number}} box
 * @returns {{x: number, y: number}}
 */
function centerOfBox(box) {
  const left = Number(box.x || 0)
  const top = Number(box.y || 0)
  const halfWidth = Number(box.width || 0) / 2
  const halfHeight = Number(box.height || 0) / 2
  return { x: left + halfWidth, y: top + halfHeight }
}
91
+
92
/**
 * Whitespace-normalised `text` of an OCR observation.
 *
 * @param {{text?: unknown}|null|undefined} row - OCR observation.
 * @returns {string} Empty string when the row or its text is missing.
 */
function observationText(row) {
  const raw = String(row?.text || '')
  return raw.replace(/\s+/g, ' ').trim()
}
95
+
96
/**
 * Confidence of an OCR observation, clamped to [0, 1].
 *
 * Returns 0.75 when the confidence is missing or not numeric. `null` and blank
 * strings count as missing: `Number(null)` and `Number('')` are both 0, which
 * previously scored such rows as zero confidence instead of the default.
 *
 * @param {{confidence?: unknown}|null|undefined} row - OCR observation.
 * @returns {number}
 */
function observationConfidence(row) {
  const DEFAULT_CONFIDENCE = 0.75
  const raw = row?.confidence
  if (raw === null || raw === undefined) return DEFAULT_CONFIDENCE
  if (typeof raw === 'string' && raw.trim() === '') return DEFAULT_CONFIDENCE

  const value = Number(raw)
  if (!Number.isFinite(value)) return DEFAULT_CONFIDENCE
  return Math.max(0, Math.min(1, value))
}
100
+
101
/**
 * Whether an OCR row's text is exactly one of the search-panel section headers.
 *
 * @param {object} row - OCR observation.
 * @returns {boolean}
 */
function isSection(row) {
  const label = observationText(row)
  if (SECTION_LABELS.has(label)) return true
  return /^(群聊|联系人|聊天记录|公众号|小程序|企业微信)$/.test(label)
}
105
+
106
/**
 * Pick the OCR row in a search-result screenshot that best matches the target
 * conversation.
 *
 * Only rows that have a box and non-empty text are considered, and only those
 * whose normalised text equals the normalised target. Matches between the
 * '群聊' header and the next section header are preferred; if there are none,
 * every exact match competes. The best score wins: confidence, a bonus for
 * sitting below the '群聊' header, minus distance and horizontal penalties.
 *
 * @param {object[]} observations - OCR rows; non-arrays are treated as empty.
 * @param {string} targetName - Conversation name to look for.
 * @returns {object|null} The winning original row, or null when nothing matches.
 * @throws {Error} When `targetName` normalises to an empty string.
 */
export function findConversationInSearchResults(observations, targetName) {
  const wanted = normalizeConversationName(targetName)
  if (!wanted) throw new Error('targetName is required')

  const source = Array.isArray(observations) ? observations : []
  const entries = []
  for (const row of source) {
    if (!row?.box) continue
    const text = observationText(row)
    if (!text) continue
    entries.push({ row, text, center: centerOfBox(row.box) })
  }
  // Reading order: top to bottom, then left to right.
  entries.sort((a, b) => a.center.y - b.center.y || a.center.x - b.center.x)

  const headers = entries.filter(entry => isSection(entry.row))
  const groupHeader = headers.find(entry => entry.text === '群聊')
  const followingHeader = groupHeader
    ? headers.find(entry => entry.center.y > groupHeader.center.y + 0.002)
    : null

  const insideGroupSection = (entry) => {
    if (!groupHeader) return false
    if (entry.center.y <= groupHeader.center.y) return false
    return !(followingHeader && entry.center.y >= followingHeader.center.y)
  }

  const scoreOf = (entry) => {
    const { x, y } = entry.center
    const sectionDistance = groupHeader ? Math.max(0, y - groupHeader.center.y) : y
    // Coordinates <= 1 are treated as normalised; larger ones as pixels.
    const horizontalPenalty = x <= 1
      ? Math.abs(x - 0.2) * 0.5
      : Math.abs(x - 200) * 0.005
    const sectionBonus = groupHeader && y > groupHeader.center.y ? 50 : 0
    return observationConfidence(entry.row) * 100 + sectionBonus - sectionDistance * 5 - horizontalPenalty
  }

  const exactMatches = entries.filter(entry => normalizeConversationName(entry.text) === wanted)
  const scopedMatches = exactMatches.filter(insideGroupSection)
  const pool = scopedMatches.length > 0 ? scopedMatches : exactMatches
  if (pool.length === 0) return null

  const ranked = pool.map(entry => ({ entry, score: scoreOf(entry) }))
  ranked.sort((a, b) => b.score - a.score)
  return ranked[0].entry.row
}
147
+
148
/**
 * Find the OCR row whose normalised text equals the normalised target name.
 *
 * A target that normalises to an empty string (for example `" "`) now returns
 * null. It previously matched the first blank OCR row, which callers read as
 * confirmation that the right conversation was open.
 *
 * @param {object[]} observations - OCR rows; non-arrays are treated as empty.
 * @param {string} targetName - Expected conversation title.
 * @returns {object|null} The matching row, or null.
 */
export function findTitleConfirmation(observations, targetName) {
  const target = normalizeConversationName(targetName)
  if (!target) return null
  const rows = Array.isArray(observations) ? observations : []
  return rows.find(row => normalizeConversationName(observationText(row)) === target) || null
}
153
+
154
/**
 * Decide whether a failed OCR response is worth retrying.
 *
 * @param {number|string} status - HTTP status code.
 * @param {unknown} body - Response body text.
 * @returns {boolean} True for 408/429/500/502/503/504, or when the body
 *   mentions an invalid model response, a timeout or a temporary failure.
 */
export function isRetryableOcrError(status, body) {
  switch (Number(status)) {
    case 408:
    case 429:
    case 500:
    case 502:
    case 503:
    case 504:
      return true
    default:
      return /invalid model response|timeout|temporar/i.test(String(body || ''))
  }
}
158
+
159
/**
 * Convert the centre of an OCR box into an absolute click coordinate.
 *
 * A box whose x, y, width and height all have magnitude <= 1 is treated as
 * normalised and scaled by the image size; otherwise it is read as image
 * pixels. The result is then scaled from image size to `bounds` size and
 * offset by the `bounds` origin.
 *
 * Non-numeric box values and non-positive image dimensions now throw; they
 * previously produced NaN coordinates.
 *
 * @param {{bounds: {x: number, y: number, width: number, height: number}}} capturePayload
 * @param {{box: {x: number, y: number, width: number, height: number}}} observation
 * @param {{width?: number, height?: number}} [imageSize] - Screenshot pixel size;
 *   falls back to the bounds size.
 * @returns {{x: number, y: number}} Rounded point.
 * @throws {Error} On missing bounds/box or unusable numbers.
 */
export function pointFromObservation(capturePayload, observation, imageSize) {
  if (!capturePayload?.bounds) throw new Error('capture payload missing bounds')
  if (!observation?.box) throw new Error('observation missing box')

  const bounds = capturePayload.bounds
  const width = Number(imageSize?.width || bounds.width)
  const height = Number(imageSize?.height || bounds.height)
  if (!(width > 0) || !(height > 0)) {
    throw new Error(`capture image size is not usable: ${width}x${height}`)
  }

  const rawX = Number(observation.box.x)
  const rawY = Number(observation.box.y)
  const rawWidth = Number(observation.box.width)
  const rawHeight = Number(observation.box.height)
  if (![rawX, rawY, rawWidth, rawHeight].every(Number.isFinite)) {
    throw new Error('observation box has non-numeric coordinates')
  }

  const scaleX = Number(bounds.width) / width
  const scaleY = Number(bounds.height) / height
  const normalizedBox = Math.max(Math.abs(rawX), Math.abs(rawY), Math.abs(rawWidth), Math.abs(rawHeight)) <= 1
  const cx = normalizedBox ? (rawX + rawWidth / 2) * width : rawX + rawWidth / 2
  const cy = normalizedBox ? (rawY + rawHeight / 2) * height : rawY + rawHeight / 2
  return {
    x: Math.round(Number(bounds.x) + cx * scaleX),
    y: Math.round(Number(bounds.y) + cy * scaleY),
  }
}
179
+
180
/**
 * Resolve a named UI anchor to an absolute point inside the main window.
 *
 * Anchors are fractions of the window bounds. `mainWindow.bounds` is preferred
 * over the top-level `bounds`. The lookup uses own keys only, so inherited
 * names such as `toString` are rejected rather than returning NaN coordinates.
 *
 * @param {object} capturePayload - Capture payload carrying the window bounds.
 * @param {'search'|'input'|'send'} kind - Anchor name.
 * @returns {{x: number, y: number}} Rounded point.
 * @throws {Error} On missing bounds or an unknown anchor.
 */
export function geometryPoint(capturePayload, kind) {
  const bounds = capturePayload?.mainWindow?.bounds || capturePayload?.bounds
  if (!bounds) throw new Error('capture payload missing main window bounds')
  const presets = {
    search: [0.186, 0.087],
    // Right-side composer: avoid the left search overlay and click inside the text area.
    input: [0.58, 0.92],
    send: [0.932, 0.945],
  }
  if (!Object.hasOwn(presets, kind)) throw new Error(`Unknown geometry point: ${kind}`)
  const [fractionX, fractionY] = presets[kind]
  return {
    x: Math.round(Number(bounds.x) + Number(bounds.width) * fractionX),
    y: Math.round(Number(bounds.y) + Number(bounds.height) * fractionY),
  }
}
196
+
197
/**
 * Throw unless the captured window is at least 760x600.
 *
 * The comparison is written so that NaN dimensions fail the check; with the
 * original `<` comparisons, non-numeric bounds passed.
 *
 * @param {object} capturePayload - Capture payload; `mainWindow.bounds` is
 *   preferred over the top-level `bounds`.
 * @param {string} [phase='capture'] - Label prefixed to error messages.
 * @returns {object} The same payload.
 * @throws {Error} On missing bounds or a window that is too small.
 */
export function assertUsableMainWindow(capturePayload, phase = 'capture') {
  const bounds = capturePayload?.mainWindow?.bounds || capturePayload?.bounds
  if (!bounds) throw new Error(`${phase}: capture payload missing main window bounds`)
  const width = Number(bounds.width)
  const height = Number(bounds.height)
  const largeEnough = width >= 760 && height >= 600
  if (!largeEnough) {
    throw new Error(
      `${phase}: WeChat chat main window is not available; got ${Math.round(width)}x${Math.round(height)}. ` +
      'The visible WeChat window looks like a login/prompt dialog, so the RPA flow was stopped.',
    )
  }
  return capturePayload
}
210
+
211
/**
 * Read the pixel dimensions from a PNG file's IHDR chunk.
 *
 * Only the first 24 bytes are read: the 8-byte signature, the IHDR chunk
 * length and type, then width and height as big-endian uint32. The full
 * signature, the number of bytes read and the IHDR tag are all checked, so
 * truncated or non-PNG files fail instead of returning garbage sizes.
 *
 * @param {string} filePath - Path to the PNG file.
 * @returns {{width: number, height: number}}
 * @throws {Error} When the file is not a PNG or its header is incomplete.
 */
export function readPngSize(filePath) {
  const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
  const fd = fs.openSync(filePath, 'r')
  try {
    const header = Buffer.alloc(24)
    const bytesRead = fs.readSync(fd, header, 0, header.length, 0)
    const hasSignature = bytesRead >= PNG_SIGNATURE.length
      && header.subarray(0, PNG_SIGNATURE.length).equals(PNG_SIGNATURE)
    if (!hasSignature) throw new Error(`Not a PNG file: ${filePath}`)
    if (bytesRead < header.length || header.toString('ascii', 12, 16) !== 'IHDR') {
      throw new Error(`Truncated or malformed PNG header: ${filePath}`)
    }
    return { width: header.readUInt32BE(16), height: header.readUInt32BE(20) }
  } finally {
    fs.closeSync(fd)
  }
}
222
+
223
/**
 * Run the bridge script in a child Node process and parse its JSON stdout.
 *
 * `--helper <path>` is appended when `options.helper` is set. The child's
 * stderr is forwarded to this process's stderr after it exits.
 *
 * @param {string[]} args - Bridge sub-command and its arguments.
 * @param {{helper?: string}} options
 * @returns {Promise<any>} Parsed JSON, or null when stdout is blank.
 * @throws {Error} On a non-zero exit code or unparsable stdout.
 */
async function runBridge(args, options) {
  const helperArgs = options.helper ? ['--helper', options.helper] : []
  const commandLine = [helperBridge, ...args, ...helperArgs]
  const proc = spawn(process.execPath, commandLine, {
    cwd: repoRoot,
    windowsHide: true,
    stdio: ['ignore', 'pipe', 'pipe'],
  })

  const outChunks = []
  const errChunks = []
  proc.stdout.setEncoding('utf8')
  proc.stderr.setEncoding('utf8')
  proc.stdout.on('data', (chunk) => { outChunks.push(chunk) })
  proc.stderr.on('data', (chunk) => { errChunks.push(chunk) })

  const exitCode = await new Promise((resolve, reject) => {
    proc.on('error', reject)
    proc.on('close', resolve)
  })

  const stdout = outChunks.join('')
  const stderr = errChunks.join('')
  if (stderr.trim()) process.stderr.write(stderr)
  if (exitCode !== 0) {
    throw new Error(`wechat-rpa-win command failed (${exitCode}): ${args.join(' ')}\n${stdout}`)
  }

  const payloadText = stdout.trim()
  if (!payloadText) return null
  try {
    return JSON.parse(payloadText)
  } catch (error) {
    throw new Error(`Invalid helper JSON for ${args.join(' ')}: ${error.message}\n${stdout}`)
  }
}
253
+
254
/**
 * Capture a region to a numbered PNG inside `options.captureDir`.
 *
 * Increments `options.step` for the file-name prefix. For the `window` region
 * the payload must also pass `assertUsableMainWindow`.
 *
 * @param {string} region - Region name handed to the bridge.
 * @param {{captureDir: string, step: number}} options - Mutated: `step` is incremented.
 * @param {string} [label] - File-name suffix; defaults to the region name.
 * @returns {Promise<{result: any, payload: any, file: string, imageSize: {width: number, height: number}}>}
 */
async function capture(region, options, label) {
  const tag = label || region
  const sequence = String(options.step++).padStart(2, '0')
  const file = path.join(options.captureDir, `${sequence}-${tag}.png`)
  const result = await runBridge(['capture', '--region', region, '--output', file], options)
  const payload = result?.payload
  if (region === 'window') assertUsableMainWindow(payload, tag)
  const imageSize = readPngSize(file)
  return { result, payload, file, imageSize }
}
260
+
261
/**
 * Click at an absolute point through the bridge.
 *
 * @param {{x: number, y: number}} point
 * @param {object} options - Bridge options.
 * @param {string[]} [extra=[]] - Extra bridge arguments such as `--no-raise`.
 * @returns {Promise<any>} Bridge result.
 */
async function click(point, options, extra = []) {
  const { x, y } = point
  const args = ['click', '--x', String(x), '--y', String(y), ...extra]
  return runBridge(args, options)
}
264
+
265
/**
 * Paste text into the focused control through the bridge.
 *
 * @param {string} text
 * @param {object} options - Bridge options.
 * @returns {Promise<any>} Bridge result.
 */
async function pasteText(text, options) {
  const args = ['paste-text', '--text', text]
  return runBridge(args, options)
}
268
+
269
/**
 * Paste files into the focused control through the bridge.
 *
 * @param {string[]} files - Paths, each forwarded as `--file <path>`.
 * @param {object} options - Bridge options.
 * @returns {Promise<any>} Bridge result.
 */
async function pasteFiles(files, options) {
  const args = ['paste-files']
  for (const file of files) {
    args.push('--file', file)
  }
  return runBridge(args, options)
}
272
+
273
/**
 * Send a key sequence through the bridge.
 *
 * @param {string} keys - Key specification, e.g. `{ESC}` or `^a`.
 * @param {object} options - Bridge options.
 * @returns {Promise<any>} Bridge result.
 */
async function press(keys, options) {
  const args = ['press', '--keys', keys]
  return runBridge(args, options)
}
276
+
277
/**
 * Run OCR for one screenshot, from a local fixture or the remote endpoint.
 *
 * Fixture mode (`options.ocrFixture`): a JSON array is wrapped as
 * `{ ok, purpose, observations }`; an object returns its `purpose` entry, or
 * the object itself when it has an `observations` array.
 *
 * Remote mode: POSTs the base64 PNG to `options.ocrUrl` with a bearer token and
 * writes the response next to the screenshot as `*-<purpose>.ocr.json`.
 *
 * Fixes over the previous version:
 * - `ocrRetries: 0` is honoured (`??` instead of `||`), and a non-numeric value
 *   falls back to 2 retries instead of skipping the loop.
 * - Fetch errors (timeout, network failure) are retried like retryable HTTP
 *   statuses; the last such error is rethrown unchanged.
 * - A fixture that is neither array nor object gets a descriptive error.
 * - A missing `ocrTimeoutMs` falls back to 45000 ms.
 *
 * @param {{file: string}} capture - Capture result; `file` is the PNG path.
 * @param {object} options - Reads ocrFixture, ocrUrl, token, ocrRetries,
 *   ocrTimeoutMs, group and channelId.
 * @param {string} purpose - OCR purpose tag sent to the endpoint.
 * @returns {Promise<any>} OCR result.
 * @throws {Error} On bad fixtures, missing configuration or a failed request.
 */
async function recognizeScreenshot(capture, options, purpose) {
  if (options.ocrFixture) {
    const fixture = JSON.parse(fs.readFileSync(options.ocrFixture, 'utf8'))
    if (Array.isArray(fixture)) return { ok: true, purpose, observations: fixture }
    if (fixture === null || typeof fixture !== 'object') {
      throw new Error(`OCR fixture must be a JSON array or object: ${options.ocrFixture}`)
    }
    if (fixture[purpose]) return fixture[purpose]
    if (Array.isArray(fixture.observations)) return fixture
    throw new Error(`OCR fixture does not contain purpose '${purpose}'.`)
  }
  if (!options.ocrUrl) {
    throw new Error('OCR is required for visual flow. Pass --ocr-url/--token or --ocr-fixture.')
  }
  const token = options.token || process.env.WECHAT_RPA_OCR_TOKEN || ''
  if (!token) throw new Error('Missing OCR bearer token. Pass --token or WECHAT_RPA_OCR_TOKEN.')

  const retries = Number(options.ocrRetries ?? 2)
  const maxAttempts = Number.isFinite(retries) ? Math.max(1, Math.floor(retries) + 1) : 3
  const timeoutMs = Number(options.ocrTimeoutMs) > 0 ? Number(options.ocrTimeoutMs) : 45_000
  const requestBody = JSON.stringify({
    imageBase64: fs.readFileSync(capture.file).toString('base64'),
    mimeType: 'image/png',
    purpose,
    conversationName: options.group,
    channelId: options.channelId,
  })

  let json = null
  for (let attempt = 1; ; attempt++) {
    const isLastAttempt = attempt >= maxAttempts
    let response
    try {
      response = await fetch(options.ocrUrl, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${token}`,
        },
        body: requestBody,
        signal: AbortSignal.timeout(timeoutMs),
      })
    } catch (error) {
      // Timeouts and network failures are transient: retry while attempts remain.
      if (isLastAttempt) throw error
      await sleep(400 * attempt)
      continue
    }
    if (response.ok) {
      json = await response.json()
      break
    }
    const body = await response.text()
    if (isLastAttempt || !isRetryableOcrError(response.status, body)) {
      throw new Error(`OCR request failed: ${response.status} ${body}`)
    }
    await sleep(400 * attempt)
  }

  const debugPath = capture.file.replace(/\.png$/i, `-${purpose}.ocr.json`)
  fs.writeFileSync(debugPath, `${JSON.stringify(json, null, 2)}\n`)
  return json
}
326
+
327
/**
 * Build compact summaries of the last `limit` OCR rows that have text.
 *
 * A limit of zero or below now yields an empty list. Previously `slice(-0)`
 * behaved like `slice(0)`, so a limit of 0 returned every row. A limit that is
 * not a number keeps the old behaviour of returning all rows.
 *
 * @param {object[]} observations - OCR rows; non-arrays are treated as empty.
 * @param {number} limit - Maximum number of trailing rows to keep.
 * @returns {object[]} Rows reduced to text, role, confidence, optional
 *   attachment, and box.
 */
function summarizeObservations(observations, limit) {
  const count = Number(limit)
  if (count <= 0) return []
  const withText = (Array.isArray(observations) ? observations : [])
    .filter(row => observationText(row))
  const tail = Number.isNaN(count) ? withText : withText.slice(-count)
  return tail.map(row => ({
    text: observationText(row),
    role: row.role || 'unknown',
    confidence: observationConfidence(row),
    ...(row.attachment ? { attachment: row.attachment } : {}),
    box: row.box,
  }))
}
339
+
340
/**
 * Strip all whitespace and lowercase, for loose message comparison.
 *
 * @param {unknown} value - Raw text; falsy values become an empty string.
 * @returns {string}
 */
function normalizedMessageText(value) {
  const raw = String(value || '')
  const compact = raw.replace(/\s+/g, '')
  return compact.toLowerCase()
}
343
+
344
/**
 * Check whether OCR rows confirm that `text` is visible.
 *
 * A row confirms the text when, after whitespace removal and lowercasing, one
 * string contains the other (OCR may split or merge lines). Rows with no text
 * are skipped: every string contains the empty string, so a single blank row
 * used to confirm any message.
 *
 * @param {object[]} observations - OCR rows; non-arrays are treated as empty.
 * @param {string} text - Text expected on screen.
 * @returns {boolean} True when confirmed, or when `text` is blank.
 */
export function observationsContainText(observations, text) {
  const target = normalizedMessageText(text)
  if (!target) return true
  return (Array.isArray(observations) ? observations : []).some((row) => {
    const content = normalizedMessageText(observationText(row))
    if (!content) return false
    return content.includes(target) || target.includes(content)
  })
}
352
+
353
/**
 * Base name of a path that may use Windows or POSIX separators.
 *
 * @param {unknown} file - Path; falsy values become an empty string.
 * @returns {string}
 */
function basenameForAnyPlatform(file) {
  const value = String(file || '')
  // A backslash marks a Windows-style path, whatever the host platform is.
  if (value.includes('\\')) return path.win32.basename(value)
  return path.basename(value)
}
357
+
358
/**
 * Turn an arbitrary name into a safe file name.
 *
 * Takes the base name, applies NFKC normalisation, replaces reserved and
 * control characters with `_`, collapses whitespace, and strips leading and
 * trailing spaces, dots and underscores.
 *
 * @param {string} [name] - Proposed file name.
 * @returns {string} Sanitised name; `attachment` when nothing usable remains.
 */
function safeFileName(name) {
  const base = path.basename(name || 'attachment').normalize('NFKC')
  const withoutReserved = base.replace(/[<>:"/\\|?*\x00-\x1F]/g, '_')
  const singleSpaced = withoutReserved.replace(/\s+/g, ' ')
  const trimmed = singleSpaced.replace(/^[ ._]+|[ ._]+$/g, '')
  return trimmed || 'attachment'
}
366
+
367
/**
 * Choose a destination path in `dir` for an inbound attachment.
 *
 * Returns `dir/<safe name>` when that path is free; otherwise inserts the
 * first 12 characters of `hash` before the extension.
 *
 * @param {string} dir - Target directory.
 * @param {string} name - Preferred file name (sanitised before use).
 * @param {string} hash - Content hash as a hex string.
 * @returns {string}
 */
function uniqueInboundPath(dir, name, hash) {
  const fileName = safeFileName(name || 'attachment')
  const plainPath = path.join(dir, fileName)
  if (!fs.existsSync(plainPath)) return plainPath

  const extension = path.extname(fileName)
  const stem = extension ? fileName.slice(0, -extension.length) : fileName
  const suffix = hash.slice(0, 12)
  return path.join(dir, `${stem}-${suffix}${extension}`)
}
375
+
376
/**
 * Map a file extension (with leading dot, any case) to a MIME type.
 *
 * @param {string} ext - Extension such as `.png`.
 * @returns {string} `application/octet-stream` for unknown extensions.
 */
function mimeTypeFromExt(ext) {
  switch (String(ext || '').toLowerCase()) {
    case '.jpg':
    case '.jpeg':
      return 'image/jpeg'
    case '.png':
      return 'image/png'
    case '.gif':
      return 'image/gif'
    case '.webp':
      return 'image/webp'
    case '.mp4':
      return 'video/mp4'
    case '.mov':
      return 'video/quicktime'
    case '.pdf':
      return 'application/pdf'
    case '.txt':
      return 'text/plain'
    case '.docx':
      return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    case '.xlsx':
      return 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    case '.pptx':
      return 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
    case '.zip':
      return 'application/zip'
    default:
      return 'application/octet-stream'
  }
}
392
+
393
/**
 * Classify an extension as `image`, `video` or `file` using the module-level
 * extension sets.
 *
 * @param {string} ext - Extension with leading dot, any case.
 * @returns {'image'|'video'|'file'}
 */
function attachmentTypeFromExt(ext) {
  const key = String(ext || '').toLowerCase()
  if (IMAGE_EXTENSIONS.has(key)) return 'image'
  return VIDEO_EXTENSIONS.has(key) ? 'video' : 'file'
}
398
+
399
/**
 * Delay to wait after sending a file before confirming it by screenshot.
 *
 * @param {string} file - Outbound file path.
 * @returns {number} 8000 ms for images, 15000 ms for videos, 2000 ms otherwise.
 */
function postPasteDelayMs(file) {
  const { type } = classifyOutboundFile(file)
  switch (type) {
    case 'image':
      return 8_000
    case 'video':
      return 15_000
    default:
      return 2_000
  }
}
405
+
406
// Lower-case extensions (with leading dot) classified as images.
const IMAGE_EXTENSIONS = new Set([
  '.apng', '.avif', '.bmp', '.gif', '.heic', '.heif',
  '.jpeg', '.jpg', '.png', '.tif', '.tiff', '.webp',
])
// Lower-case extensions (with leading dot) classified as videos.
const VIDEO_EXTENSIONS = new Set([
  '.3g2', '.3gp', '.avi', '.m4v', '.mkv', '.mov',
  '.mp4', '.mpeg', '.mpg', '.webm', '.wmv',
])
408
+
409
/**
 * Describe an outbound file as an attachment record.
 *
 * @param {string} file - Local path of the file being sent.
 * @returns {{type: 'image'|'video'|'file', name: string, localPath: string, availability: 'edge-local'}}
 */
export function classifyOutboundFile(file) {
  const name = basenameForAnyPlatform(file)
  const extension = path.extname(name).toLowerCase()

  let type = 'file'
  if (IMAGE_EXTENSIONS.has(extension)) {
    type = 'image'
  } else if (VIDEO_EXTENSIONS.has(extension)) {
    type = 'video'
  }

  return { type, name, localPath: file, availability: 'edge-local' }
}
420
+
421
/**
 * List existing directories where a clicked attachment may be saved.
 *
 * With `USERPROFILE` set: Downloads, Documents, Desktop and
 * `Documents\WeChat Files` beneath it, joined with Windows separators.
 * Otherwise `<HOME>/Downloads`. Duplicates and missing directories are dropped.
 *
 * @returns {string[]}
 */
function candidateDownloadRoots() {
  const userProfile = process.env.USERPROFILE
  const roots = []
  if (userProfile) {
    const subPaths = [['Downloads'], ['Documents'], ['Desktop'], ['Documents', 'WeChat Files']]
    for (const parts of subPaths) {
      roots.push(path.win32.join(userProfile, ...parts))
    }
  } else {
    const home = process.env.HOME || ''
    if (home) roots.push(path.join(home, 'Downloads'))
  }
  return [...new Set(roots)].filter(root => root && fs.existsSync(root))
}
431
+
432
/**
 * Collect regular files under `root`, depth-first, up to `limit` entries.
 *
 * Unreadable directories and files that vanish before `stat` are skipped.
 * The limit is enforced per file; it used to be checked only between
 * directories, so one large directory could return far more than `limit`.
 *
 * @param {string} root - Directory to scan.
 * @param {number} maxDepth - Levels of sub-directories to descend (0 = root only).
 * @param {number} limit - Maximum number of files to return.
 * @returns {{path: string, size: number, mtimeMs: number}[]}
 */
function walkFiles(root, maxDepth, limit) {
  const result = []
  const stack = [{ dir: root, depth: 0 }]
  while (stack.length && result.length < limit) {
    const current = stack.pop()
    let entries = []
    try {
      entries = fs.readdirSync(current.dir, { withFileTypes: true })
    } catch {
      continue
    }
    for (const entry of entries) {
      if (result.length >= limit) break
      const filePath = path.join(current.dir, entry.name)
      if (entry.isDirectory()) {
        if (current.depth < maxDepth) stack.push({ dir: filePath, depth: current.depth + 1 })
        continue
      }
      if (!entry.isFile()) continue
      try {
        const stat = fs.statSync(filePath)
        result.push({ path: filePath, size: stat.size, mtimeMs: stat.mtimeMs })
      } catch {
        // Ignore transient files while WeChat is writing them.
      }
    }
  }
  return result
}
460
+
461
/**
 * Snapshot the files currently under every candidate download root.
 *
 * Each root is scanned up to 4 directory levels deep, at most 5000 files.
 *
 * @returns {Map<string, {path: string, size: number, mtimeMs: number}>} Keyed by path.
 */
function snapshotDownloadCandidates() {
  const byPath = new Map()
  candidateDownloadRoots().forEach((root) => {
    walkFiles(root, 4, 5000).forEach((entry) => {
      byPath.set(entry.path, entry)
    })
  })
  return byPath
}
470
+
471
/**
 * Copy a downloaded file into `targetDir` and describe it as an attachment.
 *
 * The destination name is the attachment's declared name when that name ends
 * in an extension, otherwise the source file's base name. The directory is
 * created on demand and an existing destination file is left untouched.
 *
 * @param {{path: string}} source - Downloaded file to copy.
 * @param {string} targetDir - Destination directory.
 * @param {{name?: string, type?: string, mimeType?: string}} [attachment] - OCR-provided metadata.
 * @returns {{type: string, name: string, mimeType: string, size: number, localPath: string, hash: string, availability: 'edge-local'}}
 *   `hash` is the SHA-256 hex digest of the content.
 */
export function copyInboundAttachment(source, targetDir, attachment) {
  const contents = fs.readFileSync(source.path)
  const hash = crypto.createHash('sha256').update(contents).digest('hex')

  const declaredName = attachment?.name
  const declaredHasExtension = Boolean(declaredName) && /\.[\p{L}\p{N}]+$/u.test(declaredName)
  const preferredName = declaredHasExtension ? declaredName : path.basename(source.path)

  const localPath = uniqueInboundPath(targetDir, preferredName, hash)
  fs.mkdirSync(targetDir, { recursive: true })
  if (!fs.existsSync(localPath)) {
    fs.writeFileSync(localPath, contents)
  }

  const extension = path.extname(localPath).toLowerCase()
  return {
    type: attachment?.type || attachmentTypeFromExt(extension),
    name: path.basename(localPath),
    mimeType: attachment?.mimeType || mimeTypeFromExt(extension),
    size: contents.byteLength,
    localPath,
    hash,
    availability: 'edge-local',
  }
}
490
+
491
/**
 * Click each attachment bubble and copy the file WeChat saves into
 * `options.downloadAttachmentsDir`.
 *
 * Messages with no attachment, with an attachment that already has a
 * `localPath` or `url`, or with no box pass through unchanged. For the others
 * the bubble is clicked, a 2 s wait and a best-effort `{ESC}` follow, and the
 * download roots are compared with the previous snapshot. If no new file is
 * found the attachment is marked `pending-download`.
 *
 * @param {object[]} recentMessages - Summarised OCR rows.
 * @param {{payload: object, imageSize: object}} messageCapture - Capture the rows came from.
 * @param {{downloadAttachments?: boolean, downloadAttachmentsDir?: string}} options
 * @returns {Promise<{recentMessages: object[], downloads: object[]}>}
 */
async function localizeInboundAttachments(recentMessages, messageCapture, options) {
  const disabled = options.downloadAttachments === false || !options.downloadAttachmentsDir
  if (disabled) return { recentMessages, downloads: [] }

  const targetDir = path.resolve(options.downloadAttachmentsDir)
  fs.mkdirSync(targetDir, { recursive: true })

  const downloads = []
  const updated = []
  let baseline = snapshotDownloadCandidates()

  for (const message of recentMessages) {
    const attachment = message.attachment
    const needsDownload = Boolean(attachment) && !attachment.localPath && !attachment.url && Boolean(message.box)
    if (!needsDownload) {
      updated.push(message)
      continue
    }

    const startedAt = Date.now()
    const bubblePoint = pointFromObservation(messageCapture.payload, message, messageCapture.imageSize)
    await click(bubblePoint, options, ['--no-raise'])
    await sleep(2_000)
    try {
      await press('{ESC}', options)
    } catch {
      // Best effort: closing a preview must not abort the download check.
    }

    const downloaded = selectDownloadedAttachment(baseline, snapshotDownloadCandidates(), startedAt, attachment)
    if (downloaded) {
      const localized = copyInboundAttachment(downloaded, targetDir, attachment)
      updated.push({ ...message, text: message.text || localized.name, attachment: localized })
      downloads.push({
        text: message.text,
        ok: true,
        sourcePath: downloaded.path,
        localPath: localized.localPath,
        size: localized.size,
      })
    } else {
      const providerError = 'No new downloaded file was observed after clicking attachment bubble'
      updated.push({
        ...message,
        attachment: { ...attachment, availability: 'pending-download', providerError },
      })
      downloads.push({ text: message.text, ok: false, providerError })
    }
    // Re-baseline so the next attachment only sees files that appear after this point.
    baseline = snapshotDownloadCandidates()
  }

  return { recentMessages: updated, downloads }
}
532
+
533
/**
 * Return the files whose base name appears in no OCR row.
 *
 * A file counts as confirmed when its whitespace-stripped, lower-cased base
 * name is contained in a row's text or in its attachment `filename`/`name`.
 *
 * @param {object[]} observations - OCR rows; non-arrays are treated as empty.
 * @param {string[]} files - Paths that were sent; a falsy value counts as empty.
 * @returns {string[]} Unconfirmed paths, in input order.
 */
export function missingConfirmedFiles(observations, files) {
  const rows = Array.isArray(observations) ? observations : []
  const mentions = (row, needle) => {
    const text = normalizedMessageText(observationText(row))
    const attachmentName = normalizedMessageText(row?.attachment?.filename || row?.attachment?.name)
    return text.includes(needle) || attachmentName.includes(needle)
  }
  return (files || []).filter((file) => {
    const needle = normalizedMessageText(basenameForAnyPlatform(file))
    return rows.every(row => !mentions(row, needle))
  })
}
545
+
546
/**
 * Make sure the target conversation is open, searching for it if needed.
 *
 * 1. Capture the window. If OCR already confirms the title, return that
 *    capture; a failure of this first OCR call is only logged to stderr.
 * 2. Otherwise click the search box, select all, paste the group name,
 *    capture the results, and click the best matching row.
 * 3. Press `{ESC}`, capture again, and require the title to be confirmed
 *    unless `options.allowWeakTitle` is set.
 *
 * @param {object} options - Flow options; reads `group` and `allowWeakTitle`.
 * @param {string[]} artifacts - Receives the path of every screenshot taken.
 * @returns {Promise<object>} Capture of the opened conversation.
 * @throws {Error} When the group is not found or the title is not confirmed.
 */
async function openConversationBySearch(options, artifacts) {
  const { group } = options

  const initial = await capture('window', options, 'window-before-search')
  artifacts.push(initial.file)
  try {
    const { observations } = await recognizeScreenshot(initial, options, 'title-confirmation')
    if (findTitleConfirmation(observations, group)) return initial
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    process.stderr.write(`Initial title OCR was skipped: ${reason}\n`)
  }

  await click(geometryPoint(initial.payload, 'search'), options)
  await sleep(180)
  await press('^a', options)
  await pasteText(group, options)
  await sleep(650)

  const searchCapture = await capture('window', options, 'search-results')
  artifacts.push(searchCapture.file)
  const searchOcr = await recognizeScreenshot(searchCapture, options, 'search-results')
  const match = findConversationInSearchResults(searchOcr.observations, group)
  if (!match) {
    throw new Error(`Could not find target group '${options.group}' under search result section '群聊'. See ${searchCapture.file}`)
  }
  const matchPoint = pointFromObservation(searchCapture.payload, match, searchCapture.imageSize)
  await click(matchPoint, options, ['--no-raise'])
  await sleep(350)
  // WeChat 4.x can keep the transient search result panel open after clicking a result.
  // Close it explicitly so later input/send geometry always targets the conversation composer.
  await press('{ESC}', options)
  await sleep(500)

  const opened = await capture('window', options, 'opened-conversation')
  artifacts.push(opened.file)
  const titleOcr = await recognizeScreenshot(opened, options, 'title-confirmation')
  // Some OCR models miss the top title although the click was correct, so callers
  // may opt out of strict confirmation with allowWeakTitle.
  const titleConfirmed = Boolean(findTitleConfirmation(titleOcr.observations, group))
  if (!titleConfirmed && !options.allowWeakTitle) {
    throw new Error(`Opened conversation title was not confirmed as '${options.group}'. See ${opened.file}`)
  }
  return opened
}
589
+
590
/**
 * Run the visual flow: open the conversation, read recent messages,
 * optionally localise inbound attachments, then optionally send text and files.
 *
 * Every send is confirmed by OCR on a fresh screenshot; an unconfirmed send
 * throws. A `summary.json` is written into the capture directory.
 *
 * Fixes over the previous version:
 * - `group` is validated before any directory is created, so a bad call no
 *   longer leaves an empty capture directory behind.
 * - `recentLimit`, `ocrTimeoutMs` and `openTimeoutMs` fall back to their
 *   defaults when not positive finite numbers, instead of becoming NaN.
 *
 * @param {object} input - Options: group (required), replyText, files,
 *   recentLimit, captureDir, dryRun, plus OCR, helper and download settings
 *   consumed by the helpers this function calls.
 * @returns {Promise<{ok: true, group: string, captureDir: string, recentMessages: object[], downloads: object[], sent: object[], artifacts: string[]}>}
 * @throws {Error} When `group` is missing or any step cannot be confirmed.
 */
export async function runVisualFlow(input) {
  if (!input?.group) throw new Error('--group is required')

  const positiveOr = (value, fallback) => {
    const parsed = Number(value)
    return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback
  }

  const captureDir = path.resolve(input.captureDir || fs.mkdtempSync(path.join(fs.realpathSync(process.env.TEMP || process.env.TMP || '/tmp'), 'shennian-wechat-rpa-win-visual-')))
  fs.mkdirSync(captureDir, { recursive: true })
  const options = {
    ...input,
    captureDir,
    step: 1,
    recentLimit: positiveOr(input.recentLimit, 5),
    ocrTimeoutMs: positiveOr(input.ocrTimeoutMs, 45_000),
    openTimeoutMs: positiveOr(input.openTimeoutMs, 12_000),
  }
  const artifacts = []

  const opened = await openConversationBySearch(options, artifacts)
  const messageCapture = await capture('messages', options, 'messages-before-send')
  artifacts.push(messageCapture.file)
  const messageOcr = await recognizeScreenshot(messageCapture, options, 'message-read')
  let recentMessages = summarizeObservations(messageOcr.observations, options.recentLimit)
  const localization = await localizeInboundAttachments(recentMessages, messageCapture, options)
  recentMessages = localization.recentMessages

  const sent = []
  if (!options.dryRun && options.replyText) {
    await click(geometryPoint(opened.payload, 'input'), options)
    await pasteText(options.replyText, options)
    await sleep(250)
    await press('{ENTER}', options)
    await sleep(900)
    const confirmCapture = await capture('window', options, 'after-text-send')
    artifacts.push(confirmCapture.file)
    const confirmOcr = await recognizeScreenshot(confirmCapture, options, 'send-confirmation')
    if (!observationsContainText(confirmOcr.observations, options.replyText)) {
      throw new Error(`Sent text was not confirmed by OCR: '${options.replyText}'. See ${confirmCapture.file}`)
    }
    sent.push({ type: 'text', text: options.replyText, observations: summarizeObservations(confirmOcr.observations, options.recentLimit) })
  }

  if (!options.dryRun && options.files?.length) {
    const confirmedFiles = []
    const sentAttachments = []
    const confirmationObservations = []
    // Files go one at a time so each can be confirmed before the next is pasted.
    for (const [index, file] of options.files.entries()) {
      await click(geometryPoint(opened.payload, 'input'), options)
      await pasteFiles([file], options)
      await sleep(850)
      const pendingCapture = await capture('input', options, `pending-file-${index + 1}`)
      artifacts.push(pendingCapture.file)
      await click(geometryPoint(opened.payload, 'send'), options)
      await sleep(postPasteDelayMs(file))
      const fileConfirmCapture = await capture('window', options, `after-file-${index + 1}-send`)
      artifacts.push(fileConfirmCapture.file)
      const fileConfirmOcr = await recognizeScreenshot(fileConfirmCapture, options, 'send-confirmation')
      const missing = missingConfirmedFiles(fileConfirmOcr.observations, [file])
      if (missing.length > 0) {
        throw new Error(`Sent file was not confirmed by OCR: ${basenameForAnyPlatform(file)}. See ${fileConfirmCapture.file}`)
      }
      confirmedFiles.push(file)
      sentAttachments.push(classifyOutboundFile(file))
      confirmationObservations.push(...summarizeObservations(fileConfirmOcr.observations, options.recentLimit))
    }
    sent.push({
      type: 'files',
      files: confirmedFiles,
      attachments: sentAttachments,
      observations: confirmationObservations.slice(-options.recentLimit),
    })
  }

  const summary = {
    ok: true,
    group: options.group,
    captureDir,
    recentMessages,
    downloads: localization.downloads,
    sent,
    artifacts,
  }
  fs.writeFileSync(path.join(captureDir, 'summary.json'), `${JSON.stringify(summary, null, 2)}\n`)
  return summary
}
671
+
672
/**
 * CLI entry point: parse `process.argv`, run the visual flow, and print the
 * summary as JSON on stdout.
 *
 * Prints the help text and returns when called with no arguments or with
 * `--help`/`-h`.
 *
 * @returns {Promise<void>}
 * @throws {Error} On a missing option value or unknown arguments.
 */
async function main() {
  const argv = process.argv.slice(2)
  const wantsHelp = argv.length === 0 || argv.includes('--help') || argv.includes('-h')
  if (wantsHelp) {
    printHelp()
    return
  }

  const option = name => takeOption(argv, name)
  const flag = name => takeFlag(argv, name)

  // Each call removes what it parsed from argv, so leftovers are unknown arguments.
  const files = takeMany(argv, '--file')
  const options = {
    group: option('--group'),
    replyText: option('--reply-text'),
    recentLimit: option('--recent-limit') || '5',
    captureDir: option('--capture-dir'),
    downloadAttachmentsDir: option('--download-attachments-dir'),
    ocrUrl: option('--ocr-url'),
    token: option('--token'),
    ocrFixture: option('--ocr-fixture'),
    helper: option('--helper'),
    channelId: option('--channel-id'),
    ocrTimeoutMs: Number(option('--ocr-timeout-ms') || 45_000),
    openTimeoutMs: Number(option('--open-timeout-ms') || 12_000),
    allowWeakTitle: flag('--allow-weak-title'),
    downloadAttachments: !flag('--no-download-attachments'),
    dryRun: flag('--dry-run'),
    files,
  }
  if (argv.length > 0) throw new Error(`Unknown arguments: ${argv.join(' ')}`)

  const summary = await runVisualFlow(options)
  process.stdout.write(`${JSON.stringify(summary, null, 2)}\n`)
}
701
+
702
// Run the CLI only when this file is the script Node was started with,
// not when it is imported as a module.
const invokedScript = process.argv[1]
const isDirectRun = Boolean(invokedScript) && path.resolve(invokedScript) === fileURLToPath(import.meta.url)
if (isDirectRun) {
  main().catch((error) => {
    const message = error instanceof Error ? error.message : String(error)
    console.error(message)
    process.exit(1)
  })
}