shennian 0.2.74 → 0.2.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,668 @@
1
+ #!/usr/bin/env node
2
+ // @arch docs/features/wechat-rpa-channel.md
3
+ // @test tests/wechat-rpa-win-visual.test.mjs
4
+
5
+ import { spawn } from 'node:child_process'
6
+ import crypto from 'node:crypto'
7
+ import fs from 'node:fs'
8
+ import path from 'node:path'
9
+ import { fileURLToPath } from 'node:url'
10
+ import { selectDownloadedAttachment } from './wechat-rpa-download-candidates.mjs'
11
+
12
// Package root: one level above the directory that contains this script.
const scriptDirectory = path.dirname(fileURLToPath(import.meta.url))
const repoRoot = path.resolve(scriptDirectory, '..')
// Bridge CLI script that every native action in this file is delegated to.
const helperBridge = path.join(repoRoot, 'scripts', 'wechat-rpa-win.mjs')

// Texts that are treated as section headers when reading search-result OCR rows.
const SECTION_LABELS = new Set([
  '群聊',
  '联系人',
  '聊天记录',
  '公众号',
  '小程序',
  '朋友圈',
  '企业',
  '企业微信',
])
16
+
17
/**
 * Print CLI usage to stdout.
 *
 * Lists every option that `main` parses, including `--channel-id`,
 * `--ocr-timeout-ms` and `--allow-weak-title`.
 */
function printHelp() {
  console.log(`Usage:
  node scripts/wechat-rpa-win-visual.mjs --group ABC --recent-limit 5
  node scripts/wechat-rpa-win-visual.mjs --group ABC --reply-text "我是 AI" --ocr-url https://shennian.net/integrations/wechat-rpa/ocr --token <token>
  node scripts/wechat-rpa-win-visual.mjs --group ABC --file C:\\tmp\\demo.png --file C:\\tmp\\demo.mp4 --ocr-fixture C:\\tmp\\search-ocr.json

Options:
  --group <name>            Required target group/conversation name.
  --reply-text <text>       Text to send after opening the group.
  --file <path>             File/image/video to send. Repeatable.
  --recent-limit <n>        Number of OCR message observations to include in summary. Default: 5.
  --capture-dir <dir>       Directory for screenshots and debug JSON. Default: temp shennian-wechat-rpa-win-visual-*.
  --download-attachments-dir <dir>
                            Copy clicked inbound attachments into this directory.
  --no-download-attachments
                            Keep inbound attachments as metadata-only/pending-download.
  --ocr-url <url>           Shennian OCR endpoint, e.g. /integrations/wechat-rpa/ocr.
  --token <token>           Bearer token for --ocr-url. Also reads WECHAT_RPA_OCR_TOKEN.
  --ocr-fixture <path>      Local OCR fixture JSON for deterministic selection tests/debugging.
  --ocr-timeout-ms <n>      Timeout for each --ocr-url request. Default: 45000.
  --channel-id <id>         Channel id forwarded to the OCR endpoint with each request.
  --helper <path>           Override native helper exe passed through to scripts/wechat-rpa-win.mjs.
  --open-timeout-ms <n>     Timeout waiting for title confirmation. Default: 12000.
  --allow-weak-title        Continue when OCR cannot confirm the opened conversation title.
  --dry-run                 Open/read only; do not send reply-text/files.

This is the Windows commercial baseline visual RPA orchestrator: WeChat foreground + screenshot + OCR boxes + native click/paste/press.`)
}
42
+
43
/**
 * Remove `name` and the value that follows it from `argv` and return the value.
 *
 * @param {string[]} argv - Argument list; mutated when the option is found.
 * @param {string} name - Option name, e.g. `--group`.
 * @returns {string|null} The option value, or null when the option is absent.
 * @throws {Error} When the option has no value or the next token starts with `--`.
 */
function takeOption(argv, name) {
  const position = argv.indexOf(name)
  if (position === -1) return null

  const candidate = argv[position + 1]
  const isMissing = !candidate || candidate.startsWith('--')
  if (isMissing) throw new Error(`Missing value for ${name}`)

  argv.splice(position, 2)
  return candidate
}
51
+
52
/**
 * Collect the values of every occurrence of a repeatable option, in order,
 * removing each occurrence from `argv`.
 *
 * @param {string[]} argv - Argument list; mutated.
 * @param {string} name - Option name, e.g. `--file`.
 * @returns {string[]} Collected values (empty when the option never appears).
 */
function takeMany(argv, name) {
  const collected = []
  let next = takeOption(argv, name)
  while (next !== null) {
    collected.push(next)
    next = takeOption(argv, name)
  }
  return collected
}
61
+
62
/**
 * Remove a boolean flag from `argv` and report whether it was present.
 *
 * @param {string[]} argv - Argument list; mutated when the flag is found.
 * @param {string} name - Flag name, e.g. `--dry-run`.
 * @returns {boolean} True when the flag was present.
 */
function takeFlag(argv, name) {
  const position = argv.indexOf(name)
  const present = position >= 0
  if (present) argv.splice(position, 1)
  return present
}
68
+
69
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms)
  })
}
72
+
73
/**
 * Canonical form of a conversation name for equality comparison.
 *
 * Removes all whitespace and zero-width characters, strips one trailing
 * parenthesized digit group such as "(12)" — in ASCII or full-width
 * parentheses — and lower-cases the result.
 *
 * @param {*} value - Raw name; falsy values are treated as an empty string.
 * @returns {string} Normalized name (possibly empty).
 */
export function normalizeConversationName(value) {
  return String(value || '')
    .replace(/[\s\u200b\u200c\u200d]+/g, '')
    // \uFF08 / \uFF09 are the full-width forms of "(" and ")".
    .replace(/[(\uFF08]\d+[)\uFF09]$/, '')
    .trim()
    .toLowerCase()
}
80
+
81
/**
 * Collapse every whitespace run to a single space and drop leading/trailing whitespace.
 *
 * @param {*} value - Raw text; falsy values are treated as an empty string.
 * @returns {string}
 */
function normalizeText(value) {
  const words = String(value || '').split(/\s+/)
  return words.filter(Boolean).join(' ')
}
84
+
85
/**
 * Center point of a box; missing or falsy fields count as 0.
 *
 * @param {{x?: number, y?: number, width?: number, height?: number}} box
 * @returns {{x: number, y: number}}
 */
function centerOfBox(box) {
  const left = Number(box.x || 0)
  const boxWidth = Number(box.width || 0)
  const top = Number(box.y || 0)
  const boxHeight = Number(box.height || 0)
  return { x: left + boxWidth / 2, y: top + boxHeight / 2 }
}
91
+
92
/**
 * Whitespace-normalized text of an OCR observation; a missing row or text yields ''.
 *
 * @param {{text?: *}|null|undefined} row
 * @returns {string}
 */
function observationText(row) {
  const rawText = row?.text
  return normalizeText(rawText)
}
95
+
96
/**
 * Confidence of an OCR observation clamped to [0, 1].
 * Falls back to 0.75 when the value is not a finite number.
 *
 * @param {{confidence?: *}|null|undefined} row
 * @returns {number}
 */
function observationConfidence(row) {
  const parsed = Number(row?.confidence)
  if (!Number.isFinite(parsed)) return 0.75
  if (parsed <= 0) return 0
  if (parsed >= 1) return 1
  return parsed
}
100
+
101
/**
 * Whether an OCR row's text is one of the known section header labels.
 *
 * @param {object} row - OCR observation.
 * @returns {boolean}
 */
function isSection(row) {
  const label = observationText(row)
  if (SECTION_LABELS.has(label)) return true
  return /^(群聊|联系人|聊天记录|公众号|小程序|企业微信)$/.test(label)
}
105
+
106
/**
 * Pick the OCR row in a search-results screenshot that names the target conversation.
 *
 * Only rows whose normalized text equals the normalized target are considered.
 * Matches located below the '群聊' header (and above the next section header)
 * are preferred; when none exist, every exact match is a candidate. Candidates
 * are ranked by OCR confidence, a bonus for sitting below the '群聊' header,
 * the vertical distance from that header, and a small horizontal penalty.
 *
 * @param {Array<object>} observations - OCR rows with `text` and `box`.
 * @param {string} targetName - Conversation name to look for.
 * @returns {object|null} The best matching original row, or null when nothing matches.
 * @throws {Error} When `targetName` normalizes to an empty string.
 */
export function findConversationInSearchResults(observations, targetName) {
  const wanted = normalizeConversationName(targetName)
  if (!wanted) throw new Error('targetName is required')

  // Keep rows that carry both a box and text, then order them top-to-bottom, left-to-right.
  const source = Array.isArray(observations) ? observations : []
  const usable = source.filter(row => row?.box && observationText(row))
  const entries = usable.map((row, index) => ({
    row,
    index,
    text: observationText(row),
    center: centerOfBox(row.box),
  }))
  entries.sort((first, second) => first.center.y - second.center.y || first.center.x - second.center.x)

  const headers = entries.filter(entry => isSection(entry.row))
  const groupHeader = headers.find(entry => entry.text === '群聊')
  let followingHeader = null
  if (groupHeader) {
    followingHeader = headers.find(entry => entry.center.y > groupHeader.center.y + 0.002)
  }

  const exactMatches = entries.filter(entry => normalizeConversationName(entry.text) === wanted)
  const isInsideGroupSection = (entry) => {
    if (entry.center.y <= groupHeader.center.y) return false
    return !(followingHeader && entry.center.y >= followingHeader.center.y)
  }
  const insideGroupSection = groupHeader ? exactMatches.filter(isInsideGroupSection) : []

  const pool = insideGroupSection.length > 0 ? insideGroupSection : exactMatches
  if (pool.length === 0) return null

  const scoreOf = (entry) => {
    const { x, y } = entry.center
    const distanceBelowHeader = groupHeader ? Math.max(0, y - groupHeader.center.y) : y
    // Coordinates <= 1 are scored on a 0..1 scale around 0.2; larger ones on a pixel scale around 200.
    const horizontalPenalty = x <= 1 ? Math.abs(x - 0.2) * 0.5 : Math.abs(x - 200) * 0.005
    const belowHeaderBonus = groupHeader && y > groupHeader.center.y ? 50 : 0
    return observationConfidence(entry.row) * 100
      + belowHeaderBonus
      - distanceBelowHeader * 5
      - horizontalPenalty
  }

  const ranked = pool
    .map(entry => ({ ...entry, score: scoreOf(entry) }))
    .sort((first, second) => second.score - first.score)
  return ranked[0].row
}
147
+
148
/**
 * Find the first OCR row whose normalized text equals the normalized target name.
 *
 * @param {Array<object>} observations - OCR rows.
 * @param {string} targetName - Expected conversation name.
 * @returns {object|null} The matching row, or null when there is none.
 */
export function findTitleConfirmation(observations, targetName) {
  const wanted = normalizeConversationName(targetName)
  if (!Array.isArray(observations)) return null
  for (const row of observations) {
    const rowName = normalizeConversationName(observationText(row))
    if (rowName === wanted) return row || null
  }
  return null
}
153
+
154
/**
 * Convert the center of an OCR box into a screen coordinate.
 *
 * A box whose x/y/width/height magnitudes are all <= 1 is treated as fractions
 * of the image; otherwise its values are treated as image pixels. Image pixels
 * are then scaled to the capture bounds and offset by the bounds origin.
 *
 * @param {{bounds: {x: number, y: number, width: number, height: number}}} capturePayload
 * @param {{box: {x: number, y: number, width: number, height: number}}} observation
 * @param {{width?: number, height?: number}} [imageSize] - Screenshot size; defaults to the bounds size.
 * @returns {{x: number, y: number}} Rounded coordinate.
 * @throws {Error} When the payload has no bounds or the observation has no box.
 */
export function pointFromObservation(capturePayload, observation, imageSize) {
  if (!capturePayload?.bounds) throw new Error('capture payload missing bounds')
  if (!observation?.box) throw new Error('observation missing box')

  const { bounds } = capturePayload
  const { box } = observation
  const imageWidth = Number(imageSize?.width || bounds.width)
  const imageHeight = Number(imageSize?.height || bounds.height)

  const left = Number(box.x)
  const top = Number(box.y)
  const boxWidth = Number(box.width)
  const boxHeight = Number(box.height)
  const largestMagnitude = Math.max(...[left, top, boxWidth, boxHeight].map(value => Math.abs(value)))
  const isNormalized = largestMagnitude <= 1

  let centerX = left + boxWidth / 2
  let centerY = top + boxHeight / 2
  if (isNormalized) {
    centerX *= imageWidth
    centerY *= imageHeight
  }

  const scaleX = Number(bounds.width) / imageWidth
  const scaleY = Number(bounds.height) / imageHeight
  return {
    x: Math.round(Number(bounds.x) + centerX * scaleX),
    y: Math.round(Number(bounds.y) + centerY * scaleY),
  }
}
174
+
175
/**
 * Screen coordinate of a fixed UI target, expressed as a fraction of the window bounds.
 *
 * Uses `capturePayload.mainWindow.bounds` when present, otherwise `capturePayload.bounds`.
 *
 * @param {object} capturePayload - Capture payload carrying window bounds.
 * @param {'search'|'input'|'send'} kind - Which target to locate.
 * @returns {{x: number, y: number}} Rounded coordinate.
 * @throws {Error} When bounds are missing or `kind` is not a known target.
 */
export function geometryPoint(capturePayload, kind) {
  const bounds = capturePayload?.mainWindow?.bounds || capturePayload?.bounds
  if (!bounds) throw new Error('capture payload missing main window bounds')
  // A Map (not an object literal) so inherited keys such as "constructor" are rejected as unknown.
  const presets = new Map([
    ['search', [0.186, 0.087]],
    // Right-side composer: avoid the left search overlay and click inside the text area.
    ['input', [0.58, 0.92]],
    ['send', [0.932, 0.945]],
  ])
  const pair = presets.get(kind)
  if (!pair) throw new Error(`Unknown geometry point: ${kind}`)
  const [fractionX, fractionY] = pair
  return {
    x: Math.round(Number(bounds.x) + Number(bounds.width) * fractionX),
    y: Math.round(Number(bounds.y) + Number(bounds.height) * fractionY),
  }
}
191
+
192
/**
 * Verify that a capture payload describes a window large enough to be the chat main window.
 *
 * Uses `capturePayload.mainWindow.bounds` when present, otherwise `capturePayload.bounds`.
 * The window must be at least 760x600; missing or non-numeric dimensions are rejected too.
 *
 * @param {object} capturePayload - Capture payload carrying window bounds.
 * @param {string} [phase='capture'] - Label prefixed to error messages.
 * @returns {object} The same `capturePayload`, for chaining.
 * @throws {Error} When bounds are missing or the window is too small / has invalid dimensions.
 */
export function assertUsableMainWindow(capturePayload, phase = 'capture') {
  const bounds = capturePayload?.mainWindow?.bounds || capturePayload?.bounds
  if (!bounds) throw new Error(`${phase}: capture payload missing main window bounds`)
  const width = Number(bounds.width)
  const height = Number(bounds.height)
  // Written as a positive check so NaN dimensions fail instead of slipping past `<` comparisons.
  const largeEnough = width >= 760 && height >= 600
  if (!largeEnough) {
    throw new Error(
      `${phase}: WeChat chat main window is not available; got ${Math.round(width)}x${Math.round(height)}. ` +
      'The visible WeChat window looks like a login/prompt dialog, so the RPA flow was stopped.',
    )
  }
  return capturePayload
}
205
+
206
/**
 * Read the pixel dimensions of a PNG file from its header.
 *
 * Validates the 8-byte PNG signature and that the first chunk is IHDR before
 * reading width and height, so truncated or non-PNG files are rejected instead
 * of yielding zero or garbage dimensions.
 *
 * @param {string} filePath - Path of the PNG file.
 * @returns {{width: number, height: number}}
 * @throws {Error} When the file is not a PNG or its header is truncated/malformed.
 */
export function readPngSize(filePath) {
  const signature = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
  const fd = fs.openSync(filePath, 'r')
  try {
    // Signature (8) + chunk length (4) + chunk type (4) + width (4) + height (4).
    const header = Buffer.alloc(24)
    const bytesRead = fs.readSync(fd, header, 0, header.length, 0)
    if (bytesRead < signature.length || !header.subarray(0, signature.length).equals(signature)) {
      throw new Error(`Not a PNG file: ${filePath}`)
    }
    if (bytesRead < header.length || header.toString('ascii', 12, 16) !== 'IHDR') {
      throw new Error(`PNG header is truncated or malformed: ${filePath}`)
    }
    return { width: header.readUInt32BE(16), height: header.readUInt32BE(20) }
  } finally {
    fs.closeSync(fd)
  }
}
217
+
218
/**
 * Run the bridge script with the current Node executable and parse its JSON output.
 *
 * The child's stderr is forwarded to this process's stderr after the child closes.
 *
 * @param {string[]} args - Bridge command and its arguments.
 * @param {{helper?: string}} options - When `helper` is set, `--helper <path>` is appended.
 * @returns {Promise<*>} Parsed stdout JSON, or null when stdout is blank.
 * @throws {Error} When the child exits non-zero or prints invalid JSON.
 */
async function runBridge(args, options) {
  const nodeArgs = options.helper
    ? [helperBridge, ...args, '--helper', options.helper]
    : [helperBridge, ...args]
  const child = spawn(process.execPath, nodeArgs, {
    cwd: repoRoot,
    windowsHide: true,
    stdio: ['ignore', 'pipe', 'pipe'],
  })

  const captured = { stdout: '', stderr: '' }
  for (const stream of ['stdout', 'stderr']) {
    child[stream].setEncoding('utf8')
    child[stream].on('data', (chunk) => {
      captured[stream] += chunk
    })
  }

  const exitCode = await new Promise((resolve, reject) => {
    child.on('error', reject)
    child.on('close', resolve)
  })

  if (captured.stderr.trim()) process.stderr.write(captured.stderr)
  const commandText = args.join(' ')
  if (exitCode !== 0) {
    throw new Error(`wechat-rpa-win command failed (${exitCode}): ${commandText}\n${captured.stdout}`)
  }

  const text = captured.stdout.trim()
  if (!text) return null
  try {
    return JSON.parse(text)
  } catch (error) {
    throw new Error(`Invalid helper JSON for ${commandText}: ${error.message}\n${captured.stdout}`)
  }
}
248
+
249
/**
 * Capture a screenshot of `region` into the capture directory via the bridge.
 *
 * The file name is `<two-digit step>-<label or region>.png`; `options.step` is
 * incremented on every call. For the 'window' region the returned payload must
 * describe a usable main window.
 *
 * @param {string} region - Region name passed to the bridge `capture` command.
 * @param {{captureDir: string, step: number}} options - Flow options; `step` is mutated.
 * @param {string} [label] - File-name label; defaults to `region`.
 * @returns {Promise<{result: *, payload: *, file: string, imageSize: {width: number, height: number}}>}
 */
async function capture(region, options, label) {
  const sequence = String(options.step++).padStart(2, '0')
  const tag = label || region
  const file = path.join(options.captureDir, `${sequence}-${tag}.png`)

  const result = await runBridge(['capture', '--region', region, '--output', file], options)
  const payload = result?.payload
  if (region === 'window') assertUsableMainWindow(payload, tag)

  const imageSize = readPngSize(file)
  return { result, payload, file, imageSize }
}
255
+
256
/**
 * Issue the bridge `click` command at a screen coordinate.
 *
 * @param {{x: number, y: number}} point - Coordinate to click.
 * @param {object} options - Flow options forwarded to the bridge.
 * @param {string[]} [extra=[]] - Additional bridge arguments, e.g. `--no-raise`.
 * @returns {Promise<*>} The bridge result.
 */
async function click(point, options, extra = []) {
  const coordinates = ['--x', String(point.x), '--y', String(point.y)]
  return runBridge(['click', ...coordinates, ...extra], options)
}
259
+
260
/**
 * Issue the bridge `paste-text` command with the given text.
 *
 * @param {string} text - Text to paste.
 * @param {object} options - Flow options forwarded to the bridge.
 * @returns {Promise<*>} The bridge result.
 */
async function pasteText(text, options) {
  const command = ['paste-text', '--text', text]
  return runBridge(command, options)
}
263
+
264
/**
 * Issue the bridge `paste-files` command with one `--file` argument per path.
 *
 * @param {string[]} files - File paths to paste.
 * @param {object} options - Flow options forwarded to the bridge.
 * @returns {Promise<*>} The bridge result.
 */
async function pasteFiles(files, options) {
  const command = ['paste-files']
  for (const file of files) {
    command.push('--file', file)
  }
  return runBridge(command, options)
}
267
+
268
/**
 * Issue the bridge `press` command with a key sequence.
 *
 * @param {string} keys - Key sequence, e.g. `{ESC}` or `^a`.
 * @param {object} options - Flow options forwarded to the bridge.
 * @returns {Promise<*>} The bridge result.
 */
async function press(keys, options) {
  const command = ['press', '--keys', keys]
  return runBridge(command, options)
}
271
+
272
/**
 * Resolve the OCR result for `purpose` from a local fixture file.
 *
 * Accepted shapes: a bare array of observations, an object keyed by purpose,
 * or a single result object with an `observations` array.
 */
function loadOcrFixture(fixturePath, purpose) {
  const fixture = JSON.parse(fs.readFileSync(fixturePath, 'utf8'))
  if (Array.isArray(fixture)) return { ok: true, purpose, observations: fixture }
  // JSON null / string / number cannot describe an OCR result; fail with a clear message.
  if (fixture === null || typeof fixture !== 'object') {
    throw new Error(`OCR fixture must be a JSON array or object: ${fixturePath}`)
  }
  if (fixture[purpose]) return fixture[purpose]
  if (Array.isArray(fixture.observations)) return fixture
  throw new Error(`OCR fixture does not contain purpose '${purpose}'.`)
}

/**
 * Obtain OCR observations for a captured screenshot.
 *
 * With `options.ocrFixture` the result is read from the fixture file and no
 * request is made. Otherwise the screenshot is POSTed as base64 JSON to
 * `options.ocrUrl` with a bearer token, and the response JSON is also written
 * next to the screenshot as `<name>-<purpose>.ocr.json` for debugging.
 *
 * @param {{file: string}} capture - Capture descriptor holding the screenshot path.
 * @param {object} options - Reads ocrFixture, ocrUrl, token, group, channelId, ocrTimeoutMs.
 * @param {string} purpose - Purpose tag sent to the endpoint / looked up in the fixture.
 * @returns {Promise<object>} OCR result.
 * @throws {Error} When no OCR source or token is configured, the fixture is
 *   unusable, or the endpoint answers with a non-OK status.
 */
async function recognizeScreenshot(capture, options, purpose) {
  if (options.ocrFixture) return loadOcrFixture(options.ocrFixture, purpose)
  if (!options.ocrUrl) {
    throw new Error('OCR is required for visual flow. Pass --ocr-url/--token or --ocr-fixture.')
  }
  const token = options.token || process.env.WECHAT_RPA_OCR_TOKEN || ''
  if (!token) throw new Error('Missing OCR bearer token. Pass --token or WECHAT_RPA_OCR_TOKEN.')
  const imageBase64 = fs.readFileSync(capture.file).toString('base64')
  const response = await fetch(options.ocrUrl, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${token}`,
    },
    body: JSON.stringify({
      imageBase64,
      mimeType: 'image/png',
      purpose,
      conversationName: options.group,
      channelId: options.channelId,
    }),
    signal: AbortSignal.timeout(options.ocrTimeoutMs),
  })
  if (!response.ok) throw new Error(`OCR request failed: ${response.status} ${await response.text()}`)
  const json = await response.json()
  // Append the suffix when the capture is not named *.png so the screenshot is never overwritten.
  const debugSuffix = `-${purpose}.ocr.json`
  const debugPath = /\.png$/i.test(capture.file)
    ? capture.file.replace(/\.png$/i, debugSuffix)
    : `${capture.file}${debugSuffix}`
  fs.writeFileSync(debugPath, `${JSON.stringify(json, null, 2)}\n`)
  return json
}
310
+
311
/**
 * Build summary entries for the last `limit` OCR rows that have text.
 *
 * A limit of zero or less yields an empty list. A limit that is not a number
 * (undefined/NaN) keeps every row, matching the previous behavior for that case.
 *
 * @param {Array<object>} observations - OCR rows.
 * @param {number} limit - Maximum number of trailing rows to keep.
 * @returns {Array<{text: string, role: string, confidence: number, attachment?: object, box: *}>}
 */
function summarizeObservations(observations, limit) {
  const rows = (Array.isArray(observations) ? observations : [])
    .filter(row => observationText(row))
  const count = Number(limit)
  let recent = rows
  if (count <= 0) {
    // slice(-0) would return everything and a negative limit would drop rows from the head.
    recent = []
  } else if (count > 0) {
    recent = rows.slice(-count)
  }
  return recent.map(row => ({
    text: observationText(row),
    role: row.role || 'unknown',
    confidence: observationConfidence(row),
    ...(row.attachment ? { attachment: row.attachment } : {}),
    box: row.box,
  }))
}
323
+
324
/**
 * Comparison key for message text: all whitespace removed, lower-cased.
 *
 * @param {*} value - Raw text; falsy values are treated as an empty string.
 * @returns {string}
 */
function normalizedMessageText(value) {
  const text = String(value || '')
  return text.split(/\s+/).join('').toLowerCase()
}
327
+
328
/**
 * Whether any OCR row confirms that `text` is visible.
 *
 * Comparison ignores whitespace and case. A row confirms the text when the row
 * contains the text or the row is a fragment of the text. Rows with no text
 * never confirm anything.
 *
 * @param {Array<object>} observations - OCR rows.
 * @param {string} text - Text expected to be visible.
 * @returns {boolean} True when confirmed, or when `text` is empty after normalization.
 */
export function observationsContainText(observations, text) {
  const target = normalizedMessageText(text)
  if (!target) return true
  return (Array.isArray(observations) ? observations : []).some(row => {
    const content = normalizedMessageText(observationText(row))
    // Every string includes '', so an empty row must not count as a fragment match.
    if (!content) return false
    return content.includes(target) || target.includes(content)
  })
}
336
+
337
/**
 * Base name of a path: Windows rules when it contains a backslash,
 * otherwise the host platform's rules.
 *
 * @param {*} file - Path; falsy values are treated as an empty string.
 * @returns {string}
 */
function basenameForAnyPlatform(file) {
  const text = String(file || '')
  const flavor = text.includes('\\') ? path.win32 : path
  return flavor.basename(text)
}
341
+
342
/**
 * Reduce a name to a file name that is safe to create.
 *
 * Takes the base name, applies NFKC normalization, replaces reserved and
 * control characters with '_', collapses whitespace runs, and trims leading
 * and trailing spaces, dots and underscores. Falls back to 'attachment' when
 * nothing remains.
 *
 * @param {string} [name] - Proposed name.
 * @returns {string}
 */
function safeFileName(name) {
  const base = path.basename(name || 'attachment').normalize('NFKC')
  const withoutReserved = base.replace(/[<>:"/\\|?*\x00-\x1F]/g, '_')
  const singleSpaced = withoutReserved.replace(/\s+/g, ' ')
  const trimmed = singleSpaced.replace(/^[ ._]+|[ ._]+$/g, '')
  return trimmed || 'attachment'
}
350
+
351
/**
 * Choose a destination path inside `dir` for an inbound file.
 *
 * Returns `<dir>/<safe name>` when that path is free; otherwise inserts the
 * first 12 characters of `hash` before the extension.
 *
 * @param {string} dir - Destination directory.
 * @param {string} [name] - Preferred file name.
 * @param {string} hash - Hex digest of the file content.
 * @returns {string}
 */
function uniqueInboundPath(dir, name, hash) {
  const fileName = safeFileName(name || 'attachment')
  const preferred = path.join(dir, fileName)
  if (!fs.existsSync(preferred)) return preferred

  const extension = path.extname(fileName)
  const stem = extension ? fileName.slice(0, fileName.length - extension.length) : fileName
  const shortHash = hash.slice(0, 12)
  return path.join(dir, `${stem}-${shortHash}${extension}`)
}
359
+
360
/**
 * MIME type for a file extension (with leading dot, case-insensitive).
 *
 * @param {string} ext - Extension such as '.png'.
 * @returns {string} The known MIME type, or 'application/octet-stream'.
 */
function mimeTypeFromExt(ext) {
  const key = String(ext || '').toLowerCase()
  const knownTypes = new Map([
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.png', 'image/png'],
    ['.gif', 'image/gif'],
    ['.webp', 'image/webp'],
    ['.mp4', 'video/mp4'],
    ['.mov', 'video/quicktime'],
    ['.pdf', 'application/pdf'],
    ['.txt', 'text/plain'],
    ['.docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
    ['.xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
    ['.pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'],
    ['.zip', 'application/zip'],
  ])
  return knownTypes.get(key) ?? 'application/octet-stream'
}
376
+
377
/**
 * Attachment category for a file extension (with leading dot, case-insensitive).
 *
 * @param {string} ext - Extension such as '.mp4'.
 * @returns {'image'|'video'|'file'}
 */
function attachmentTypeFromExt(ext) {
  const key = String(ext || '').toLowerCase()
  if (IMAGE_EXTENSIONS.has(key)) return 'image'
  return VIDEO_EXTENSIONS.has(key) ? 'video' : 'file'
}
382
+
383
// Lower-case extensions (with leading dot) classified as images.
const IMAGE_EXTENSIONS = new Set([
  '.apng', '.avif', '.bmp', '.gif', '.heic', '.heif',
  '.jpeg', '.jpg', '.png', '.tif', '.tiff', '.webp',
])
// Lower-case extensions (with leading dot) classified as videos.
const VIDEO_EXTENSIONS = new Set([
  '.3g2', '.3gp', '.avi', '.m4v', '.mkv', '.mov',
  '.mp4', '.mpeg', '.mpg', '.webm', '.wmv',
])
385
+
386
/**
 * Describe an outbound file as an attachment record.
 *
 * @param {string} file - Local path of the file being sent.
 * @returns {{type: 'image'|'video'|'file', name: string, localPath: string, availability: 'edge-local'}}
 */
export function classifyOutboundFile(file) {
  const name = basenameForAnyPlatform(file)
  const extension = path.extname(name).toLowerCase()

  let type = 'file'
  if (IMAGE_EXTENSIONS.has(extension)) {
    type = 'image'
  } else if (VIDEO_EXTENSIONS.has(extension)) {
    type = 'video'
  }

  return { type, name, localPath: file, availability: 'edge-local' }
}
397
+
398
/**
 * Existing directories that are scanned for newly downloaded attachments.
 *
 * With USERPROFILE set: its Downloads, Documents, Desktop and
 * Documents\WeChat Files folders (joined with Windows path rules). Without it:
 * `$HOME/Downloads`. Duplicates and non-existent directories are dropped.
 *
 * @returns {string[]}
 */
function candidateDownloadRoots() {
  const profile = process.env.USERPROFILE
  const roots = []
  if (profile) {
    const subfolders = [['Downloads'], ['Documents'], ['Desktop'], ['Documents', 'WeChat Files']]
    for (const segments of subfolders) {
      roots.push(path.win32.join(profile, ...segments))
    }
  } else {
    const home = process.env.HOME || ''
    if (home) roots.push(path.join(home, 'Downloads'))
  }
  return [...new Set(roots)].filter(root => fs.existsSync(root))
}
408
+
409
/**
 * List regular files under `root`, descending at most `maxDepth` directory levels.
 *
 * Unreadable directories and files that cannot be stat'ed are skipped. The
 * walk stops as soon as `limit` files have been collected, including in the
 * middle of a directory, so the result never holds more than `limit` entries.
 *
 * @param {string} root - Directory to start from.
 * @param {number} maxDepth - Number of directory levels below `root` to descend.
 * @param {number} limit - Maximum number of files to return.
 * @returns {Array<{path: string, size: number, mtimeMs: number}>}
 */
function walkFiles(root, maxDepth, limit) {
  const result = []
  const stack = [{ dir: root, depth: 0 }]
  while (stack.length && result.length < limit) {
    const current = stack.pop()
    let entries = []
    try {
      entries = fs.readdirSync(current.dir, { withFileTypes: true })
    } catch {
      continue
    }
    for (const entry of entries) {
      // Enforce the cap per file, not just per directory.
      if (result.length >= limit) break
      const filePath = path.join(current.dir, entry.name)
      if (entry.isDirectory()) {
        if (current.depth < maxDepth) stack.push({ dir: filePath, depth: current.depth + 1 })
        continue
      }
      if (!entry.isFile()) continue
      try {
        const stat = fs.statSync(filePath)
        result.push({ path: filePath, size: stat.size, mtimeMs: stat.mtimeMs })
      } catch {
        // Ignore transient files while WeChat is writing them.
      }
    }
  }
  return result
}
437
+
438
/**
 * Snapshot the files currently present under every candidate download root.
 *
 * Each root is walked 4 levels deep with a cap of 5000 files.
 *
 * @returns {Map<string, {path: string, size: number, mtimeMs: number}>} Files keyed by path.
 */
function snapshotDownloadCandidates() {
  const found = candidateDownloadRoots().flatMap(root => walkFiles(root, 4, 5000))
  return new Map(found.map(file => [file.path, file]))
}
447
+
448
/**
 * Copy a downloaded file into `targetDir` and describe it as a local attachment.
 *
 * The destination name is the attachment's own name when it ends in an
 * extension, otherwise the source file's base name. The destination directory
 * is created if needed and an already existing destination file is left untouched.
 *
 * @param {{path: string}} source - Downloaded file to copy.
 * @param {string} targetDir - Destination directory.
 * @param {{name?: string, type?: string, mimeType?: string}} [attachment] - Metadata from OCR.
 * @returns {{type: string, name: string, mimeType: string, size: number, localPath: string, hash: string, availability: 'edge-local'}}
 */
export function copyInboundAttachment(source, targetDir, attachment) {
  const contents = fs.readFileSync(source.path)
  const hash = crypto.createHash('sha256').update(contents).digest('hex')

  const declaredName = attachment?.name
  const declaredNameHasExtension = Boolean(declaredName) && /\.[\p{L}\p{N}]+$/u.test(declaredName)
  const preferredName = declaredNameHasExtension ? declaredName : path.basename(source.path)

  const localPath = uniqueInboundPath(targetDir, preferredName, hash)
  fs.mkdirSync(targetDir, { recursive: true })
  if (!fs.existsSync(localPath)) {
    fs.writeFileSync(localPath, contents)
  }

  const extension = path.extname(localPath).toLowerCase()
  const type = attachment?.type || attachmentTypeFromExt(extension)
  const mimeType = attachment?.mimeType || mimeTypeFromExt(extension)
  return {
    type,
    name: path.basename(localPath),
    mimeType,
    size: contents.byteLength,
    localPath,
    hash,
    availability: 'edge-local',
  }
}
467
+
468
/**
 * Try to obtain local copies of attachments referenced by recent messages.
 *
 * For each message whose attachment has neither `localPath` nor `url` (and
 * that has a box), the message bubble is clicked, the flow waits 2 seconds,
 * ESC is pressed (failures ignored), and the download roots are compared with
 * the previous snapshot. A detected file is copied into the download
 * directory; otherwise the attachment is marked 'pending-download'.
 *
 * @param {Array<object>} recentMessages - Summarized OCR messages.
 * @param {{payload: object, imageSize: object}} messageCapture - Capture the message boxes refer to.
 * @param {object} options - Reads downloadAttachments and downloadAttachmentsDir.
 * @returns {Promise<{recentMessages: Array<object>, downloads: Array<object>}>}
 */
async function localizeInboundAttachments(recentMessages, messageCapture, options) {
  const disabled = options.downloadAttachments === false || !options.downloadAttachmentsDir
  if (disabled) return { recentMessages, downloads: [] }

  const targetDir = path.resolve(options.downloadAttachmentsDir)
  fs.mkdirSync(targetDir, { recursive: true })

  const downloads = []
  const updated = []
  let baseline = snapshotDownloadCandidates()
  for (const message of recentMessages) {
    const { attachment } = message
    const needsDownload = Boolean(attachment) && !attachment.localPath && !attachment.url && Boolean(message.box)
    if (!needsDownload) {
      updated.push(message)
      continue
    }

    const startedAt = Date.now()
    const bubble = pointFromObservation(messageCapture.payload, message, messageCapture.imageSize)
    await click(bubble, options, ['--no-raise'])
    await sleep(2_000)
    // Best effort: a failing ESC must not abort the download attempt.
    await press('{ESC}', options).catch(() => {})

    const downloaded = selectDownloadedAttachment(baseline, snapshotDownloadCandidates(), startedAt, attachment)
    if (downloaded) {
      const localized = copyInboundAttachment(downloaded, targetDir, attachment)
      updated.push({ ...message, text: message.text || localized.name, attachment: localized })
      downloads.push({
        text: message.text,
        ok: true,
        sourcePath: downloaded.path,
        localPath: localized.localPath,
        size: localized.size,
      })
    } else {
      const providerError = 'No new downloaded file was observed after clicking attachment bubble'
      updated.push({
        ...message,
        attachment: { ...attachment, availability: 'pending-download', providerError },
      })
      downloads.push({ text: message.text, ok: false, providerError })
    }
    // Re-baseline after every attempt so the next message only sees files that appear afterwards.
    baseline = snapshotDownloadCandidates()
  }
  return { recentMessages: updated, downloads }
}
509
+
510
/**
 * Files whose base name does not appear in any OCR row.
 *
 * A file counts as confirmed when its base name (whitespace- and
 * case-insensitive) is contained in a row's text or in the row attachment's
 * `filename`/`name`.
 *
 * @param {Array<object>} observations - OCR rows.
 * @param {string[]} files - Paths of the files that were sent.
 * @returns {string[]} The subset of `files` that was not confirmed.
 */
export function missingConfirmedFiles(observations, files) {
  const rows = Array.isArray(observations) ? observations : []
  const isConfirmed = (file) => {
    const needle = normalizedMessageText(basenameForAnyPlatform(file))
    return rows.some((row) => {
      const rowText = normalizedMessageText(observationText(row))
      const attachmentName = normalizedMessageText(row?.attachment?.filename || row?.attachment?.name)
      return rowText.includes(needle) || attachmentName.includes(needle)
    })
  }
  return (files || []).filter(file => !isConfirmed(file))
}
522
+
523
/**
 * Open the target conversation through the search box and confirm its title.
 *
 * Steps: capture the window, click the search box, select all and paste the
 * group name, OCR the search results, click the matching row, press ESC, then
 * capture and OCR again to confirm the title.
 *
 * @param {object} options - Flow options; reads group and allowWeakTitle.
 * @param {string[]} artifacts - Receives the path of every screenshot taken; mutated.
 * @returns {Promise<object>} The capture taken after the conversation was opened.
 * @throws {Error} When the group is not found in the search results, or the title
 *   is not confirmed and `allowWeakTitle` is not set.
 */
async function openConversationBySearch(options, artifacts) {
  const groupName = options.group

  const before = await capture('window', options, 'window-before-search')
  artifacts.push(before.file)

  const searchBox = geometryPoint(before.payload, 'search')
  await click(searchBox, options)
  await sleep(180)
  await press('^a', options)
  await pasteText(groupName, options)
  await sleep(650)

  const results = await capture('window', options, 'search-results')
  artifacts.push(results.file)
  const resultsOcr = await recognizeScreenshot(results, options, 'search-results')
  const match = findConversationInSearchResults(resultsOcr.observations, groupName)
  if (!match) {
    throw new Error(`Could not find target group '${options.group}' under search result section '群聊'. See ${results.file}`)
  }

  const matchPoint = pointFromObservation(results.payload, match, results.imageSize)
  await click(matchPoint, options, ['--no-raise'])
  await sleep(350)
  // WeChat 4.x can keep the transient search result panel open after clicking a result.
  // Close it explicitly so later input/send geometry always targets the conversation composer.
  await press('{ESC}', options)
  await sleep(500)

  const opened = await capture('window', options, 'opened-conversation')
  artifacts.push(opened.file)
  const titleOcr = await recognizeScreenshot(opened, options, 'title-confirmation')
  const titleConfirmed = Boolean(findTitleConfirmation(titleOcr.observations, groupName))
  // Some OCR models miss the top title but the click may still be correct. Continue
  // only if the caller explicitly opted out of strict confirmation.
  if (!titleConfirmed && !options.allowWeakTitle) {
    throw new Error(`Opened conversation title was not confirmed as '${options.group}'. See ${opened.file}`)
  }
  return opened
}
558
+
559
/**
 * Run the whole visual flow: open the conversation, read recent messages,
 * optionally localize inbound attachments, optionally send text and files,
 * and write `summary.json` into the capture directory.
 *
 * The required `group` is validated before anything is created on disk, so an
 * invalid call does not leave a stray capture directory behind.
 *
 * @param {object} input - Reads group, replyText, files, recentLimit, captureDir,
 *   ocrTimeoutMs, openTimeoutMs, dryRun; all other fields are forwarded unchanged.
 * @returns {Promise<{ok: true, group: string, captureDir: string, recentMessages: Array<object>,
 *   downloads: Array<object>, sent: Array<object>, artifacts: string[]}>}
 * @throws {Error} When `group` is missing or any step cannot be confirmed.
 */
export async function runVisualFlow(input) {
  if (!input?.group) throw new Error('--group is required')

  const captureDir = path.resolve(input.captureDir || fs.mkdtempSync(path.join(fs.realpathSync(process.env.TEMP || process.env.TMP || '/tmp'), 'shennian-wechat-rpa-win-visual-')))
  fs.mkdirSync(captureDir, { recursive: true })
  const options = {
    ...input,
    captureDir,
    step: 1,
    recentLimit: Number(input.recentLimit || 5),
    ocrTimeoutMs: Number(input.ocrTimeoutMs || 45_000),
    openTimeoutMs: Number(input.openTimeoutMs || 12_000),
  }
  const artifacts = []

  const opened = await openConversationBySearch(options, artifacts)
  const messageCapture = await capture('messages', options, 'messages-before-send')
  artifacts.push(messageCapture.file)
  const messageOcr = await recognizeScreenshot(messageCapture, options, 'message-read')
  let recentMessages = summarizeObservations(messageOcr.observations, options.recentLimit)
  const localization = await localizeInboundAttachments(recentMessages, messageCapture, options)
  recentMessages = localization.recentMessages

  const sent = []
  if (!options.dryRun && options.replyText) {
    await click(geometryPoint(opened.payload, 'input'), options)
    await pasteText(options.replyText, options)
    await sleep(250)
    await click(geometryPoint(opened.payload, 'send'), options)
    await sleep(900)
    const confirmCapture = await capture('messages', options, 'after-text-send')
    artifacts.push(confirmCapture.file)
    const confirmOcr = await recognizeScreenshot(confirmCapture, options, 'send-confirmation')
    if (!observationsContainText(confirmOcr.observations, options.replyText)) {
      throw new Error(`Sent text was not confirmed by OCR: '${options.replyText}'. See ${confirmCapture.file}`)
    }
    sent.push({ type: 'text', text: options.replyText, observations: summarizeObservations(confirmOcr.observations, options.recentLimit) })
  }

  if (!options.dryRun && options.files?.length) {
    await click(geometryPoint(opened.payload, 'input'), options)
    await pasteFiles(options.files, options)
    await sleep(850)
    // Screenshot of the composer with the pasted files, kept as a debug artifact only.
    const pendingCapture = await capture('input', options, 'pending-files')
    artifacts.push(pendingCapture.file)
    await click(geometryPoint(opened.payload, 'send'), options)
    await sleep(1200)
    const fileConfirmCapture = await capture('messages', options, 'after-file-send')
    artifacts.push(fileConfirmCapture.file)
    const fileConfirmOcr = await recognizeScreenshot(fileConfirmCapture, options, 'send-confirmation')
    const missing = missingConfirmedFiles(fileConfirmOcr.observations, options.files)
    if (missing.length > 0) {
      throw new Error(`Sent files were not confirmed by OCR: ${missing.map(basenameForAnyPlatform).join(', ')}. See ${fileConfirmCapture.file}`)
    }
    sent.push({
      type: 'files',
      files: options.files,
      attachments: options.files.map(classifyOutboundFile),
      observations: summarizeObservations(fileConfirmOcr.observations, options.recentLimit),
    })
  }

  const summary = {
    ok: true,
    group: options.group,
    captureDir,
    recentMessages,
    downloads: localization.downloads,
    sent,
    artifacts,
  }
  fs.writeFileSync(path.join(captureDir, 'summary.json'), `${JSON.stringify(summary, null, 2)}\n`)
  return summary
}
632
+
633
/**
 * CLI entry point: parse process arguments, run the visual flow, and print
 * the summary as JSON on stdout. Prints help when called without arguments or
 * with --help / -h.
 *
 * @returns {Promise<void>}
 * @throws {Error} When an option is missing its value or unknown arguments remain.
 */
async function main() {
  const argv = process.argv.slice(2)
  const wantsHelp = argv.length === 0 || ['--help', '-h'].some(flag => argv.includes(flag))
  if (wantsHelp) {
    printHelp()
    return
  }

  const files = takeMany(argv, '--file')
  const valueOf = name => takeOption(argv, name)
  const hasFlag = name => takeFlag(argv, name)
  const options = {
    group: valueOf('--group'),
    replyText: valueOf('--reply-text'),
    recentLimit: valueOf('--recent-limit') || '5',
    captureDir: valueOf('--capture-dir'),
    downloadAttachmentsDir: valueOf('--download-attachments-dir'),
    ocrUrl: valueOf('--ocr-url'),
    token: valueOf('--token'),
    ocrFixture: valueOf('--ocr-fixture'),
    helper: valueOf('--helper'),
    channelId: valueOf('--channel-id'),
    ocrTimeoutMs: Number(valueOf('--ocr-timeout-ms') || 45_000),
    openTimeoutMs: Number(valueOf('--open-timeout-ms') || 12_000),
    allowWeakTitle: hasFlag('--allow-weak-title'),
    downloadAttachments: !hasFlag('--no-download-attachments'),
    dryRun: hasFlag('--dry-run'),
    files,
  }

  // Every recognized option has been removed from argv by now; anything left is unknown.
  if (argv.length) throw new Error(`Unknown arguments: ${argv.join(' ')}`)

  const summary = await runVisualFlow(options)
  process.stdout.write(`${JSON.stringify(summary, null, 2)}\n`)
}
662
+
663
// Run the CLI only when this file is the script Node was started with (not when imported).
const invokedScript = process.argv[1]
const isDirectRun = Boolean(invokedScript) && path.resolve(invokedScript) === fileURLToPath(import.meta.url)
if (isDirectRun) {
  main().catch((error) => {
    const message = error instanceof Error ? error.message : String(error)
    console.error(message)
    process.exit(1)
  })
}