@dcrays/dcgchat-test 0.3.23 → 0.3.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/utils/searchFile.ts +53 -59
package/package.json
CHANGED
package/src/utils/searchFile.ts
CHANGED
|
@@ -4,11 +4,7 @@ import fs from 'node:fs'
|
|
|
4
4
|
import os from 'node:os'
|
|
5
5
|
import path from 'node:path'
|
|
6
6
|
|
|
7
|
-
/**
|
|
8
|
-
* 从文本中提取 /mobook 目录下的文件
|
|
9
|
-
* @param {string} text
|
|
10
|
-
* @returns {string[]}
|
|
11
|
-
*/
|
|
7
|
+
/** 参与提取的常见文件扩展名 */
|
|
12
8
|
const EXT_LIST = [
|
|
13
9
|
// 文档类
|
|
14
10
|
'doc',
|
|
@@ -92,11 +88,45 @@ const EXT_LIST = [
|
|
|
92
88
|
*/
|
|
93
89
|
const EXT_SORTED_FOR_REGEX = [...EXT_LIST].sort((a, b) => b.length - a.length)
|
|
94
90
|
|
|
91
|
+
/** 正则交替串(长扩展名优先) */
|
|
92
|
+
const EXT_ALT = `(${EXT_SORTED_FOR_REGEX.join('|')})`
|
|
93
|
+
/** 文件名片段:中文、常见符号、非贪婪 */
|
|
94
|
+
const FILE_NAME_CLASS = `[\\w\\u4e00-\\u9fa5::《》()()\\-\\s]+?`
|
|
95
|
+
|
|
96
|
+
/** 预编译,避免 extractMobookFiles 每次调用重复构建正则 */
|
|
97
|
+
const RX_EXTRACT = {
|
|
98
|
+
backtick: new RegExp(`\`([^\\\`]+?\\.${EXT_ALT})\``, 'gi'),
|
|
99
|
+
fullPath: new RegExp(`/mobook/${FILE_NAME_CLASS}\\.${EXT_ALT}`, 'gi'),
|
|
100
|
+
winMobook: new RegExp(`(?:[a-zA-Z]:)?[/\\\\]mobook[/\\\\]${FILE_NAME_CLASS}\\.${EXT_ALT}`, 'gi'),
|
|
101
|
+
inline: new RegExp(`mobook下的\\s*(${FILE_NAME_CLASS}\\.${EXT_ALT})`, 'gi'),
|
|
102
|
+
bold: new RegExp(`\\*\\*(${FILE_NAME_CLASS}\\.${EXT_ALT})\\*\\*`, 'gi'),
|
|
103
|
+
loose: new RegExp(`(${FILE_NAME_CLASS}\\.${EXT_ALT})\\s*\\(`, 'gi'),
|
|
104
|
+
inlineFile: new RegExp(`${FILE_NAME_CLASS}\\.${EXT_ALT}`, 'i')
|
|
105
|
+
}
|
|
106
|
+
|
|
95
107
|
/** 去除控制符、零宽字符等常见脏值 */
|
|
96
108
|
function stripMobookNoise(s: string) {
|
|
97
109
|
return s.replace(/[\u0000-\u001F\u007F\u200B-\u200D\u200E\u200F\uFEFF]/g, '')
|
|
98
110
|
}
|
|
99
111
|
|
|
112
|
+
/**
|
|
113
|
+
* 从路径或 mobook 引用中取出「提到的文件名」:去掉盘符、/mobook、\\mobook\\ 等前缀后取 basename
|
|
114
|
+
*/
|
|
115
|
+
function toMobookReferencedBasename(p: string): string {
|
|
116
|
+
let s = stripMobookNoise(p).trim()
|
|
117
|
+
if (!s) return ''
|
|
118
|
+
s = s.replace(/^(?:[a-zA-Z]:)?[/\\]+mobook[/\\]/i, '')
|
|
119
|
+
s = s.replace(/^\/mobook\//i, '')
|
|
120
|
+
s = s.replace(/\\/g, '/')
|
|
121
|
+
const parts = s.split('/').filter(Boolean)
|
|
122
|
+
return parts.length ? parts[parts.length - 1]! : s
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
function addMobookMentionedFile(result: Set<string>, raw: string) {
|
|
126
|
+
const base = toMobookReferencedBasename(raw)
|
|
127
|
+
if (base && isValidFileName(base)) result.add(base)
|
|
128
|
+
}
|
|
129
|
+
|
|
100
130
|
/**
|
|
101
131
|
* 从文本中扫描 `.../mobook/...` 或 `...\mobook\...` 片段,按最长后缀匹配合法扩展名(兜底)
|
|
102
132
|
*/
|
|
@@ -132,7 +162,7 @@ function collectMobookPathsAfterNeedle(text: string, lower: string, needle: stri
|
|
|
132
162
|
const base = raw.slice(0, -(matchedExt.length + 1))
|
|
133
163
|
const fileName = `${base}.${matchedExt}`
|
|
134
164
|
if (isValidFileName(fileName)) {
|
|
135
|
-
result
|
|
165
|
+
addMobookMentionedFile(result, fileName)
|
|
136
166
|
}
|
|
137
167
|
from = start + 1
|
|
138
168
|
}
|
|
@@ -144,62 +174,35 @@ function collectMobookPathsByScan(text: string, result: Set<string>): void {
|
|
|
144
174
|
collectMobookPathsAfterNeedle(text, lower, '\\mobook\\', result)
|
|
145
175
|
}
|
|
146
176
|
|
|
177
|
+
/**
|
|
178
|
+
* 从文本中提取提到的 mobook 相关文件名(仅 basename,不含目录)
|
|
179
|
+
* @param text 原始文本
|
|
180
|
+
* @returns 去重后的文件名列表,例如 `['报告.pdf', 'data.xlsx']`
|
|
181
|
+
*/
|
|
147
182
|
export function extractMobookFiles(text = '') {
|
|
148
183
|
if (typeof text !== 'string' || !text.trim()) return []
|
|
149
184
|
// 全角冒号(中文输入常见)→ 半角,便于匹配 c:\mobook\
|
|
150
185
|
text = text.replace(/\uFF1A/g, ':')
|
|
151
186
|
const result = new Set<string>()
|
|
152
|
-
// ✅ 扩展名(必须长扩展名优先,见 EXT_SORTED_FOR_REGEX)
|
|
153
|
-
const EXT = `(${EXT_SORTED_FOR_REGEX.join('|')})`
|
|
154
|
-
// ✅ 文件名字符(增强:支持中文、符号)
|
|
155
|
-
const FILE_NAME = `[\\w\\u4e00-\\u9fa5::《》()()\\-\\s]+?`
|
|
156
187
|
try {
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
;(text.match(backtickReg) || []).forEach((item) => {
|
|
160
|
-
const name = item.replace(/`/g, '').trim()
|
|
161
|
-
if (isValidFileName(name)) {
|
|
162
|
-
result.add(`${name}`)
|
|
163
|
-
}
|
|
164
|
-
})
|
|
165
|
-
// 2️⃣ /mobook/xxx.xxx
|
|
166
|
-
const fullPathReg = new RegExp(`/mobook/${FILE_NAME}\\.${EXT}`, 'gi')
|
|
167
|
-
;(text.match(fullPathReg) || []).forEach((p) => {
|
|
168
|
-
result.add(normalizePath(p))
|
|
188
|
+
;(text.match(RX_EXTRACT.backtick) || []).forEach((item) => {
|
|
189
|
+
addMobookMentionedFile(result, item.replace(/`/g, '').trim())
|
|
169
190
|
})
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
;(text.match(winMobookReg) || []).forEach((full) => {
|
|
191
|
+
;(text.match(RX_EXTRACT.fullPath) || []).forEach((p) => addMobookMentionedFile(result, p))
|
|
192
|
+
;(text.match(RX_EXTRACT.winMobook) || []).forEach((full) => {
|
|
173
193
|
const name = full.replace(/^(?:[a-zA-Z]:)?[/\\\\]mobook[/\\\\]/i, '').trim()
|
|
174
|
-
|
|
175
|
-
result.add(normalizePath(`${name}`))
|
|
176
|
-
}
|
|
194
|
+
addMobookMentionedFile(result, name)
|
|
177
195
|
})
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
const match = item.match(new RegExp(`${FILE_NAME}\\.${EXT}`, 'i'))
|
|
182
|
-
if (match && isValidFileName(match[0])) {
|
|
183
|
-
result.add(`/mobook/${match[0].trim()}`)
|
|
184
|
-
}
|
|
196
|
+
;(text.match(RX_EXTRACT.inline) || []).forEach((item) => {
|
|
197
|
+
const match = item.match(RX_EXTRACT.inlineFile)
|
|
198
|
+
if (match?.[0]) addMobookMentionedFile(result, match[0].trim())
|
|
185
199
|
})
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
;(text.match(boldReg) || []).forEach((item) => {
|
|
189
|
-
const name = item.replace(/\*\*/g, '').trim()
|
|
190
|
-
if (isValidFileName(name)) {
|
|
191
|
-
result.add(`${name}`)
|
|
192
|
-
}
|
|
200
|
+
;(text.match(RX_EXTRACT.bold) || []).forEach((item) => {
|
|
201
|
+
addMobookMentionedFile(result, item.replace(/\*\*/g, '').trim())
|
|
193
202
|
})
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
;(text.match(looseReg) || []).forEach((item) => {
|
|
197
|
-
const name = item.replace(/\s*\(.+$/, '').trim()
|
|
198
|
-
if (isValidFileName(name)) {
|
|
199
|
-
result.add(`${name}`)
|
|
200
|
-
}
|
|
203
|
+
;(text.match(RX_EXTRACT.loose) || []).forEach((item) => {
|
|
204
|
+
addMobookMentionedFile(result, item.replace(/\s*\(.+$/, '').trim())
|
|
201
205
|
})
|
|
202
|
-
// 6️⃣ 兜底:绝对路径等 `.../mobook/<文件名>.<扩展名>` + 最长后缀匹配 + 去脏字符
|
|
203
206
|
collectMobookPathsByScan(text, result)
|
|
204
207
|
} catch (e) {
|
|
205
208
|
dcgLogger(`extractMobookFiles error:${e}`)
|
|
@@ -222,15 +225,6 @@ function isValidFileName(name: string) {
|
|
|
222
225
|
return true
|
|
223
226
|
}
|
|
224
227
|
|
|
225
|
-
/**
|
|
226
|
-
* 规范路径(去重用)
|
|
227
|
-
*/
|
|
228
|
-
function normalizePath(path: string) {
|
|
229
|
-
return path
|
|
230
|
-
.replace(/\/+/g, '/') // 多斜杠 → 单斜杠
|
|
231
|
-
.replace(/\/$/, '') // 去掉结尾 /
|
|
232
|
-
}
|
|
233
|
-
|
|
234
228
|
/** mobook 下按文件名递归查找:仅在直线路径均失败时调用;有深度/目录数上限,找到即停 */
|
|
235
229
|
const MOBOOK_FIND_MAX_DEPTH = 10
|
|
236
230
|
const MOBOOK_FIND_MAX_DIRS = 2000
|