@dcrays/dcgchat-test 0.3.23 → 0.3.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/utils/searchFile.ts +58 -59
package/package.json
CHANGED
package/src/utils/searchFile.ts
CHANGED
|
@@ -4,11 +4,7 @@ import fs from 'node:fs'
|
|
|
4
4
|
import os from 'node:os'
|
|
5
5
|
import path from 'node:path'
|
|
6
6
|
|
|
7
|
-
/**
|
|
8
|
-
* 从文本中提取 /mobook 目录下的文件
|
|
9
|
-
* @param {string} text
|
|
10
|
-
* @returns {string[]}
|
|
11
|
-
*/
|
|
7
|
+
/** 参与提取的常见文件扩展名 */
|
|
12
8
|
const EXT_LIST = [
|
|
13
9
|
// 文档类
|
|
14
10
|
'doc',
|
|
@@ -92,11 +88,47 @@ const EXT_LIST = [
|
|
|
92
88
|
*/
|
|
93
89
|
const EXT_SORTED_FOR_REGEX = [...EXT_LIST].sort((a, b) => b.length - a.length)
|
|
94
90
|
|
|
91
|
+
/** 正则交替串(长扩展名优先) */
|
|
92
|
+
const EXT_ALT = `(${EXT_SORTED_FOR_REGEX.join('|')})`
|
|
93
|
+
/** 文件名片段:中文、常见符号、非贪婪 */
|
|
94
|
+
const FILE_NAME_CLASS = `[\\w\\u4e00-\\u9fa5::《》()()\\-\\s]+?`
|
|
95
|
+
|
|
96
|
+
/** 预编译,避免 extractMobookFiles 每次调用重复构建正则 */
|
|
97
|
+
const RX_EXTRACT = {
|
|
98
|
+
backtick: new RegExp(`\`([^\\\`]+?\\.${EXT_ALT})\``, 'gi'),
|
|
99
|
+
fullPath: new RegExp(`/mobook/${FILE_NAME_CLASS}\\.${EXT_ALT}`, 'gi'),
|
|
100
|
+
winMobook: new RegExp(`(?:[a-zA-Z]:)?[/\\\\]mobook[/\\\\]${FILE_NAME_CLASS}\\.${EXT_ALT}`, 'gi'),
|
|
101
|
+
inline: new RegExp(`mobook下的\\s*(${FILE_NAME_CLASS}\\.${EXT_ALT})`, 'gi'),
|
|
102
|
+
bold: new RegExp(`\\*\\*(${FILE_NAME_CLASS}\\.${EXT_ALT})\\*\\*`, 'gi'),
|
|
103
|
+
loose: new RegExp(`(${FILE_NAME_CLASS}\\.${EXT_ALT})\\s*\\(`, 'gi'),
|
|
104
|
+
/** Markdown 列表:`- 文件名.ext` — 用 matchAll 取捕获组 1,避免 FILE_NAME 含 `-`/空格 时误把「- 」并入文件名 */
|
|
105
|
+
markdownList: new RegExp(`[-*•]\\s+(${FILE_NAME_CLASS}\\.${EXT_ALT})`, 'gi'),
|
|
106
|
+
inlineFile: new RegExp(`${FILE_NAME_CLASS}\\.${EXT_ALT}`, 'i')
|
|
107
|
+
}
|
|
108
|
+
|
|
95
109
|
/** 去除控制符、零宽字符等常见脏值 */
|
|
96
110
|
function stripMobookNoise(s: string) {
|
|
97
111
|
return s.replace(/[\u0000-\u001F\u007F\u200B-\u200D\u200E\u200F\uFEFF]/g, '')
|
|
98
112
|
}
|
|
99
113
|
|
|
114
|
+
/**
|
|
115
|
+
* 从路径或 mobook 引用中取出「提到的文件名」:去掉盘符、/mobook、\\mobook\\ 等前缀后取 basename
|
|
116
|
+
*/
|
|
117
|
+
function toMobookReferencedBasename(p: string): string {
|
|
118
|
+
let s = stripMobookNoise(p).trim()
|
|
119
|
+
if (!s) return ''
|
|
120
|
+
s = s.replace(/^(?:[a-zA-Z]:)?[/\\]+mobook[/\\]/i, '')
|
|
121
|
+
s = s.replace(/^\/mobook\//i, '')
|
|
122
|
+
s = s.replace(/\\/g, '/')
|
|
123
|
+
const parts = s.split('/').filter(Boolean)
|
|
124
|
+
return parts.length ? parts[parts.length - 1]! : s
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
function addMobookMentionedFile(result: Set<string>, raw: string) {
|
|
128
|
+
const base = toMobookReferencedBasename(raw)
|
|
129
|
+
if (base && isValidFileName(base)) result.add(base)
|
|
130
|
+
}
|
|
131
|
+
|
|
100
132
|
/**
|
|
101
133
|
* 从文本中扫描 `.../mobook/...` 或 `...\mobook\...` 片段,按最长后缀匹配合法扩展名(兜底)
|
|
102
134
|
*/
|
|
@@ -132,7 +164,7 @@ function collectMobookPathsAfterNeedle(text: string, lower: string, needle: stri
|
|
|
132
164
|
const base = raw.slice(0, -(matchedExt.length + 1))
|
|
133
165
|
const fileName = `${base}.${matchedExt}`
|
|
134
166
|
if (isValidFileName(fileName)) {
|
|
135
|
-
result
|
|
167
|
+
addMobookMentionedFile(result, fileName)
|
|
136
168
|
}
|
|
137
169
|
from = start + 1
|
|
138
170
|
}
|
|
@@ -144,62 +176,38 @@ function collectMobookPathsByScan(text: string, result: Set<string>): void {
|
|
|
144
176
|
collectMobookPathsAfterNeedle(text, lower, '\\mobook\\', result)
|
|
145
177
|
}
|
|
146
178
|
|
|
179
|
+
/**
|
|
180
|
+
* 从文本中提取提到的 mobook 相关文件名(仅 basename,不含目录)
|
|
181
|
+
* @param text 原始文本
|
|
182
|
+
* @returns 去重后的文件名列表,例如 `['报告.pdf', 'data.xlsx']`
|
|
183
|
+
*/
|
|
147
184
|
export function extractMobookFiles(text = '') {
|
|
148
185
|
if (typeof text !== 'string' || !text.trim()) return []
|
|
149
186
|
// 全角冒号(中文输入常见)→ 半角,便于匹配 c:\mobook\
|
|
150
187
|
text = text.replace(/\uFF1A/g, ':')
|
|
151
188
|
const result = new Set<string>()
|
|
152
|
-
// ✅ 扩展名(必须长扩展名优先,见 EXT_SORTED_FOR_REGEX)
|
|
153
|
-
const EXT = `(${EXT_SORTED_FOR_REGEX.join('|')})`
|
|
154
|
-
// ✅ 文件名字符(增强:支持中文、符号)
|
|
155
|
-
const FILE_NAME = `[\\w\\u4e00-\\u9fa5::《》()()\\-\\s]+?`
|
|
156
189
|
try {
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
;(text.match(backtickReg) || []).forEach((item) => {
|
|
160
|
-
const name = item.replace(/`/g, '').trim()
|
|
161
|
-
if (isValidFileName(name)) {
|
|
162
|
-
result.add(`${name}`)
|
|
163
|
-
}
|
|
164
|
-
})
|
|
165
|
-
// 2️⃣ /mobook/xxx.xxx
|
|
166
|
-
const fullPathReg = new RegExp(`/mobook/${FILE_NAME}\\.${EXT}`, 'gi')
|
|
167
|
-
;(text.match(fullPathReg) || []).forEach((p) => {
|
|
168
|
-
result.add(normalizePath(p))
|
|
190
|
+
;(text.match(RX_EXTRACT.backtick) || []).forEach((item) => {
|
|
191
|
+
addMobookMentionedFile(result, item.replace(/`/g, '').trim())
|
|
169
192
|
})
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
;(text.match(winMobookReg) || []).forEach((full) => {
|
|
193
|
+
;(text.match(RX_EXTRACT.fullPath) || []).forEach((p) => addMobookMentionedFile(result, p))
|
|
194
|
+
;(text.match(RX_EXTRACT.winMobook) || []).forEach((full) => {
|
|
173
195
|
const name = full.replace(/^(?:[a-zA-Z]:)?[/\\\\]mobook[/\\\\]/i, '').trim()
|
|
174
|
-
|
|
175
|
-
result.add(normalizePath(`${name}`))
|
|
176
|
-
}
|
|
196
|
+
addMobookMentionedFile(result, name)
|
|
177
197
|
})
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
const match = item.match(new RegExp(`${FILE_NAME}\\.${EXT}`, 'i'))
|
|
182
|
-
if (match && isValidFileName(match[0])) {
|
|
183
|
-
result.add(`/mobook/${match[0].trim()}`)
|
|
184
|
-
}
|
|
198
|
+
;(text.match(RX_EXTRACT.inline) || []).forEach((item) => {
|
|
199
|
+
const match = item.match(RX_EXTRACT.inlineFile)
|
|
200
|
+
if (match?.[0]) addMobookMentionedFile(result, match[0].trim())
|
|
185
201
|
})
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
;(text.match(boldReg) || []).forEach((item) => {
|
|
189
|
-
const name = item.replace(/\*\*/g, '').trim()
|
|
190
|
-
if (isValidFileName(name)) {
|
|
191
|
-
result.add(`${name}`)
|
|
192
|
-
}
|
|
202
|
+
;(text.match(RX_EXTRACT.bold) || []).forEach((item) => {
|
|
203
|
+
addMobookMentionedFile(result, item.replace(/\*\*/g, '').trim())
|
|
193
204
|
})
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
;(text.match(looseReg) || []).forEach((item) => {
|
|
197
|
-
const name = item.replace(/\s*\(.+$/, '').trim()
|
|
198
|
-
if (isValidFileName(name)) {
|
|
199
|
-
result.add(`${name}`)
|
|
200
|
-
}
|
|
205
|
+
;(text.match(RX_EXTRACT.loose) || []).forEach((item) => {
|
|
206
|
+
addMobookMentionedFile(result, item.replace(/\s*\(.+$/, '').trim())
|
|
201
207
|
})
|
|
202
|
-
|
|
208
|
+
for (const m of text.matchAll(RX_EXTRACT.markdownList)) {
|
|
209
|
+
if (m[1]) addMobookMentionedFile(result, m[1].trim())
|
|
210
|
+
}
|
|
203
211
|
collectMobookPathsByScan(text, result)
|
|
204
212
|
} catch (e) {
|
|
205
213
|
dcgLogger(`extractMobookFiles error:${e}`)
|
|
@@ -222,15 +230,6 @@ function isValidFileName(name: string) {
|
|
|
222
230
|
return true
|
|
223
231
|
}
|
|
224
232
|
|
|
225
|
-
/**
|
|
226
|
-
* 规范路径(去重用)
|
|
227
|
-
*/
|
|
228
|
-
function normalizePath(path: string) {
|
|
229
|
-
return path
|
|
230
|
-
.replace(/\/+/g, '/') // 多斜杠 → 单斜杠
|
|
231
|
-
.replace(/\/$/, '') // 去掉结尾 /
|
|
232
|
-
}
|
|
233
|
-
|
|
234
233
|
/** mobook 下按文件名递归查找:仅在直线路径均失败时调用;有深度/目录数上限,找到即停 */
|
|
235
234
|
const MOBOOK_FIND_MAX_DEPTH = 10
|
|
236
235
|
const MOBOOK_FIND_MAX_DIRS = 2000
|