md-zh-translation-skill 1.2.3 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/anchor-normalization.d.ts +11 -0
- package/dist/src/anchor-normalization.js +558 -0
- package/dist/src/internal/prompts/scheme-h.d.ts +2 -0
- package/dist/src/internal/prompts/scheme-h.js +51 -0
- package/dist/src/markdown-protection.d.ts +1 -0
- package/dist/src/markdown-protection.js +30 -4
- package/dist/src/translate.d.ts +1 -1
- package/dist/src/translate.js +768 -66
- package/dist/src/translation-state.d.ts +180 -0
- package/dist/src/translation-state.js +235 -0
- package/package.json +1 -1
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { PromptSlice } from "./translation-state.js";
|
|
2
|
+
/** One entry of a prompt slice's `requiredAnchors` list. */
export type PromptAnchor = PromptSlice["requiredAnchors"][number];

/** The two anchor fields needed to format an anchor for display. */
type AnchorLike = Pick<PromptAnchor, "english" | "chineseHint">;

/**
 * Returns the anchors that are not shadowed by a longer anchor in the same list.
 * The input array is not modified.
 */
export declare function coalesceRequiredAnchors(requiredAnchors: readonly PromptAnchor[]): PromptAnchor[];

/** Returns the canonical display string for an anchor. */
export declare function formatAnchorDisplay(anchor: AnchorLike): string;

/**
 * Normalizes anchor notation in a translated segment using the slice's required,
 * repeat and established anchors. Returns `text` unchanged when `slice` is null.
 */
export declare function normalizeSegmentAnchorText(text: string, slice: PromptSlice | null): string;

/**
 * Compares `source` and `text` line by line and collapses family variants in the
 * translation that the matching source line does not contain.
 * Returns `text` unchanged when `slice` is null.
 */
export declare function normalizeSourceSurfaceAnchorText(source: string, text: string, slice: PromptSlice | null): string;

/**
 * Injects the canonical form of required anchors into translated lines whose
 * source line contains the anchor. Returns `text` unchanged when `slice` is null.
 */
export declare function injectPlannedAnchorText(source: string, text: string, slice: PromptSlice | null): string;

/**
 * Adds the English form after a required anchor's Chinese hint in heading-like
 * lines. Returns `text` unchanged when `slice` is null.
 */
export declare function normalizeHeadingLikeAnchorText(source: string, text: string, slice: PromptSlice | null): string;

/**
 * Applies the anchor fixes described by the slice's pending repair instructions.
 * Returns `text` unchanged when `slice` is null or has no pending repairs.
 */
export declare function normalizeExplicitRepairAnchorText(source: string, text: string, slice: PromptSlice | null): string;

export {};
|
|
@@ -0,0 +1,558 @@
|
|
|
1
|
+
/**
 * Returns the required anchors that are not shadowed by a longer anchor in
 * the same list. Order is preserved and the input array is left untouched.
 *
 * @param {ReadonlyArray<object>} requiredAnchors Anchors to filter.
 * @returns {object[]} A new array holding the surviving anchors.
 */
export function coalesceRequiredAnchors(requiredAnchors) {
    const survivors = [];
    for (const candidate of requiredAnchors) {
        if (isShadowedByLongerAnchor(candidate, requiredAnchors)) {
            continue;
        }
        survivors.push(candidate);
    }
    return survivors;
}
|
|
4
|
+
/**
 * Formats an anchor as its canonical display string.
 *
 * @param {{english: string, chineseHint: string}} anchor Anchor to format.
 * @returns {string} The `canonical` field of the resolved display.
 */
export function formatAnchorDisplay(anchor) {
    const { canonical } = resolveAnchorDisplay(anchor);
    return canonical;
}
|
|
7
|
+
/**
 * Normalizes anchor notation in one translated segment.
 *
 * Every distinct anchor of the slice (required, repeat, established) is
 * applied in order of decreasing English length, then repeated English
 * parentheticals are cleaned up.
 *
 * @param {string} text Translated segment text.
 * @param {object|null} slice Prompt slice; when falsy the text is returned as is.
 * @returns {string} The normalized text.
 */
export function normalizeSegmentAnchorText(text, slice) {
    if (!slice) {
        return text;
    }
    const { requiredAnchors, repeatAnchors, establishedAnchors } = slice;
    const requiredIds = new Set();
    for (const anchor of requiredAnchors) {
        requiredIds.add(anchor.anchorId);
    }
    // Repeat and established anchors are treated alike by the per-anchor pass.
    const repeatIds = new Set();
    for (const anchor of [...repeatAnchors, ...establishedAnchors]) {
        repeatIds.add(anchor.anchorId);
    }
    const ordered = dedupePromptAnchors([
        ...requiredAnchors,
        ...repeatAnchors,
        ...establishedAnchors
    ]);
    // Longest English form first.
    ordered.sort((left, right) => right.english.length - left.english.length);
    const withAnchors = ordered.reduce((current, anchor) => normalizeSingleAnchor(current, anchor, requiredIds.has(anchor.anchorId), repeatIds.has(anchor.anchorId)), text);
    return normalizeRepeatedEnglishParenthesesWithLocalHints(withAnchors);
}
|
|
27
|
+
/**
 * Aligns the translated text with the anchor variants that actually occur in
 * the source, comparing the two texts line by line.
 *
 * For every source line that contains an anchor, sibling anchors of the same
 * family that do NOT occur on that source line are collapsed in the matching
 * translated line.
 *
 * @param {string} source Source text.
 * @param {string} text Translated text.
 * @param {object|null} slice Prompt slice; when falsy the text is returned as is.
 * @returns {string} The original `text` when nothing changed, otherwise the
 *   translated lines joined with "\n".
 */
export function normalizeSourceSurfaceAnchorText(source, text, slice) {
    if (!slice) {
        return text;
    }
    const pool = dedupePromptAnchors([
        ...slice.requiredAnchors,
        ...slice.repeatAnchors,
        ...slice.establishedAnchors
    ]);
    if (pool.length === 0) {
        return text;
    }
    const sourceLines = source.split(/\r?\n/);
    const translatedLines = text.split(/\r?\n/);
    // Only line numbers present in both texts are compared.
    const sharedLineCount = Math.min(sourceLines.length, translatedLines.length);
    let touched = false;
    for (let lineNo = 0; lineNo < sharedLineCount; lineNo += 1) {
        const sourceLine = sourceLines[lineNo] ?? "";
        const presentAnchors = coalesceSourceLineAnchors(pool.filter((anchor) => containsWholePhrase(sourceLine, anchor.english)));
        let current = translatedLines[lineNo] ?? "";
        for (const sourceAnchor of presentAnchors) {
            for (const variant of pool) {
                const isAbsentSibling = variant.familyId === sourceAnchor.familyId &&
                    variant.anchorId !== sourceAnchor.anchorId &&
                    !containsWholePhrase(sourceLine, variant.english);
                if (!isAbsentSibling) {
                    continue;
                }
                const collapsed = collapseUnexpectedFamilyVariant(current, variant, sourceAnchor);
                if (collapsed !== current) {
                    current = collapsed;
                    touched = true;
                }
            }
        }
        translatedLines[lineNo] = current;
    }
    return touched ? translatedLines.join("\n") : text;
}
|
|
65
|
+
/**
 * Injects the canonical form of required anchors into the translated lines
 * whose source line (same line number) contains the anchor.
 *
 * @param {string} source Source text.
 * @param {string} text Translated text.
 * @param {object|null} slice Prompt slice; when falsy the text is returned as is.
 * @returns {string} The original `text` when nothing changed, otherwise the
 *   translated lines joined with "\n".
 */
export function injectPlannedAnchorText(source, text, slice) {
    if (!slice) {
        return text;
    }
    const planned = coalesceRequiredAnchors(dedupePromptAnchors(slice.requiredAnchors));
    // Longest English form first.
    planned.sort((left, right) => right.english.length - left.english.length);
    if (planned.length === 0) {
        return text;
    }
    const sourceLines = source.split(/\r?\n/);
    const translatedLines = text.split(/\r?\n/);
    const sharedLineCount = Math.min(sourceLines.length, translatedLines.length);
    let touched = false;
    for (let lineNo = 0; lineNo < sharedLineCount; lineNo += 1) {
        const sourceLine = sourceLines[lineNo] ?? "";
        const anchorsOnLine = coalesceSourceLineAnchors(planned.filter((anchor) => containsWholePhrase(sourceLine, anchor.english)));
        let current = translatedLines[lineNo] ?? "";
        for (const anchor of anchorsOnLine) {
            if (shouldSkipAnchorInjectionForCommandPhrase(sourceLine, anchor)) {
                continue;
            }
            const injected = injectAnchorIntoLine(current, anchor);
            if (injected !== current) {
                current = injected;
                touched = true;
            }
        }
        translatedLines[lineNo] = current;
    }
    return touched ? translatedLines.join("\n") : text;
}
|
|
94
|
+
/**
 * Adds the English form after a required anchor's Chinese hint in
 * heading-like lines of the translation.
 *
 * Source and translated heading-like lines are paired by position. For each
 * pair, every required anchor found in the source heading is annotated in the
 * translated heading as `hint(english)`, unless the translated heading lacks
 * the hint or already contains the English phrase.
 *
 * @param {string} source Source text.
 * @param {string} text Translated text.
 * @param {object|null} slice Prompt slice; when falsy the text is returned as is.
 * @returns {string} The translated text with annotated headings.
 */
export function normalizeHeadingLikeAnchorText(source, text, slice) {
    if (!slice) {
        return text;
    }
    const requiredAnchors = dedupePromptAnchors(slice.requiredAnchors).sort((left, right) => right.english.length - left.english.length);
    if (requiredAnchors.length === 0) {
        return text;
    }
    const sourceHeadingLines = extractHeadingLikeLines(source);
    const translatedHeadingLines = extractHeadingLikeLines(text);
    if (sourceHeadingLines.length === 0 || translatedHeadingLines.length === 0) {
        return text;
    }
    const pairCount = Math.min(sourceHeadingLines.length, translatedHeadingLines.length);
    let normalized = text;
    for (let index = 0; index < pairCount; index += 1) {
        const sourceLine = sourceHeadingLines[index];
        const translatedLine = translatedHeadingLines[index];
        const lineAnchors = coalesceSourceLineAnchors(requiredAnchors.filter((anchor) => containsWholePhrase(sourceLine.content, anchor.english)));
        let normalizedLine = translatedLine.raw;
        for (const anchor of lineAnchors) {
            if (!anchor.chineseHint ||
                anchor.chineseHint.toLowerCase() === anchor.english.toLowerCase() ||
                !normalizedLine.includes(anchor.chineseHint) ||
                containsWholePhrase(normalizedLine, anchor.english)) {
                continue;
            }
            const annotated = `${anchor.chineseHint}(${anchor.english})`;
            // A replacer function inserts the text verbatim; a replacement
            // string would have "$$", "$&" etc. interpreted as patterns.
            normalizedLine = normalizedLine.replace(anchor.chineseHint, () => annotated);
        }
        if (normalizedLine !== translatedLine.raw) {
            const replacementLine = normalizedLine;
            normalized = normalized.replace(translatedLine.raw, () => replacementLine);
        }
    }
    return normalized;
}
|
|
128
|
+
/**
 * Applies the anchor fixes described by the slice's pending repair
 * instructions to the translated text, line by line.
 *
 * Each instruction is parsed into a target (Chinese hint plus optional
 * English). For every line pair the function first tries to repair a
 * heading-like line, then falls back to annotating the hint in the line
 * as `hint(english)`.
 *
 * @param {string} source Source text.
 * @param {string} text Translated text.
 * @param {object|null} slice Prompt slice with `pendingRepairs`.
 * @returns {string} The original `text` when nothing changed, otherwise the
 *   translated lines joined with "\n".
 */
export function normalizeExplicitRepairAnchorText(source, text, slice) {
    if (!slice || slice.pendingRepairs.length === 0) {
        return text;
    }
    const targets = slice.pendingRepairs
        .map((repair) => parseExplicitRepairTarget(repair.instruction))
        .filter((target) => target !== null);
    if (targets.length === 0) {
        return text;
    }
    const sourceLines = source.split(/\r?\n/);
    const translatedLines = text.split(/\r?\n/);
    const sourceHeadingLines = extractHeadingLikeLines(source);
    const translatedHeadingLines = extractHeadingLikeLines(text);
    // The anchor and English form of a target do not depend on the line being
    // processed, so resolve them once instead of once per line.
    const resolvedTargets = [];
    for (const target of targets) {
        const matchingAnchor = resolvePromptAnchorForExplicitRepair(target, slice);
        const english = matchingAnchor?.english ??
            target.english ??
            resolveHeadingEnglishFromSource(target.chineseHint, sourceHeadingLines, translatedHeadingLines);
        if (english) {
            resolvedTargets.push({ target, matchingAnchor, english });
        }
    }
    if (resolvedTargets.length === 0) {
        return text;
    }
    const sharedLineCount = Math.min(sourceLines.length, translatedLines.length);
    let changed = false;
    for (let index = 0; index < sharedLineCount; index += 1) {
        const sourceLine = sourceLines[index] ?? "";
        let translatedLine = translatedLines[index] ?? "";
        const sourceHeading = extractHeadingLikeLine(sourceLine);
        for (const { target, matchingAnchor, english } of resolvedTargets) {
            // Re-read the translated heading for every target because an
            // earlier target may already have rewritten this line.
            const translatedHeading = sourceHeading ? extractHeadingLikeLine(translatedLine) : null;
            if (translatedHeading &&
                stripInlineMarkdownMarkers(translatedHeading.content).includes(target.chineseHint)) {
                const normalizedHeading = matchingAnchor && translatedHeading.content.includes(matchingAnchor.chineseHint)
                    ? injectAnchorIntoLine(translatedHeading.content, matchingAnchor)
                    : normalizeHeadingRepairContent(translatedHeading.content, english);
                if (normalizedHeading !== translatedHeading.content) {
                    // Replacer functions insert text verbatim; a replacement
                    // string would have "$$", "$&" etc. interpreted.
                    translatedLine = translatedLine.replace(translatedHeading.content, () => normalizedHeading);
                    changed = true;
                    continue;
                }
            }
            if (!containsWholePhrase(sourceLine, english) ||
                !translatedLine.includes(target.chineseHint) ||
                containsWholePhrase(translatedLine, english)) {
                continue;
            }
            if (matchingAnchor) {
                const normalizedLine = injectAnchorIntoLine(translatedLine, matchingAnchor);
                if (normalizedLine !== translatedLine) {
                    translatedLine = normalizedLine;
                    changed = true;
                    continue;
                }
            }
            const annotated = `${target.chineseHint}(${english})`;
            translatedLine = translatedLine.replace(target.chineseHint, () => annotated);
            changed = true;
        }
        translatedLines[index] = translatedLine;
    }
    return changed ? translatedLines.join("\n") : text;
}
|
|
188
|
+
/**
 * Finds the source heading that corresponds to a translated heading whose
 * plain text equals `chineseHint`. Headings are paired by position.
 *
 * @param {string} chineseHint Expected plain text of the translated heading.
 * @param {Array<{raw: string, content: string}>} sourceHeadingLines Source headings.
 * @param {Array<{raw: string, content: string}>} translatedHeadingLines Translated headings.
 * @returns {string|null} Trimmed content of the first matching source heading
 *   that contains a Latin letter, or null when there is none.
 */
function resolveHeadingEnglishFromSource(chineseHint, sourceHeadingLines, translatedHeadingLines) {
    const pairCount = Math.min(sourceHeadingLines.length, translatedHeadingLines.length);
    let position = 0;
    while (position < pairCount) {
        const sourceHeading = sourceHeadingLines[position];
        const translatedHeading = translatedHeadingLines[position];
        position += 1;
        if (!sourceHeading || !translatedHeading) {
            continue;
        }
        const translatedPlain = stripInlineMarkdownMarkers(translatedHeading.content).trim();
        if (translatedPlain !== chineseHint) {
            continue;
        }
        if (/[A-Za-z]/.test(sourceHeading.content)) {
            return sourceHeading.content.trim();
        }
    }
    return null;
}
|
|
202
|
+
/**
 * Ensures a heading's content mentions `english`.
 *
 * When the heading already ends with a parenthetical that contains Latin
 * letters, `english` is appended inside that parenthetical; otherwise a new
 * `(english)` parenthetical is appended to the heading.
 *
 * @param {string} content Heading content without markers.
 * @param {string} english English phrase that must appear.
 * @returns {string} The content, unchanged when `english` is empty or present.
 */
function normalizeHeadingRepairContent(content, english) {
    if (!english || containsWholePhrase(content, english)) {
        return content;
    }
    // Last parenthetical holding a Latin letter, with no opening parenthesis
    // after it. ASCII and full-width parentheses are both accepted because
    // translated Chinese headings commonly use the full-width pair.
    const lastLatinParenthetical = /([(（])([^)）]*[A-Za-z][^)）]*)([)）])(?!.*[(（])/;
    const parentheticalMatch = content.match(lastLatinParenthetical);
    if (!parentheticalMatch?.[2]) {
        return `${content}(${english})`;
    }
    const [matched, open, innerRaw, close] = parentheticalMatch;
    const inner = innerRaw.trim();
    if (containsWholePhrase(inner, english)) {
        return content;
    }
    // Keep the delimiters the heading already uses; the separator follows
    // the same width.
    // NOTE(review): the full-width comma for full-width parentheses is an
    // assumption about the intended output style - confirm.
    const separator = open === "（" ? "，" : ",";
    const merged = `${open}${inner}${separator}${english}${close}`;
    // A replacer function inserts the text verbatim ("$" is not special).
    return content.replace(matched, () => merged);
}
|
|
216
|
+
/**
 * Looks up the slice anchor that an explicit repair target refers to.
 *
 * An anchor matches when its English form equals the target's English
 * (case-insensitively), its Chinese hint differs from the target's English,
 * and the target's Chinese hint contains the anchor's Chinese hint.
 *
 * @param {{chineseHint: string, english: string|null}} target Parsed repair target.
 * @param {object} slice Prompt slice providing the anchor lists.
 * @returns {object|null} The first matching anchor, or null.
 */
function resolvePromptAnchorForExplicitRepair(target, slice) {
    const wantedEnglish = target.english?.toLowerCase();
    if (!wantedEnglish) {
        return null;
    }
    const pool = dedupePromptAnchors([
        ...slice.requiredAnchors,
        ...slice.repeatAnchors,
        ...slice.establishedAnchors
    ]);
    for (const anchor of pool) {
        if (anchor.english.toLowerCase() !== wantedEnglish) {
            continue;
        }
        if (anchor.chineseHint === target.english) {
            continue;
        }
        if (target.chineseHint.includes(anchor.chineseHint)) {
            return anchor;
        }
    }
    return null;
}
|
|
230
|
+
/**
 * Rewrites the malformed renderings of one anchor in `text` to the anchor's
 * canonical form.
 *
 * The handled renderings depend on the display mode:
 * - "chinese-primary": E(C), E(E), C(E)(E) and C(C) become the canonical
 *   form; for repeat/established anchors C(E) is then reduced to C.
 * - "english-primary": E(C E), E C(E), C(E) and E(E) become the canonical
 *   form; for repeat/established anchors E(C) is then reduced to E.
 * - otherwise: E(E) becomes E, and for required anchors doubled (E)(E)
 *   parentheticals are collapsed.
 * (E = English form, C = Chinese display form.)
 *
 * @param {string} text Text to normalize.
 * @param {object} anchor Anchor with `english` and `chineseHint`.
 * @param {boolean} isRequired Whether the anchor is required in this slice.
 * @param {boolean} isRepeatOrEstablished Whether the anchor is a repeat or
 *   established anchor in this slice.
 * @returns {string} The normalized text.
 */
function normalizeSingleAnchor(text, anchor, isRequired, isRepeatOrEstablished) {
    const display = resolveAnchorDisplay(anchor);
    const english = display.english;
    if (!english) {
        return text;
    }
    const chineseHint = display.chineseDisplay;
    const escapedEnglish = escapeRegExp(english);
    // Parentheses must be matched literally. Left unescaped in the pattern
    // source they act as capture groups and the pattern matches plain
    // concatenations such as "EnglishChinese" instead. ASCII and full-width
    // parentheses are both accepted.
    const OPEN = "[(（]";
    const CLOSE = "[)）]";
    const inParens = (inner) => `${OPEN}${inner}${CLOSE}`;
    // The replacer function inserts the replacement verbatim ("$" is not special).
    const rewriteAll = (input, pattern, replacement) => input.replace(new RegExp(pattern, "g"), () => replacement);
    let normalized = text;
    if (display.mode === "chinese-primary") {
        const escapedChinese = escapeRegExp(chineseHint);
        const canonical = display.canonical;
        normalized = rewriteAll(normalized, `${escapedEnglish}${inParens(escapedChinese)}`, canonical);
        normalized = rewriteAll(normalized, `${escapedEnglish}${inParens(escapedEnglish)}`, canonical);
        normalized = rewriteAll(normalized, `${escapedChinese}${inParens(escapedEnglish)}\\s*${inParens(escapedEnglish)}`, canonical);
        normalized = rewriteAll(normalized, `${escapedChinese}${inParens(escapedChinese)}`, canonical);
        if (isRepeatOrEstablished) {
            normalized = rewriteAll(normalized, `${escapedChinese}${inParens(escapedEnglish)}`, chineseHint);
        }
        return normalized;
    }
    if (display.mode === "english-primary") {
        const escapedChinese = escapeRegExp(chineseHint);
        const canonical = display.canonical;
        normalized = rewriteAll(normalized, `${escapedEnglish}${inParens(`${escapedChinese}\\s+${escapedEnglish}`)}`, canonical);
        normalized = rewriteAll(normalized, `${escapedEnglish}\\s*${escapedChinese}${inParens(escapedEnglish)}`, canonical);
        normalized = rewriteAll(normalized, `${escapedChinese}${inParens(escapedEnglish)}`, canonical);
        normalized = rewriteAll(normalized, `${escapedEnglish}${inParens(escapedEnglish)}`, canonical);
        if (isRepeatOrEstablished) {
            normalized = rewriteAll(normalized, `${escapedEnglish}${inParens(escapedChinese)}`, english);
        }
        return normalized;
    }
    normalized = rewriteAll(normalized, `${escapedEnglish}${inParens(escapedEnglish)}`, english);
    if (isRequired) {
        normalized = collapseRepeatedEnglishParentheses(normalized, english);
    }
    return normalized;
}
|
|
269
|
+
/**
 * Collapses a doubled parenthetical "(english)(english)" (optionally separated
 * by whitespace) into a single one.
 *
 * @param {string} text Text to clean.
 * @param {string} english English phrase inside the parentheticals.
 * @returns {string} The text with every doubled parenthetical collapsed.
 */
function collapseRepeatedEnglishParentheses(text, english) {
    const escapedEnglish = escapeRegExp(english);
    // Parentheses are matched literally (ASCII or full-width). Left unescaped
    // they act as capture groups, and the pattern would match
    // "english english" and wrap it in parentheses.
    const parenthetical = `[(（]${escapedEnglish}[)）]`;
    const doubled = new RegExp(`(${parenthetical})\\s*${parenthetical}`, "g");
    // Keep the first parenthetical exactly as written.
    return text.replace(doubled, (_whole, first) => first);
}
|
|
273
|
+
/**
 * Inserts an anchor's canonical form into a line, at most once.
 *
 * Nothing is done for English-only anchors or when the canonical form is
 * already present. Otherwise the first suitable occurrence (the English
 * phrase or a Chinese hint, depending on the display mode) is replaced by the
 * canonical form.
 *
 * @param {string} text Line to update.
 * @param {object} anchor Anchor with `english` and `chineseHint`.
 * @returns {string} The updated line, or `text` when nothing applies.
 */
function injectAnchorIntoLine(text, anchor) {
    const { english, mode, canonical, chineseDisplay } = resolveAnchorDisplay(anchor);
    if (!english || mode === "english-only") {
        return text;
    }
    const rawHint = anchor.chineseHint;
    if (mode === "english-primary") {
        if (text.includes(canonical)) {
            return text;
        }
        const englishWithoutChinese = containsWholePhrase(text, english) && !text.includes(chineseDisplay);
        if (englishWithoutChinese) {
            return replaceWholePhraseOnce(text, english, canonical);
        }
        // Prefer the raw hint, then the stripped display form.
        if (text.includes(rawHint)) {
            return replaceFirst(text, rawHint, canonical);
        }
        return chineseDisplay && text.includes(chineseDisplay)
            ? replaceFirst(text, chineseDisplay, canonical)
            : text;
    }
    const alreadyAnchored = text.includes(canonical) || containsWholePhrase(text, english);
    if (alreadyAnchored) {
        return text;
    }
    // Chinese-primary: prefer the stripped display form, then the raw hint.
    for (const needle of [chineseDisplay, rawHint]) {
        if (text.includes(needle)) {
            return replaceFirst(text, needle, canonical);
        }
    }
    return text;
}
|
|
304
|
+
/**
 * Replaces renderings of a family variant with the canonical form of the
 * anchor that actually occurs in the source.
 *
 * @param {string} text Translated line.
 * @param {object} variant Sibling anchor that should not appear on this line.
 * @param {object} sourceAnchor Anchor that appears in the source line.
 * @returns {string} The line with the variant's renderings collapsed.
 */
function collapseUnexpectedFamilyVariant(text, variant, sourceAnchor) {
    const variantCanonical = resolveAnchorDisplay(variant).canonical;
    const { canonical: target, chineseDisplay: sourceChinese } = resolveAnchorDisplay(sourceAnchor);
    const sourceEnglish = sourceAnchor.english;
    const unexpectedForms = [
        variantCanonical,
        `${variant.english}(${sourceEnglish})`,
        `${variant.english}(${sourceChinese})`,
        `${variantCanonical}(${sourceEnglish})`,
        `${target}(${sourceEnglish})`
    ];
    // When the variant is the source anchor plus extra trailing words, also
    // look for those words left hanging after the canonical form.
    let extraWords = "";
    if (variant.english.startsWith(`${sourceEnglish} `)) {
        extraWords = variant.english.slice(sourceEnglish.length).trim();
    }
    if (extraWords) {
        unexpectedForms.push(`${target} ${extraWords}(${sourceEnglish})`, `${target} ${extraWords}`);
    }
    let result = text;
    if (target) {
        for (const form of unexpectedForms) {
            if (form && result.includes(form)) {
                result = result.split(form).join(target);
            }
        }
    }
    const variantRemains = containsWholePhrase(result, variant.english);
    if (variantRemains && !containsWholePhrase(text, sourceEnglish)) {
        result = replaceWholePhraseOnce(result, variant.english, target);
    }
    return result;
}
|
|
331
|
+
/**
 * Removes anchors that are covered by a longer anchor of the same family in
 * the same list (the longer English form contains the shorter one as a whole
 * phrase).
 *
 * @param {object[]} anchors Anchors found on one source line.
 * @returns {object[]} A new array without the covered anchors.
 */
function coalesceSourceLineAnchors(anchors) {
    const isCoveredBy = (shorter, longer) => longer.anchorId !== shorter.anchorId &&
        longer.familyId === shorter.familyId &&
        longer.english.length > shorter.english.length &&
        containsWholePhrase(longer.english, shorter.english);
    const kept = [];
    for (const anchor of anchors) {
        let covered = false;
        for (const other of anchors) {
            if (isCoveredBy(anchor, other)) {
                covered = true;
                break;
            }
        }
        if (!covered) {
            kept.push(anchor);
        }
    }
    return kept;
}
|
|
337
|
+
/**
 * Decides whether anchor injection should be skipped for a list item that
 * starts with the anchor's single-token English form followed by more text.
 *
 * Returns true only when the line is a list item, the anchor's English form
 * has no whitespace, the item (minus a trailing parenthetical) begins with
 * that token, and the rest either contains a comma or starts with an ASCII
 * letter, digit, ".", "/", "_" or "-".
 *
 * @param {string} sourceLine Source line to inspect.
 * @param {{english: string}} anchor Anchor whose English form is checked.
 * @returns {boolean} True when injection should be skipped.
 */
function shouldSkipAnchorInjectionForCommandPhrase(sourceLine, anchor) {
    const listItem = /^(?:[-*+]|\d+[.)])\s+(.+)$/.exec(sourceLine.trim());
    const itemText = listItem?.[1]?.trim();
    if (!itemText) {
        return false;
    }
    const term = anchor.english.trim();
    const isSingleToken = term !== "" && !/\s/.test(term);
    if (!isSingleToken) {
        return false;
    }
    // Ignore a trailing "(...)" explanation at the end of the item.
    const core = itemText.replace(/\s*\([^)]*\)\s*$/, "").trim();
    const firstToken = /^([A-Za-z][A-Za-z0-9+._/-]*)\b/.exec(core)?.[1]?.trim();
    if (!firstToken || firstToken.toLowerCase() !== term.toLowerCase()) {
        return false;
    }
    const rest = core.slice(firstToken.length).trim();
    if (rest === "") {
        return false;
    }
    return rest.includes(",") || /^[A-Za-z0-9./_-]/.test(rest);
}
|
|
362
|
+
/**
 * Recognizes a heading-like line: an ATX heading ("# Title", with optional
 * closing hashes) or a line that is entirely bold ("**Title**").
 *
 * @param {string} rawLine Line to inspect.
 * @returns {{raw: string, content: string}|null} The untouched line plus the
 *   trimmed heading content, or null when the line is not heading-like.
 */
function extractHeadingLikeLine(rawLine) {
    const line = rawLine.trim();
    if (line === "") {
        return null;
    }
    // Tried in order: ATX heading first, then a fully bold line.
    const headingPatterns = [
        /^#{1,6}[ \t]+(.+?)(?:[ \t]+#+)?$/,
        /^\*\*(.+)\*\*$/
    ];
    for (const pattern of headingPatterns) {
        const captured = pattern.exec(line)?.[1];
        if (captured) {
            return { raw: rawLine, content: captured.trim() };
        }
    }
    return null;
}
|
|
377
|
+
/**
 * Extracts the repair target (Chinese hint and, when present, English form)
 * from a Chinese repair instruction.
 *
 * The explicit form 需补为“中文(English)” is tried first. Otherwise the English
 * term and the location text are looked up separately with a list of
 * fallback patterns.
 *
 * @param {string} instruction Repair instruction text.
 * @returns {{chineseHint: string, english: string|null}|null} The parsed
 *   target, or null when no Chinese hint can be determined.
 */
function parseExplicitRepairTarget(instruction) {
    // The parentheses around the English part are literal delimiters. ASCII
    // and full-width forms are both accepted, as are ":"/"：" and ";"/"；"
    // further down, since instructions are written in Chinese prose.
    const explicit = instruction.match(/需补为“([^(（”]+)[(（]([^)）]+)[)）]”/);
    if (explicit?.[1] && explicit[2]) {
        return {
            chineseHint: explicit[1].trim(),
            english: explicit[2].trim()
        };
    }
    const firstCapture = (patterns) => {
        for (const pattern of patterns) {
            const captured = instruction.match(pattern)?.[1]?.trim();
            // Mirrors a `??` chain: only a missing capture falls through.
            if (captured !== undefined && captured !== null) {
                return captured;
            }
        }
        return null;
    };
    const english = firstCapture([
        /关键术语“([^”]*[A-Za-z][^”]*)”/,
        /首现术语\s+([A-Za-z][A-Za-z0-9 .+/_-]*)\s+未补/,
        /[:：]\s*([A-Za-z][A-Za-z0-9 .+/_-]*)\s+首次出现需补/,
        /括注“([^”]*[A-Za-z][^”]*)”/,
        /“([^”]*[A-Za-z][^”]*)”缺少/
    ]);
    const locationText = firstCapture([
        /位置[:：][^。;；\n“]*“([^”]+)”/,
        /位置[:：](.+?)。问题[:：]/,
        /`([^`]+)`/
    ]);
    const chineseHint = locationText ? stripHeadingMarkers(stripInlineMarkdownMarkers(locationText).trim()) : null;
    if (!chineseHint) {
        return null;
    }
    return { chineseHint, english };
}
|
|
401
|
+
/**
 * Removes every inline Markdown marker character (`*`, `_`, backtick, `~`).
 *
 * @param {string} text Text to clean.
 * @returns {string} The text without those characters.
 */
function stripInlineMarkdownMarkers(text) {
    const inlineMarkers = /[*_`~]/g;
    return text.replace(inlineMarkers, "");
}
|
|
404
|
+
/**
 * Removes a leading ATX heading marker (one to six "#" followed by
 * whitespace) and trims the result.
 *
 * @param {string} text Text that may start with a heading marker.
 * @returns {string} The trimmed text without the marker.
 */
function stripHeadingMarkers(text) {
    const leadingMarker = /^#{1,6}\s+/;
    const withoutMarker = text.replace(leadingMarker, "");
    return withoutMarker.trim();
}
|
|
407
|
+
/**
 * Collects every heading-like line of a text, in document order.
 *
 * @param {string} text Text to scan; split on LF or CRLF.
 * @returns {Array<{raw: string, content: string}>} The recognized headings.
 */
function extractHeadingLikeLines(text) {
    return text
        .split(/\r?\n/)
        .map((rawLine) => extractHeadingLikeLine(rawLine))
        .filter((heading) => Boolean(heading));
}
|
|
417
|
+
/**
 * Removes anchors with a repeated `anchorId`, keeping the first occurrence
 * of each id and preserving order.
 *
 * @param {Iterable<{anchorId: unknown}>} anchors Anchors to de-duplicate.
 * @returns {object[]} A new array with one anchor per id.
 */
function dedupePromptAnchors(anchors) {
    // A Map iterates in insertion order, i.e. order of first appearance.
    const firstById = new Map();
    for (const anchor of anchors) {
        if (!firstById.has(anchor.anchorId)) {
            firstById.set(anchor.anchorId, anchor);
        }
    }
    return [...firstById.values()];
}
|
|
429
|
+
/**
 * Tells whether an untranslated anchor (English equals Chinese hint after
 * trimming and lower-casing) is contained, as a whole phrase, in the English
 * form of a longer anchor from the same list.
 *
 * @param {object} anchor Anchor under test.
 * @param {ReadonlyArray<object>} anchors All anchors, including `anchor`.
 * @returns {boolean} True when a longer anchor shadows this one.
 */
function isShadowedByLongerAnchor(anchor, anchors) {
    const english = normalizeAnchorText(anchor.english);
    const chineseHint = normalizeAnchorText(anchor.chineseHint);
    // Only anchors whose hint merely repeats the English form can be shadowed.
    if (english === "" || english !== chineseHint) {
        return false;
    }
    for (const candidate of anchors) {
        if (candidate.anchorId === anchor.anchorId) {
            continue;
        }
        const candidateEnglish = normalizeAnchorText(candidate.english);
        if (!candidateEnglish || candidateEnglish.length <= english.length) {
            continue;
        }
        if (containsWholeEnglishPhrase(candidateEnglish, english)) {
            return true;
        }
    }
    return false;
}
|
|
446
|
+
/**
 * Case-insensitively tests whether `needle` occurs in `haystack` as a whole
 * phrase, i.e. not directly preceded or followed by an ASCII word character.
 *
 * @param {string} haystack Text to search.
 * @param {string} needle Phrase to look for; matched literally.
 * @returns {boolean} True when a whole-phrase occurrence exists.
 */
function containsWholeEnglishPhrase(haystack, needle) {
    const escapedNeedle = escapeRegExp(needle);
    // Lookarounds instead of \b: \b requires a word character on one side,
    // so it never matches around a phrase that starts or ends with a
    // non-word character (for example "C++" in "uses C++ daily").
    return new RegExp(`(?<![A-Za-z0-9_])${escapedNeedle}(?![A-Za-z0-9_])`, "i").test(haystack);
}
|
|
450
|
+
/**
 * Tests whether `haystack` contains `needle`. Needles with a Latin letter are
 * matched as whole English phrases; any other needle is a plain substring test.
 *
 * @param {string} haystack Text to search.
 * @param {string} needle Phrase to look for.
 * @returns {boolean} True when the phrase is present.
 */
function containsWholePhrase(haystack, needle) {
    const hasLatinLetter = /[A-Za-z]/.test(needle);
    return hasLatinLetter
        ? containsWholeEnglishPhrase(haystack, needle)
        : haystack.includes(needle);
}
|
|
456
|
+
/**
 * Produces the comparison form of an anchor string: trimmed and lower-cased.
 *
 * @param {string} value Raw anchor text.
 * @returns {string} The normalized text.
 */
function normalizeAnchorText(value) {
    const trimmed = value.trim();
    return trimmed.toLowerCase();
}
|
|
459
|
+
/**
 * Escapes every regular-expression metacharacter in `value` so the result can
 * be embedded in a pattern and match the text literally.
 *
 * @param {string} value Text to escape.
 * @returns {string} The escaped text.
 */
function escapeRegExp(value) {
    const metacharacters = /[.*+?^${}()|[\]\\]/g;
    return value.replace(metacharacters, (character) => `\\${character}`);
}
|
|
462
|
+
/**
 * Replaces the first literal occurrence of `needle` in `text`. The
 * replacement is inserted verbatim; no "$" patterns are interpreted.
 *
 * @param {string} text Text to update.
 * @param {string} needle Substring to find.
 * @param {string} replacement Text to insert in its place.
 * @returns {string} The updated text, or `text` when `needle` is absent.
 */
function replaceFirst(text, needle, replacement) {
    const at = text.indexOf(needle);
    if (at < 0) {
        return text;
    }
    const before = text.substring(0, at);
    const after = text.substring(at + needle.length);
    return before + replacement + after;
}
|
|
469
|
+
/**
 * Replaces the first whole-phrase occurrence of `needle` in `text`.
 *
 * Needles without a Latin letter are replaced as plain substrings. Needles
 * with one are matched case-sensitively and must not be directly preceded or
 * followed by an ASCII word character.
 *
 * @param {string} text Text to update.
 * @param {string} needle Phrase to replace; matched literally.
 * @param {string} replacement Text to insert; inserted verbatim.
 * @returns {string} The updated text, or `text` when nothing matched.
 */
function replaceWholePhraseOnce(text, needle, replacement) {
    if (!/[A-Za-z]/.test(needle)) {
        return replaceFirst(text, needle, replacement);
    }
    // Lookarounds instead of \b so phrases that start or end with a non-word
    // character (for example "C++") can still be matched.
    const pattern = new RegExp(`(?<![A-Za-z0-9_])${escapeRegExp(needle)}(?![A-Za-z0-9_])`);
    // A replacer function keeps "$$", "$&" etc. in the replacement literal.
    return text.replace(pattern, () => replacement);
}
|
|
476
|
+
/**
 * Cleans up "Term(Term)" renderings, where an English term is followed by a
 * parenthetical that merely repeats it.
 *
 * A first pass records, per English term, the last "Term(explainer)" found in
 * the text whose explainer differs from the term. A second pass replaces
 * every "Term(Term)" with that recorded form, or with the bare term when none
 * was recorded.
 *
 * @param {string} text Text to clean.
 * @returns {string} The cleaned text.
 */
function normalizeRepeatedEnglishParenthesesWithLocalHints(text) {
    // The parentheses are literal delimiters (ASCII or full-width) and are
    // captured so a rebuilt form keeps the style used in the text. Without
    // literal delimiters the pattern splits plain words and never sees a
    // real parenthetical.
    const englishPrimaryPattern = /\b([A-Za-z][A-Za-z0-9.+/_ -]{1,})([(（])([^()（）\n]+)([)）])/g;
    const localEnglishPrimary = new Map();
    for (const match of text.matchAll(englishPrimaryPattern)) {
        const english = match[1]?.trim();
        const explainer = match[3]?.trim();
        if (!english || !explainer) {
            continue;
        }
        if (english.toLowerCase() === explainer.toLowerCase()) {
            continue;
        }
        localEnglishPrimary.set(english.toLowerCase(), `${english}${match[2]}${explainer}${match[4]}`);
    }
    return text.replace(englishPrimaryPattern, (raw, englishRaw, _open, innerRaw) => {
        const english = String(englishRaw).trim();
        const inner = String(innerRaw).trim();
        if (!english || !inner) {
            return raw;
        }
        // Only a parenthetical that repeats the term itself is rewritten.
        if (english.toLowerCase() !== inner.toLowerCase()) {
            return raw;
        }
        return localEnglishPrimary.get(english.toLowerCase()) ?? english;
    });
}
|
|
503
|
+
/**
 * Works out how an anchor should be displayed.
 *
 * Modes:
 * - "english-only": the English form or the hint is empty, or they are equal
 *   ignoring case; the canonical form is the English text.
 * - "english-primary": canonical form is `english(chinese)`.
 * - "chinese-primary": canonical form is `chinese(english)`.
 *
 * The Chinese display text is the hint with a leading and/or trailing copy
 * of the English form stripped, when present.
 *
 * @param {{english: string, chineseHint: string}} anchor Anchor to resolve.
 * @returns {{mode: string, english: string, chineseDisplay: string,
 *   canonical: string, repeatText: string}} The resolved display.
 */
function resolveAnchorDisplay(anchor) {
    const english = anchor.english.trim();
    const chineseHint = anchor.chineseHint.trim();
    const hasDistinctChinese = english !== "" &&
        chineseHint !== "" &&
        chineseHint.toLowerCase() !== english.toLowerCase();
    if (!hasDistinctChinese) {
        return {
            mode: "english-only",
            english,
            chineseDisplay: "",
            canonical: english,
            repeatText: english
        };
    }
    const withoutLeading = stripLeadingEnglishHint(chineseHint, english);
    const withoutTrailing = stripTrailingEnglishHint(withoutLeading ?? chineseHint, english);
    const chineseDisplay = withoutTrailing ?? withoutLeading ?? chineseHint;
    const englishFirst = shouldPreferEnglishPrimary(english, withoutLeading);
    return englishFirst
        ? {
            mode: "english-primary",
            english,
            chineseDisplay,
            canonical: `${english}(${chineseDisplay})`,
            repeatText: english
        }
        : {
            mode: "chinese-primary",
            english,
            chineseDisplay,
            canonical: `${chineseDisplay}(${english})`,
            repeatText: chineseDisplay
        };
}
|
|
535
|
+
/**
 * Strips a leading copy of `english` (compared ignoring case) from a hint.
 *
 * @param {string} chineseHint Hint text.
 * @param {string} english English form that may prefix the hint.
 * @returns {string|null} The trimmed remainder, or null when the hint does
 *   not start with `english` or nothing remains.
 */
function stripLeadingEnglishHint(chineseHint, english) {
    const startsWithEnglish = chineseHint.toLowerCase().startsWith(english.toLowerCase());
    if (!startsWithEnglish) {
        return null;
    }
    const remainder = chineseHint.slice(english.length).trim();
    return remainder === "" ? null : remainder;
}
|
|
542
|
+
/**
 * Strips a trailing copy of `english` (compared ignoring case) from a hint,
 * together with any separator punctuation left dangling before it.
 *
 * @param {string} chineseHint Hint text.
 * @param {string} english English form that may end the hint.
 * @returns {string|null} The cleaned prefix, or null when the hint does not
 *   end with `english` or nothing remains.
 */
function stripTrailingEnglishHint(chineseHint, english) {
    if (!chineseHint.toLowerCase().endsWith(english.toLowerCase())) {
        return null;
    }
    const prefix = chineseHint
        .slice(0, Math.max(0, chineseHint.length - english.length))
        .trim()
        // Dangling separators before the English part: opening parenthesis,
        // colon and comma in ASCII and full-width form, the ideographic
        // comma, hyphen/dashes and whitespace.
        .replace(/[（(：:，,、\-–—\s]+$/u, "")
        .trim();
    return prefix.length > 0 ? prefix : null;
}
|
|
553
|
+
/**
 * Decides whether an anchor is shown English-first.
 *
 * That is the case when the hint started with the English form (a non-empty
 * stripped prefix exists) or when the English form is a single token made of
 * ASCII letters, digits and ". + / _ -".
 *
 * @param {string} english English form of the anchor.
 * @param {string|null} strippedEnglishPrefix Hint remainder after stripping a
 *   leading English copy, or null.
 * @returns {boolean} True for English-first display.
 */
function shouldPreferEnglishPrimary(english, strippedEnglishPrefix) {
    const isSingleToken = () => /^[A-Za-z0-9][A-Za-z0-9.+/_-]*$/.test(english);
    return Boolean(strippedEnglishPrefix) || isSingleToken();
}
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
/** Prompt template for the whole-document anchor analysis; contains a `{{document}}` placeholder. */
export declare const DOCUMENT_ANALYSIS_PROMPT: string;
/** Prompt template for the initial translation; filled by `buildInitialPrompt`. */
export declare const INITIAL_TRANSLATION_PROMPT: string;
/** Prompt template for the gate audit; filled by `buildGateAuditPrompt`. */
export declare const GATE_AUDIT_PROMPT: string;
/** Prompt template for the bundled gate audit. */
export declare const BUNDLED_GATE_AUDIT_PROMPT: string;
/** Prompt template for the repair step. */
export declare const REPAIR_PROMPT: string;
/** Prompt template for the style polish step. */
export declare const STYLE_POLISH_PROMPT: string;

/** Fills `{{source}}` in the initial translation prompt. */
export declare function buildInitialPrompt(source: string): string;
/** Fills `{{document}}` in the document analysis prompt. */
export declare function buildDocumentAnalysisPrompt(document: string): string;
/** Fills `{{source}}` and `{{translation}}` in the gate audit prompt. */
export declare function buildGateAuditPrompt(source: string, translation: string): string;
/** Builds the bundled gate audit prompt from the given segments text. */
export declare function buildBundledGateAuditPrompt(segments: string): string;
/** Builds the repair prompt from the source, the current translation and the must-fix items. */
export declare function buildRepairPrompt(source: string, translation: string, mustFix: readonly string[]): string;
|
|
@@ -6,6 +6,54 @@ Markdown 结构要求:
|
|
|
6
6
|
4. 不要把代码块、命令行片段或配置键值误译成中文。
|
|
7
7
|
5. 如果原文正文使用了可翻译的 Markdown 强调结构(如 **加粗**、*斜体*)或命令/flag 写法(如 --flag),译文应保持等价结构;不要丢掉强调,也不要把普通命令/flag 误改成代码块、标题或其他 Markdown 结构。
|
|
8
8
|
`.trim();
|
|
9
|
+
/**
 * Prompt template for the whole-document analysis step.
 *
 * It asks the model to return JSON only, with `anchors` (english,
 * chineseHint, familyKey, firstOccurrence.chunkId/segmentId) and
 * `ignoredTerms`. The `{{document}}` placeholder marks where the analysis
 * input is inserted.
 *
 * NOTE(review): the lines of the literal are reproduced without leading
 * whitespace; confirm the indentation of the JSON example against the
 * published file.
 */
export const DOCUMENT_ANALYSIS_PROMPT = `
你是一名科技与科普翻译编辑的前置分析器。请阅读下面的整篇 Markdown 文档分析输入,并只返回 JSON,不要返回散文说明。

目标:
1. 找出全文里需要建立“首次中英锚定”的候选专名、产品名、机构名、项目名和关键术语。
2. 标出它们在全文中的首次出现位置(chunkId + segmentId)。
3. 将同一概念家族的不同英文变体归并到同一个 familyKey。
4. 对明显不需要强制双语锚定的通用词,放进 ignoredTerms。

要求:
1. 只返回 JSON。
2. anchors 中每一项都必须包含:
- english
- chineseHint
- familyKey
- firstOccurrence.chunkId
- firstOccurrence.segmentId
3. english 必须是原文里实际出现的英文形式,不要杜撰。
4. chineseHint 只写最小必要的中文主译或中文说明,不要写整句。
5. familyKey 用于归并同一概念家族,保持稳定、简短、可复用。
6. 只有真正需要首现双语锚定的项才放进 anchors。像 Earth、reptiles、paleontologist 这类通用名词、职业称谓、类群名或常见科学词,通常应放进 ignoredTerms。
7. 如果同一项在标题、引用、列表项和正文中都出现,首次出现位置必须精确落在最先出现的那个 chunkId / segmentId。
8. 不要输出重复的 anchors。

返回格式:
{
"anchors": [
{
"english": "Prompt injection attacks",
"chineseHint": "提示注入攻击",
"familyKey": "prompt injection attacks",
"firstOccurrence": {
"chunkId": "chunk-3",
"segmentId": "chunk-3-segment-1"
}
}
],
"ignoredTerms": [
{
"english": "Earth",
"reason": "通用名词"
}
]
}

【文档分析输入】
{{document}}
`.trim();
|
|
9
57
|
export const INITIAL_TRANSLATION_PROMPT = `
|
|
10
58
|
你是一名科技与科普翻译编辑。请把下面的英文 Markdown 文章翻译成自然、准确、可读性高的中文 Markdown,但本次任务以“硬性项正确”为第一优先级。请严格遵守以下要求:
|
|
11
59
|
|
|
@@ -189,6 +237,9 @@ export const STYLE_POLISH_PROMPT = `
|
|
|
189
237
|
/**
 * Builds the initial translation prompt by substituting every `{{source}}`
 * placeholder in the template.
 *
 * @param {string} source Source Markdown; inserted verbatim.
 * @returns {string} The filled prompt.
 */
export function buildInitialPrompt(source) {
    // A replacer function inserts the text verbatim. With a replacement
    // string, "$$" (for example Markdown display math) would collapse to "$"
    // and "$&" would expand to the placeholder.
    return INITIAL_TRANSLATION_PROMPT.replaceAll("{{source}}", () => source);
}
|
|
240
|
+
/**
 * Builds the document analysis prompt by substituting every `{{document}}`
 * placeholder in the template.
 *
 * @param {string} document Document analysis input; inserted verbatim.
 * @returns {string} The filled prompt.
 */
export function buildDocumentAnalysisPrompt(document) {
    // A replacer function inserts the text verbatim. With a replacement
    // string, "$$" (for example Markdown display math) would collapse to "$"
    // and "$&" would expand to the placeholder.
    return DOCUMENT_ANALYSIS_PROMPT.replaceAll("{{document}}", () => document);
}
|
|
192
243
|
/**
 * Builds the gate audit prompt by substituting every `{{source}}` and
 * `{{translation}}` placeholder in the template.
 *
 * @param {string} source Source Markdown; inserted verbatim.
 * @param {string} translation Translated Markdown; inserted verbatim.
 * @returns {string} The filled prompt.
 */
export function buildGateAuditPrompt(source, translation) {
    // Single pass over the template: a "{{translation}}" token that occurs
    // inside the source text is left alone, and the replacer function keeps
    // "$" sequences in either text literal.
    return GATE_AUDIT_PROMPT.replace(/\{\{(source|translation)\}\}/g, (_placeholder, key) => (key === "source" ? source : translation));
}
|