@lobehub/ui 2.16.0 → 2.16.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/es/hooks/useMarkdown/latex.d.ts +124 -12
- package/es/hooks/useMarkdown/latex.js +379 -67
- package/package.json +1 -1
|
@@ -45,18 +45,6 @@ export declare function escapeTextUnderscores(text: string): string;
|
|
|
45
45
|
* @returns The string with currency dollar signs escaped
|
|
46
46
|
*/
|
|
47
47
|
export declare function escapeCurrencyDollars(text: string): string;
|
|
48
|
-
/**
|
|
49
|
-
* Preprocesses LaTeX content by performing multiple operations:
|
|
50
|
-
* 1. Protects code blocks from processing
|
|
51
|
-
* 2. Protects existing LaTeX expressions
|
|
52
|
-
* 3. Escapes dollar signs that likely represent currency
|
|
53
|
-
* 4. Converts LaTeX delimiters
|
|
54
|
-
* 5. Escapes mhchem commands and pipes
|
|
55
|
-
*
|
|
56
|
-
* @param content The input string containing LaTeX expressions
|
|
57
|
-
* @returns The processed string with proper LaTeX formatting
|
|
58
|
-
*/
|
|
59
|
-
export declare function preprocessLaTeX(str: string): string;
|
|
60
48
|
/**
|
|
61
49
|
* Checks if the last LaTeX formula in the text is renderable.
|
|
62
50
|
* Only validates the formula after the last $$ if there's an odd number of $$.
|
|
@@ -65,3 +53,127 @@ export declare function preprocessLaTeX(str: string): string;
|
|
|
65
53
|
* @returns True if the last formula is renderable or if there's no incomplete formula
|
|
66
54
|
*/
|
|
67
55
|
export declare const isLastFormulaRenderable: (text: string) => boolean;
|
|
56
|
+
/**
|
|
57
|
+
* Fixes common LaTeX syntax errors automatically
|
|
58
|
+
* - Balances unmatched braces
|
|
59
|
+
* - Balances \left and \right delimiters
|
|
60
|
+
*
|
|
61
|
+
* @param text The input string containing LaTeX expressions
|
|
62
|
+
* @returns The string with fixed LaTeX expressions
|
|
63
|
+
*/
|
|
64
|
+
export declare function fixCommonLaTeXErrors(text: string): string;
|
|
65
|
+
/**
|
|
66
|
+
* Normalizes whitespace in LaTeX expressions
|
|
67
|
+
* - Removes extra spaces around $ delimiters
|
|
68
|
+
* - Normalizes multiple spaces to single space inside formulas
|
|
69
|
+
*
|
|
70
|
+
* @param text The input string containing LaTeX expressions
|
|
71
|
+
* @returns The string with normalized whitespace
|
|
72
|
+
*/
|
|
73
|
+
export declare function normalizeLatexSpacing(text: string): string;
|
|
74
|
+
/**
|
|
75
|
+
* Validates all LaTeX expressions in the text
|
|
76
|
+
* Returns detailed information about validation results
|
|
77
|
+
*
|
|
78
|
+
* @param text The input string containing LaTeX expressions
|
|
79
|
+
* @returns Validation results with errors if any
|
|
80
|
+
*/
|
|
81
|
+
export declare function validateLatexExpressions(text: string): {
|
|
82
|
+
errors: Array<{
|
|
83
|
+
formula: string;
|
|
84
|
+
message: string;
|
|
85
|
+
position: number;
|
|
86
|
+
type: 'display' | 'inline';
|
|
87
|
+
}>;
|
|
88
|
+
totalExpressions: number;
|
|
89
|
+
valid: boolean;
|
|
90
|
+
};
|
|
91
|
+
/**
|
|
92
|
+
* Handles CJK (Chinese, Japanese, Korean) characters mixed with LaTeX
|
|
93
|
+
* Optionally adds spaces between CJK characters and LaTeX expressions for better rendering
|
|
94
|
+
*
|
|
95
|
+
* @param text The input string
|
|
96
|
+
* @param addSpaces Whether to add spaces between CJK and LaTeX (default: false)
|
|
97
|
+
* @returns The processed string
|
|
98
|
+
*/
|
|
99
|
+
export declare function handleCJKWithLatex(text: string, addSpaces?: boolean): string;
|
|
100
|
+
export interface AdvancedPreprocessOptions {
|
|
101
|
+
/** Add spaces between CJK and LaTeX (default: false, requires handleCJK: true) */
|
|
102
|
+
addCJKSpaces?: boolean;
|
|
103
|
+
/** Convert bracket notation \[...\] to $$...$$ (default: true) */
|
|
104
|
+
convertBrackets?: boolean;
|
|
105
|
+
/** Enable currency escaping (default: true) */
|
|
106
|
+
escapeCurrency?: boolean;
|
|
107
|
+
/** Escape mhchem commands (default: true) */
|
|
108
|
+
escapeMhchem?: boolean;
|
|
109
|
+
/** Escape pipe symbols in LaTeX (default: true) */
|
|
110
|
+
escapePipes?: boolean;
|
|
111
|
+
/** Escape underscores in \text{} (default: true) */
|
|
112
|
+
escapeUnderscores?: boolean;
|
|
113
|
+
/** Automatically fix common LaTeX errors (default: false) */
|
|
114
|
+
fixErrors?: boolean;
|
|
115
|
+
/** Handle CJK characters (default: false) */
|
|
116
|
+
handleCJK?: boolean;
|
|
117
|
+
/** Normalize whitespace (default: false) */
|
|
118
|
+
normalizeSpacing?: boolean;
|
|
119
|
+
/** Throw error on validation failure (default: false, requires validate: true) */
|
|
120
|
+
throwOnValidationError?: boolean;
|
|
121
|
+
/** Validate LaTeX syntax (default: false) */
|
|
122
|
+
validate?: boolean;
|
|
123
|
+
}
|
|
124
|
+
/**
|
|
125
|
+
* Comprehensive LaTeX preprocessing with configurable options
|
|
126
|
+
*
|
|
127
|
+
* This is the main preprocessing function that handles:
|
|
128
|
+
* - Currency symbol escaping (e.g., $20 → \$20)
|
|
129
|
+
* - LaTeX delimiter conversion (\[...\] → $$...$$)
|
|
130
|
+
* - Special character escaping (pipes, underscores, mhchem)
|
|
131
|
+
* - Optional error fixing and validation
|
|
132
|
+
* - Optional CJK character handling
|
|
133
|
+
*
|
|
134
|
+
* @param text The input string containing LaTeX and Markdown
|
|
135
|
+
* @param options Configuration options for fine-grained control
|
|
136
|
+
* @returns The preprocessed string
|
|
137
|
+
*
|
|
138
|
+
* @example
|
|
139
|
+
* ```ts
|
|
140
|
+
* // Default behavior (same as old preprocessLaTeX)
|
|
141
|
+
* preprocessLaTeX('向量$90^\\circ$,非 $0^\\circ$ 和 $180^\\circ$')
|
|
142
|
+
*
|
|
143
|
+
* // With custom options
|
|
144
|
+
* preprocessLaTeX(text, {
|
|
145
|
+
* fixErrors: true,
|
|
146
|
+
* validate: true,
|
|
147
|
+
* handleCJK: true
|
|
148
|
+
* })
|
|
149
|
+
* ```
|
|
150
|
+
*/
|
|
151
|
+
export declare function preprocessLaTeX(text: string, options?: AdvancedPreprocessOptions): string;
|
|
152
|
+
/**
|
|
153
|
+
* Strict preprocessing mode - enables all safety features and validations
|
|
154
|
+
* Use this when you want maximum correctness and are willing to accept the performance cost
|
|
155
|
+
*
|
|
156
|
+
* @param text The input string
|
|
157
|
+
* @returns The preprocessed string with all features enabled
|
|
158
|
+
*
|
|
159
|
+
* @example
|
|
160
|
+
* ```ts
|
|
161
|
+
* const processed = preprocessLaTeXStrict(userInput)
|
|
162
|
+
* // Enables: error fixing, validation, space normalization; CJK handling is switched on but adds no spaces (addCJKSpaces stays false)
|
|
163
|
+
* ```
|
|
164
|
+
*/
|
|
165
|
+
export declare function preprocessLaTeXStrict(text: string): string;
|
|
166
|
+
/**
|
|
167
|
+
* Minimal preprocessing mode - only essential operations
|
|
168
|
+
* Use this for better performance when you control the input
|
|
169
|
+
*
|
|
170
|
+
* @param text The input string
|
|
171
|
+
* @returns The preprocessed string with minimal processing
|
|
172
|
+
*
|
|
173
|
+
* @example
|
|
174
|
+
* ```ts
|
|
175
|
+
* const processed = preprocessLaTeXMinimal(trustedInput)
|
|
176
|
+
* // Only escapes currency and converts brackets
|
|
177
|
+
* ```
|
|
178
|
+
*/
|
|
179
|
+
export declare function preprocessLaTeXMinimal(text: string): string;
|
|
@@ -1,5 +1,53 @@
|
|
|
1
|
+
function _typeof(o) { "@babel/helpers - typeof"; return _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function (o) { return typeof o; } : function (o) { return o && "function" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? "symbol" : typeof o; }, _typeof(o); }
|
|
2
|
+
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
|
|
3
|
+
function _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, _toPropertyKey(descriptor.key), descriptor); } }
|
|
4
|
+
function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); Object.defineProperty(Constructor, "prototype", { writable: false }); return Constructor; }
|
|
5
|
+
function _defineProperty(obj, key, value) { key = _toPropertyKey(key); if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; }
|
|
6
|
+
function _toPropertyKey(t) { var i = _toPrimitive(t, "string"); return "symbol" == _typeof(i) ? i : String(i); }
|
|
7
|
+
function _toPrimitive(t, r) { if ("object" != _typeof(t) || !t) return t; var e = t[Symbol.toPrimitive]; if (void 0 !== e) { var i = e.call(t, r || "default"); if ("object" != _typeof(i)) return i; throw new TypeError("@@toPrimitive must return a primitive value."); } return ("string" === r ? String : Number)(t); }
|
|
1
8
|
import { renderToString } from 'katex';
|
|
2
9
|
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// Utility Classes
|
|
12
|
+
// ============================================================================
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* PlaceholderManager - Manages temporary replacement and restoration of protected content
|
|
16
|
+
* Used to protect code blocks and LaTeX expressions during preprocessing
|
|
17
|
+
*/
|
|
18
|
+
var PlaceholderManager = /*#__PURE__*/function () {
|
|
19
|
+
function PlaceholderManager() {
|
|
20
|
+
var prefix = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 'PROTECTED';
|
|
21
|
+
_classCallCheck(this, PlaceholderManager);
|
|
22
|
+
_defineProperty(this, "placeholders", []);
|
|
23
|
+
_defineProperty(this, "prefix", void 0);
|
|
24
|
+
this.prefix = prefix;
|
|
25
|
+
}
|
|
26
|
+
_createClass(PlaceholderManager, [{
|
|
27
|
+
key: "add",
|
|
28
|
+
value: function add(content) {
|
|
29
|
+
var index = this.placeholders.length;
|
|
30
|
+
this.placeholders.push(content);
|
|
31
|
+
return "<<".concat(this.prefix, "_").concat(index, ">>");
|
|
32
|
+
}
|
|
33
|
+
}, {
|
|
34
|
+
key: "restore",
|
|
35
|
+
value: function restore(text) {
|
|
36
|
+
var _this = this;
|
|
37
|
+
return text.replaceAll(new RegExp("<<".concat(this.prefix, "_(\\d+)>>"), 'g'), function (_, index) {
|
|
38
|
+
return _this.placeholders[Number.parseInt(index)] || '';
|
|
39
|
+
});
|
|
40
|
+
}
|
|
41
|
+
}, {
|
|
42
|
+
key: "clear",
|
|
43
|
+
value: function clear() {
|
|
44
|
+
this.placeholders = [];
|
|
45
|
+
}
|
|
46
|
+
}]);
|
|
47
|
+
return PlaceholderManager;
|
|
48
|
+
}(); // ============================================================================
|
|
49
|
+
// Helper Functions
|
|
50
|
+
// ============================================================================
|
|
3
51
|
// Helper: replace unescaped pipes with \vert within a LaTeX math fragment
|
|
4
52
|
var replaceUnescapedPipes = function replaceUnescapedPipes(formula) {
|
|
5
53
|
return (
|
|
@@ -52,13 +100,30 @@ export function escapeLatexPipes(text) {
|
|
|
52
100
|
// remark-gfm table parsing won't treat them as column separators.
|
|
53
101
|
// Leave code blocks/inline code untouched.
|
|
54
102
|
// Also ignore escaped dollars (\$) which are currency symbols
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
return
|
|
103
|
+
|
|
104
|
+
// Process code blocks first to protect them
|
|
105
|
+
var codeBlocks = [];
|
|
106
|
+
var content = text.replaceAll(/(```[\S\s]*?```|`[^\n`]*`)/g, function (match) {
|
|
107
|
+
codeBlocks.push(match);
|
|
108
|
+
return "<<CODE_".concat(codeBlocks.length - 1, ">>");
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
// For display math, allow multiline
|
|
112
|
+
content = content.replaceAll(/\$\$([\S\s]*?)\$\$/g, function (match, display) {
|
|
113
|
+
return "$$".concat(replaceUnescapedPipes(display), "$$");
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
// For inline math, use non-greedy match that DOES NOT cross newlines
|
|
117
|
+
// This prevents issues in tables where $ might appear in different cells
|
|
118
|
+
content = content.replaceAll(/(?<!\\)\$(?!\$)([^\n$]*?)(?<!\\)\$(?!\$)/g, function (match, inline) {
|
|
119
|
+
return "$".concat(replaceUnescapedPipes(inline), "$");
|
|
61
120
|
});
|
|
121
|
+
|
|
122
|
+
// Restore code blocks
|
|
123
|
+
content = content.replaceAll(/<<CODE_(\d+)>>/g, function (_, index) {
|
|
124
|
+
return codeBlocks[Number.parseInt(index)];
|
|
125
|
+
});
|
|
126
|
+
return content;
|
|
62
127
|
}
|
|
63
128
|
|
|
64
129
|
/**
|
|
@@ -94,79 +159,36 @@ export function escapeTextUnderscores(text) {
|
|
|
94
159
|
*/
|
|
95
160
|
export function escapeCurrencyDollars(text) {
|
|
96
161
|
// Protect code blocks and existing LaTeX expressions from processing
|
|
97
|
-
var
|
|
98
|
-
var content = text.replaceAll(
|
|
99
|
-
|
|
100
|
-
|
|
162
|
+
var manager = new PlaceholderManager('PROTECTED');
|
|
163
|
+
var content = text.replaceAll(
|
|
164
|
+
// Match patterns to protect (in order):
|
|
165
|
+
// 1. Code blocks: ```...```
|
|
166
|
+
// 2. Inline code: `...`
|
|
167
|
+
// 3. Display math: $$...$$
|
|
168
|
+
// 4. Inline math with LaTeX commands: $...\...$ (must contain backslash to distinguish from currency)
|
|
169
|
+
// 5. LaTeX bracket notation: \[...\]
|
|
170
|
+
// 6. LaTeX parenthesis notation: \(...\)
|
|
171
|
+
/(```[\S\s]*?```|`[^\n`]*`|\$\$[\S\s]*?\$\$|(?<!\\)\$(?!\$)(?=[\S\s]*?\\)[\S\s]*?(?<!\\)\$(?!\$)|\\\[[\S\s]*?\\]|\\\(.*?\\\))/g, function (match) {
|
|
172
|
+
return manager.add(match);
|
|
101
173
|
});
|
|
102
174
|
|
|
103
175
|
// Escape dollar signs that are clearly currency:
|
|
104
176
|
// - $ followed by a digit
|
|
105
177
|
// - Not preceded by another $ (to avoid breaking $$)
|
|
178
|
+
// - Not followed immediately by another $ (to avoid breaking $1$ LaTeX)
|
|
106
179
|
// - Followed by number patterns with optional commas, decimals, ranges, or plus signs
|
|
107
180
|
// Match patterns like: $20, $1,000, $19.99, $20-50, $300+, $1,000-2,000+
|
|
181
|
+
// But NOT: $1$, $2$ (these are LaTeX formulas)
|
|
108
182
|
// In the replacement: \\ = backslash, $$ = literal $, $1 = capture group 1
|
|
109
|
-
content = content.replaceAll(/(?<!\$)\$(\d{1,3}(?:,\d{3})*(?:\.\d+)?(?:-\d{1,3}(?:,\d{3})*(?:\.\d+)?)?\+?)/g, '\\$$$1');
|
|
183
|
+
content = content.replaceAll(/(?<!\$)\$(\d{1,3}(?:,\d{3})*(?:\.\d+)?(?:-\d{1,3}(?:,\d{3})*(?:\.\d+)?)?\+?)(?!\$)/g, '\\$$$1');
|
|
110
184
|
|
|
111
185
|
// Restore protected content
|
|
112
|
-
content =
|
|
113
|
-
return protectedStrings[Number.parseInt(index)];
|
|
114
|
-
});
|
|
186
|
+
content = manager.restore(content);
|
|
115
187
|
return content;
|
|
116
188
|
}
|
|
117
189
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
* 1. Protects code blocks from processing
|
|
121
|
-
* 2. Protects existing LaTeX expressions
|
|
122
|
-
* 3. Escapes dollar signs that likely represent currency
|
|
123
|
-
* 4. Converts LaTeX delimiters
|
|
124
|
-
* 5. Escapes mhchem commands and pipes
|
|
125
|
-
*
|
|
126
|
-
* @param content The input string containing LaTeX expressions
|
|
127
|
-
* @returns The processed string with proper LaTeX formatting
|
|
128
|
-
*/
|
|
129
|
-
export function preprocessLaTeX(str) {
|
|
130
|
-
// Step 1: Protect code blocks
|
|
131
|
-
// const codeBlocks: string[] = [];
|
|
132
|
-
// let content = str.replaceAll(/(```[\S\s]*?```|`[^\n`]+`)/g, (match, code) => {
|
|
133
|
-
// codeBlocks.push(code);
|
|
134
|
-
// return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
|
|
135
|
-
// });
|
|
136
|
-
|
|
137
|
-
// // Step 2: Protect existing LaTeX expressions
|
|
138
|
-
// const latexExpressions: string[] = [];
|
|
139
|
-
// content = content.replaceAll(/(\$\$[\S\s]*?\$\$|\\\[[\S\s]*?\\]|\\\(.*?\\\))/g, (match) => {
|
|
140
|
-
// latexExpressions.push(match);
|
|
141
|
-
// return `<<LATEX_${latexExpressions.length - 1}>>`;
|
|
142
|
-
// });
|
|
143
|
-
|
|
144
|
-
// Step 3: Escape dollar signs that are likely currency indicators
|
|
145
|
-
// Deprecated, as it causes parsing errors for formulas starting with a number, such as `$1$`
|
|
146
|
-
// content = content.replaceAll(/\$(?=\d)/g, '\\$');
|
|
147
|
-
|
|
148
|
-
// Step 4: Restore LaTeX expressions
|
|
149
|
-
// content = content.replaceAll(
|
|
150
|
-
// /<<LATEX_(\d+)>>/g,
|
|
151
|
-
// (_, index) => latexExpressions[Number.parseInt(index)],
|
|
152
|
-
// );
|
|
153
|
-
|
|
154
|
-
// // Step 5: Restore code blocks
|
|
155
|
-
// content = content.replaceAll(
|
|
156
|
-
// /<<CODE_BLOCK_(\d+)>>/g,
|
|
157
|
-
// (_, index) => codeBlocks[Number.parseInt(index)],
|
|
158
|
-
// );
|
|
159
|
-
var content = str;
|
|
160
|
-
|
|
161
|
-
// Step 6: Apply additional escaping functions
|
|
162
|
-
// Escape currency dollar signs FIRST before other LaTeX processing
|
|
163
|
-
content = escapeCurrencyDollars(content);
|
|
164
|
-
content = convertLatexDelimiters(content);
|
|
165
|
-
content = escapeMhchemCommands(content);
|
|
166
|
-
content = escapeLatexPipes(content);
|
|
167
|
-
content = escapeTextUnderscores(content);
|
|
168
|
-
return content;
|
|
169
|
-
}
|
|
190
|
+
// Old simple preprocessLaTeX has been replaced by the comprehensive version below
|
|
191
|
+
// The new preprocessLaTeX provides the same default behavior with optional advanced features
|
|
170
192
|
|
|
171
193
|
/**
|
|
172
194
|
* Extracts the LaTeX formula after the last $$ delimiter if there's an odd number of $$ delimiters.
|
|
@@ -212,4 +234,294 @@ export var isLastFormulaRenderable = function isLastFormulaRenderable(text) {
|
|
|
212
234
|
console.log("LaTeX formula rendering error: ".concat(error));
|
|
213
235
|
return false;
|
|
214
236
|
}
|
|
215
|
-
};
|
|
237
|
+
};
|
|
238
|
+
|
|
239
|
+
// ============================================================================
|
|
240
|
+
// Advanced Preprocessing Functions
|
|
241
|
+
// ============================================================================
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* Fixes common LaTeX syntax errors automatically
|
|
245
|
+
* - Balances unmatched braces
|
|
246
|
+
* - Balances \left and \right delimiters
|
|
247
|
+
*
|
|
248
|
+
* @param text The input string containing LaTeX expressions
|
|
249
|
+
* @returns The string with fixed LaTeX expressions
|
|
250
|
+
*/
|
|
251
|
+
export function fixCommonLaTeXErrors(text) {
|
|
252
|
+
return text.replaceAll(/(\$\$[\S\s]*?\$\$|\$[\S\s]*?\$)/g, function (match) {
|
|
253
|
+
var fixed = match;
|
|
254
|
+
|
|
255
|
+
// Fix unbalanced braces
|
|
256
|
+
var openBraces = (fixed.match(/(?<!\\){/g) || []).length;
|
|
257
|
+
var closeBraces = (fixed.match(/(?<!\\)}/g) || []).length;
|
|
258
|
+
if (openBraces > closeBraces) {
|
|
259
|
+
var diff = openBraces - closeBraces;
|
|
260
|
+
var closingBraces = '}'.repeat(diff);
|
|
261
|
+
// Insert before the closing delimiter
|
|
262
|
+
fixed = fixed.replace(/(\$\$?)$/, closingBraces + '$1');
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
// Fix unbalanced \left and \right
|
|
266
|
+
var leftDelims = (fixed.match(/\\left[(.<[{|]/g) || []).length;
|
|
267
|
+
var rightDelims = (fixed.match(/\\right[).>\]|}]/g) || []).length;
|
|
268
|
+
if (leftDelims > rightDelims) {
|
|
269
|
+
var _diff = leftDelims - rightDelims;
|
|
270
|
+
var rightDots = '\\right.'.repeat(_diff);
|
|
271
|
+
fixed = fixed.replace(/(\$\$?)$/, rightDots + '$1');
|
|
272
|
+
}
|
|
273
|
+
return fixed;
|
|
274
|
+
});
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
/**
|
|
278
|
+
* Normalizes whitespace in LaTeX expressions
|
|
279
|
+
* - Removes extra spaces around $ delimiters
|
|
280
|
+
* - Normalizes multiple spaces to single space inside formulas
|
|
281
|
+
*
|
|
282
|
+
* @param text The input string containing LaTeX expressions
|
|
283
|
+
* @returns The string with normalized whitespace
|
|
284
|
+
*/
|
|
285
|
+
export function normalizeLatexSpacing(text) {
|
|
286
|
+
var result = text;
|
|
287
|
+
|
|
288
|
+
// Remove spaces inside $ delimiters (at the edges)
|
|
289
|
+
result = result.replaceAll(/\$\s+/g, '$');
|
|
290
|
+
result = result.replaceAll(/\s+\$/g, '$');
|
|
291
|
+
result = result.replaceAll(/\$\$\s+/g, '$$');
|
|
292
|
+
result = result.replaceAll(/\s+\$\$/g, '$$');
|
|
293
|
+
|
|
294
|
+
// Normalize multiple spaces inside formulas to single space
|
|
295
|
+
result = result.replaceAll(/(\$\$[\S\s]*?\$\$|\$[\S\s]*?\$)/g, function (match) {
|
|
296
|
+
return match.replaceAll(/\s{2,}/g, ' ');
|
|
297
|
+
});
|
|
298
|
+
return result;
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
/**
|
|
302
|
+
* Validates all LaTeX expressions in the text
|
|
303
|
+
* Returns detailed information about validation results
|
|
304
|
+
*
|
|
305
|
+
* @param text The input string containing LaTeX expressions
|
|
306
|
+
* @returns Validation results with errors if any
|
|
307
|
+
*/
|
|
308
|
+
export function validateLatexExpressions(text) {
|
|
309
|
+
var errors = [];
|
|
310
|
+
var totalExpressions = 0;
|
|
311
|
+
var pattern = /\$\$([\S\s]*?)\$\$|(?<!\\)\$(?!\$)([\S\s]*?)(?<!\\)\$(?!\$)/g;
|
|
312
|
+
var match;
|
|
313
|
+
while ((match = pattern.exec(text)) !== null) {
|
|
314
|
+
totalExpressions++;
|
|
315
|
+
var formula = match[1] || match[2];
|
|
316
|
+
var isDisplay = match[0].startsWith('$$');
|
|
317
|
+
try {
|
|
318
|
+
renderToString(formula, {
|
|
319
|
+
displayMode: isDisplay,
|
|
320
|
+
strict: 'warn',
|
|
321
|
+
throwOnError: true,
|
|
322
|
+
trust: false
|
|
323
|
+
});
|
|
324
|
+
} catch (error) {
|
|
325
|
+
errors.push({
|
|
326
|
+
formula: formula.slice(0, 50) + (formula.length > 50 ? '...' : ''),
|
|
327
|
+
message: error instanceof Error ? error.message : String(error),
|
|
328
|
+
position: match.index,
|
|
329
|
+
type: isDisplay ? 'display' : 'inline'
|
|
330
|
+
});
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
return {
|
|
334
|
+
errors: errors,
|
|
335
|
+
totalExpressions: totalExpressions,
|
|
336
|
+
valid: errors.length === 0
|
|
337
|
+
};
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
/**
|
|
341
|
+
* Handles CJK (Chinese, Japanese, Korean) characters mixed with LaTeX
|
|
342
|
+
* Optionally adds spaces between CJK characters and LaTeX expressions for better rendering
|
|
343
|
+
*
|
|
344
|
+
* @param text The input string
|
|
345
|
+
* @param addSpaces Whether to add spaces between CJK and LaTeX (default: false)
|
|
346
|
+
* @returns The processed string
|
|
347
|
+
*/
|
|
348
|
+
export function handleCJKWithLatex(text) {
|
|
349
|
+
var addSpaces = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;
|
|
350
|
+
if (!addSpaces) return text;
|
|
351
|
+
var result = text;
|
|
352
|
+
|
|
353
|
+
// Add space between CJK character and opening $
|
|
354
|
+
result = result.replaceAll(/([\u3040-\u30FF\u4E00-\u9FA5])(\$)/g, '$1 $2');
|
|
355
|
+
|
|
356
|
+
// Add space between closing $ and CJK character
|
|
357
|
+
result = result.replaceAll(/(\$)([\u3040-\u30FF\u4E00-\u9FA5])/g, '$1 $2');
|
|
358
|
+
return result;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
// ============================================================================
|
|
362
|
+
// Advanced Preprocessing Options
|
|
363
|
+
// ============================================================================
|
|
364
|
+
|
|
365
|
+
/**
|
|
366
|
+
* Comprehensive LaTeX preprocessing with configurable options
|
|
367
|
+
*
|
|
368
|
+
* This is the main preprocessing function that handles:
|
|
369
|
+
* - Currency symbol escaping (e.g., $20 → \$20)
|
|
370
|
+
* - LaTeX delimiter conversion (\[...\] → $$...$$)
|
|
371
|
+
* - Special character escaping (pipes, underscores, mhchem)
|
|
372
|
+
* - Optional error fixing and validation
|
|
373
|
+
* - Optional CJK character handling
|
|
374
|
+
*
|
|
375
|
+
* @param text The input string containing LaTeX and Markdown
|
|
376
|
+
* @param options Configuration options for fine-grained control
|
|
377
|
+
* @returns The preprocessed string
|
|
378
|
+
*
|
|
379
|
+
* @example
|
|
380
|
+
* ```ts
|
|
381
|
+
* // Default behavior (same as old preprocessLaTeX)
|
|
382
|
+
* preprocessLaTeX('向量$90^\\circ$,非 $0^\\circ$ 和 $180^\\circ$')
|
|
383
|
+
*
|
|
384
|
+
* // With custom options
|
|
385
|
+
* preprocessLaTeX(text, {
|
|
386
|
+
* fixErrors: true,
|
|
387
|
+
* validate: true,
|
|
388
|
+
* handleCJK: true
|
|
389
|
+
* })
|
|
390
|
+
* ```
|
|
391
|
+
*/
|
|
392
|
+
export function preprocessLaTeX(text) {
|
|
393
|
+
var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
|
|
394
|
+
var _options$addCJKSpaces = options.addCJKSpaces,
|
|
395
|
+
addCJKSpaces = _options$addCJKSpaces === void 0 ? false : _options$addCJKSpaces,
|
|
396
|
+
_options$convertBrack = options.convertBrackets,
|
|
397
|
+
convertBrackets = _options$convertBrack === void 0 ? true : _options$convertBrack,
|
|
398
|
+
_options$escapeCurren = options.escapeCurrency,
|
|
399
|
+
escapeCurrency = _options$escapeCurren === void 0 ? true : _options$escapeCurren,
|
|
400
|
+
_options$escapeMhchem = options.escapeMhchem,
|
|
401
|
+
escapeMhchem = _options$escapeMhchem === void 0 ? true : _options$escapeMhchem,
|
|
402
|
+
_options$escapePipes = options.escapePipes,
|
|
403
|
+
escapePipes = _options$escapePipes === void 0 ? true : _options$escapePipes,
|
|
404
|
+
_options$escapeUnders = options.escapeUnderscores,
|
|
405
|
+
escapeUnderscores = _options$escapeUnders === void 0 ? true : _options$escapeUnders,
|
|
406
|
+
_options$fixErrors = options.fixErrors,
|
|
407
|
+
fixErrors = _options$fixErrors === void 0 ? false : _options$fixErrors,
|
|
408
|
+
_options$handleCJK = options.handleCJK,
|
|
409
|
+
handleCJK = _options$handleCJK === void 0 ? false : _options$handleCJK,
|
|
410
|
+
_options$normalizeSpa = options.normalizeSpacing,
|
|
411
|
+
normalizeSpacing = _options$normalizeSpa === void 0 ? false : _options$normalizeSpa,
|
|
412
|
+
_options$throwOnValid = options.throwOnValidationError,
|
|
413
|
+
throwOnValidationError = _options$throwOnValid === void 0 ? false : _options$throwOnValid,
|
|
414
|
+
_options$validate = options.validate,
|
|
415
|
+
validate = _options$validate === void 0 ? false : _options$validate;
|
|
416
|
+
var content = text;
|
|
417
|
+
|
|
418
|
+
// Phase 1: Currency escaping (if enabled)
|
|
419
|
+
if (escapeCurrency) {
|
|
420
|
+
content = escapeCurrencyDollars(content);
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
// Phase 2: Bracket conversion (if enabled)
|
|
424
|
+
if (convertBrackets) {
|
|
425
|
+
content = convertLatexDelimiters(content);
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
// Phase 3: LaTeX-specific escaping
|
|
429
|
+
if (escapeMhchem) {
|
|
430
|
+
content = escapeMhchemCommands(content);
|
|
431
|
+
}
|
|
432
|
+
if (escapePipes) {
|
|
433
|
+
content = escapeLatexPipes(content);
|
|
434
|
+
}
|
|
435
|
+
if (escapeUnderscores) {
|
|
436
|
+
content = escapeTextUnderscores(content);
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
// Phase 4: Error fixing (if enabled)
|
|
440
|
+
if (fixErrors) {
|
|
441
|
+
content = fixCommonLaTeXErrors(content);
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
// Phase 5: Whitespace normalization (if enabled)
|
|
445
|
+
if (normalizeSpacing) {
|
|
446
|
+
content = normalizeLatexSpacing(content);
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
// Phase 6: CJK handling (if enabled)
|
|
450
|
+
if (handleCJK) {
|
|
451
|
+
content = handleCJKWithLatex(content, addCJKSpaces);
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
// Phase 7: Validation (if enabled)
|
|
455
|
+
if (validate) {
|
|
456
|
+
var validation = validateLatexExpressions(content);
|
|
457
|
+
if (!validation.valid) {
|
|
458
|
+
var errorMessage = "LaTeX validation failed (".concat(validation.errors.length, "/").concat(validation.totalExpressions, " expressions have errors):\n").concat(validation.errors.map(function (e) {
|
|
459
|
+
return " - [".concat(e.type, "] at position ").concat(e.position, ": ").concat(e.message, "\n Formula: ").concat(e.formula);
|
|
460
|
+
}).join('\n'));
|
|
461
|
+
if (throwOnValidationError) {
|
|
462
|
+
throw new Error(errorMessage);
|
|
463
|
+
} else {
|
|
464
|
+
console.warn(errorMessage);
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
return content;
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
/**
|
|
472
|
+
* Strict preprocessing mode - enables all safety features and validations
|
|
473
|
+
* Use this when you want maximum correctness and are willing to accept the performance cost
|
|
474
|
+
*
|
|
475
|
+
* @param text The input string
|
|
476
|
+
* @returns The preprocessed string with all features enabled
|
|
477
|
+
*
|
|
478
|
+
* @example
|
|
479
|
+
* ```ts
|
|
480
|
+
* const processed = preprocessLaTeXStrict(userInput)
|
|
481
|
+
* // Enables: error fixing, validation, space normalization; CJK handling is switched on but adds no spaces (addCJKSpaces stays false)
|
|
482
|
+
* ```
|
|
483
|
+
*/
|
|
484
|
+
export function preprocessLaTeXStrict(text) {
|
|
485
|
+
return preprocessLaTeX(text, {
|
|
486
|
+
addCJKSpaces: false,
|
|
487
|
+
// Usually don't want extra spaces
|
|
488
|
+
convertBrackets: true,
|
|
489
|
+
escapeCurrency: true,
|
|
490
|
+
escapeMhchem: true,
|
|
491
|
+
escapePipes: true,
|
|
492
|
+
escapeUnderscores: true,
|
|
493
|
+
fixErrors: true,
|
|
494
|
+
handleCJK: true,
|
|
495
|
+
normalizeSpacing: true,
|
|
496
|
+
throwOnValidationError: false,
|
|
497
|
+
// Warn but don't throw
|
|
498
|
+
validate: true
|
|
499
|
+
});
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
/**
|
|
503
|
+
* Minimal preprocessing mode - only essential operations
|
|
504
|
+
* Use this for better performance when you control the input
|
|
505
|
+
*
|
|
506
|
+
* @param text The input string
|
|
507
|
+
* @returns The preprocessed string with minimal processing
|
|
508
|
+
*
|
|
509
|
+
* @example
|
|
510
|
+
* ```ts
|
|
511
|
+
* const processed = preprocessLaTeXMinimal(trustedInput)
|
|
512
|
+
* // Only escapes currency and converts brackets
|
|
513
|
+
* ```
|
|
514
|
+
*/
|
|
515
|
+
export function preprocessLaTeXMinimal(text) {
|
|
516
|
+
return preprocessLaTeX(text, {
|
|
517
|
+
convertBrackets: true,
|
|
518
|
+
escapeCurrency: true,
|
|
519
|
+
escapeMhchem: false,
|
|
520
|
+
escapePipes: false,
|
|
521
|
+
escapeUnderscores: false,
|
|
522
|
+
fixErrors: false,
|
|
523
|
+
handleCJK: false,
|
|
524
|
+
normalizeSpacing: false,
|
|
525
|
+
validate: false
|
|
526
|
+
});
|
|
527
|
+
}
|