orbitchat 3.3.8 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/{ChartRenderer-BtX7_jv5.js → ChartRenderer-f1NX6EiP.js} +2 -2
- package/dist/assets/{MermaidRenderer-DLpT9XPj.js → MermaidRenderer-CIRXKrUW.js} +6 -6
- package/dist/assets/{MusicRenderer-DZhuX52M.js → MusicRenderer-5IBeIdNL.js} +2 -2
- package/dist/assets/{SVGRenderer-CB5j7ekx.js → SVGRenderer-5N8JnME6.js} +1 -1
- package/dist/assets/{_basePickBy-KeSLCJM0.js → _basePickBy-DILSsB_e.js} +1 -1
- package/dist/assets/{_baseUniq-BFwLbgVF.js → _baseUniq-4bbGVp76.js} +1 -1
- package/dist/assets/{architectureDiagram-VXUJARFQ-Dhht8baZ.js → architectureDiagram-VXUJARFQ-eS3oy19C.js} +1 -1
- package/dist/assets/{blockDiagram-VD42YOAC-C0uY9SKW.js → blockDiagram-VD42YOAC-CgMmA0Ue.js} +1 -1
- package/dist/assets/{c4Diagram-YG6GDRKO-AGMRXqhN.js → c4Diagram-YG6GDRKO-BsfOyrLR.js} +1 -1
- package/dist/assets/channel-B3fOlVXE.js +1 -0
- package/dist/assets/{chunk-4BX2VUAB-FoAnG2DD.js → chunk-4BX2VUAB-BKhu7quX.js} +1 -1
- package/dist/assets/{chunk-55IACEB6-Cj902k47.js → chunk-55IACEB6-BgUfPqoK.js} +1 -1
- package/dist/assets/{chunk-B4BG7PRW-DZisX-Yn.js → chunk-B4BG7PRW-DpVrA_ZB.js} +1 -1
- package/dist/assets/{chunk-DI55MBZ5-YO8RmHLs.js → chunk-DI55MBZ5-B44ONHRe.js} +1 -1
- package/dist/assets/{chunk-FMBD7UC4-D8TmopkR.js → chunk-FMBD7UC4-d-tt_p3A.js} +1 -1
- package/dist/assets/{chunk-QN33PNHL-DDhJT2OP.js → chunk-QN33PNHL-BE0U4yBJ.js} +1 -1
- package/dist/assets/{chunk-QZHKN3VN-CCSqXR9J.js → chunk-QZHKN3VN-JmmqQWka.js} +1 -1
- package/dist/assets/{chunk-TZMSLE5B-DtWrsAau.js → chunk-TZMSLE5B-D7in22Ny.js} +1 -1
- package/dist/assets/classDiagram-2ON5EDUG-DeTrhMN7.js +1 -0
- package/dist/assets/classDiagram-v2-WZHVMYZB-DeTrhMN7.js +1 -0
- package/dist/assets/clone-pNtiM7wR.js +1 -0
- package/dist/assets/{cose-bilkent-S5V4N54A-Dp7UKE-6.js → cose-bilkent-S5V4N54A-CP8SWrs7.js} +1 -1
- package/dist/assets/{dagre-6UL2VRFP-NEctntTO.js → dagre-6UL2VRFP-DdpYz4eU.js} +1 -1
- package/dist/assets/{diagram-PSM6KHXK-Bdb-Z7Rq.js → diagram-PSM6KHXK-CwdCgT3a.js} +1 -1
- package/dist/assets/{diagram-QEK2KX5R-Cxi1cnQw.js → diagram-QEK2KX5R-BRLirpQY.js} +1 -1
- package/dist/assets/{diagram-S2PKOQOG-COTWuuiM.js → diagram-S2PKOQOG-nr9C1pGv.js} +1 -1
- package/dist/assets/{erDiagram-Q2GNP2WA-CgLq4-Sy.js → erDiagram-Q2GNP2WA-BQIeXFNJ.js} +1 -1
- package/dist/assets/{flowDiagram-NV44I4VS-CVdT6vYV.js → flowDiagram-NV44I4VS-ChHiFQ37.js} +1 -1
- package/dist/assets/{ganttDiagram-JELNMOA3-nAhYUBJC.js → ganttDiagram-JELNMOA3-CrRAOx2L.js} +1 -1
- package/dist/assets/{gitGraphDiagram-V2S2FVAM-cZq9sWZG.js → gitGraphDiagram-V2S2FVAM-DmTUGcrW.js} +1 -1
- package/dist/assets/{graph-CVfZ5ZRD.js → graph-DjPD_bIk.js} +1 -1
- package/dist/assets/{index-DuEkeKcS.js → index-D_smRcdi.js} +1 -1
- package/dist/assets/index-Dj2F_Aiy.css +1 -0
- package/dist/assets/{index-CmDt8-sd.js → index-Ds9gSdPm.js} +108 -109
- package/dist/assets/{infoDiagram-HS3SLOUP-DFQtcUA2.js → infoDiagram-HS3SLOUP-CLeOJpnJ.js} +1 -1
- package/dist/assets/{journeyDiagram-XKPGCS4Q-DzPJjseD.js → journeyDiagram-XKPGCS4Q-rQ891sAH.js} +1 -1
- package/dist/assets/{kanban-definition-3W4ZIXB7-BMPHlnkL.js → kanban-definition-3W4ZIXB7-DAeNInkK.js} +1 -1
- package/dist/assets/{layout-Cdwf2_35.js → layout-BS0Sgtez.js} +1 -1
- package/dist/assets/{mindmap-definition-VGOIOE7T-76g77fcj.js → mindmap-definition-VGOIOE7T-BUNcHei6.js} +1 -1
- package/dist/assets/{pieDiagram-ADFJNKIX-_2bZQVhp.js → pieDiagram-ADFJNKIX-DPsO9TW-.js} +1 -1
- package/dist/assets/{quadrantDiagram-AYHSOK5B-DHidw6CG.js → quadrantDiagram-AYHSOK5B-kZZSJ2T2.js} +1 -1
- package/dist/assets/{requirementDiagram-UZGBJVZJ-BosyfqW6.js → requirementDiagram-UZGBJVZJ-SNCvW_SH.js} +1 -1
- package/dist/assets/{sankeyDiagram-TZEHDZUN-kXQhuPRq.js → sankeyDiagram-TZEHDZUN-B_wuKnke.js} +1 -1
- package/dist/assets/{sequenceDiagram-WL72ISMW-CQCG4z68.js → sequenceDiagram-WL72ISMW-BOFPTCnf.js} +1 -1
- package/dist/assets/{stateDiagram-FKZM4ZOC-DUwTvhKc.js → stateDiagram-FKZM4ZOC-D7mkcmix.js} +1 -1
- package/dist/assets/stateDiagram-v2-4FDKWEC3-BkijA-km.js +1 -0
- package/dist/assets/{timeline-definition-IT6M3QCI-tUdTwV48.js → timeline-definition-IT6M3QCI-BTODA6yn.js} +1 -1
- package/dist/assets/treemap-GDKQZRPO-D_T4Lq-5.js +160 -0
- package/dist/assets/{xychartDiagram-PRI3JC2R-2ndTjhZS.js → xychartDiagram-PRI3JC2R-Da89GqP0.js} +1 -1
- package/dist/index.html +2 -2
- package/package.json +1 -9
- package/dist/assets/channel-D-yx-ubr.js +0 -1
- package/dist/assets/classDiagram-2ON5EDUG-BOVDq9sH.js +0 -1
- package/dist/assets/classDiagram-v2-WZHVMYZB-BOVDq9sH.js +0 -1
- package/dist/assets/clone-DOmxAX3a.js +0 -1
- package/dist/assets/index-Baf0NBsK.css +0 -1
- package/dist/assets/stateDiagram-v2-4FDKWEC3-Ccelj_jo.js +0 -1
- package/dist/assets/treemap-GDKQZRPO-AkKmBZRv.js +0 -160
- package/markdown-renderer/LICENSE +0 -201
- package/markdown-renderer/src/CodeBlock.tsx +0 -332
- package/markdown-renderer/src/MarkdownComponents.tsx +0 -233
- package/markdown-renderer/src/MarkdownStyles.css +0 -732
- package/markdown-renderer/src/css.d.ts +0 -4
- package/markdown-renderer/src/index.ts +0 -32
- package/markdown-renderer/src/preprocessing.ts +0 -519
- package/markdown-renderer/src/renderers/ChartRenderer.tsx +0 -1464
- package/markdown-renderer/src/renderers/MermaidRenderer.tsx +0 -474
- package/markdown-renderer/src/renderers/MusicRenderer.tsx +0 -394
- package/markdown-renderer/src/renderers/SVGRenderer.tsx +0 -307
- package/markdown-renderer/src/types.ts +0 -174
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
// Main entry point for the markdown-renderer package
|
|
2
|
-
import rawStyles from './MarkdownStyles.css?inline';
|
|
3
|
-
import './MarkdownStyles.css';
|
|
4
|
-
import 'katex/dist/katex.min.css';
|
|
5
|
-
import katexRawStyles from 'katex/dist/katex.min.css?inline';
|
|
6
|
-
|
|
7
|
-
/** DOM id of the <style> element that carries the renderer's CSS. */
const STYLE_TAG_ID = 'schmitech-markdown-renderer-styles';

/** KaTeX CSS followed by the renderer's own CSS, as one stylesheet string. */
const combinedStyles = `${katexRawStyles}\n${rawStyles}`;

/**
 * Append the combined stylesheet to <head> once.
 *
 * Does nothing when there is no `document` global or when an element with
 * STYLE_TAG_ID already exists, so repeated calls are harmless.
 */
const ensureStylesInjected = () => {
  const hasDom = typeof document !== 'undefined';
  if (!hasDom || document.getElementById(STYLE_TAG_ID)) {
    return;
  }

  const styleElement = document.createElement('style');
  styleElement.id = STYLE_TAG_ID;
  styleElement.textContent = combinedStyles;
  document.head.appendChild(styleElement);
};

// Inject as a side effect of importing this module.
ensureStylesInjected();
|
|
22
|
-
export {
|
|
23
|
-
MarkdownRenderer,
|
|
24
|
-
MarkdownLink,
|
|
25
|
-
preprocessMarkdown,
|
|
26
|
-
containsMathNotation,
|
|
27
|
-
type MarkdownRendererProps
|
|
28
|
-
} from './MarkdownComponents';
|
|
29
|
-
|
|
30
|
-
// Export styles path for consumers who want to import separately
|
|
31
|
-
export const stylesPath = './MarkdownStyles.css';
|
|
32
|
-
export { ensureStylesInjected };
|
|
@@ -1,519 +0,0 @@
|
|
|
1
|
-
/**
 * Decide whether the text found inside a code fence is really a GFM table.
 *
 * The text qualifies when, after trimming:
 *  - it has at least two lines,
 *  - the first line starts with `|` and contains at least two `|`,
 *  - the second line starts with `|`, consists only of whitespace, `|`, `:`
 *    and `-`, and contains at least one `-` (the separator row),
 *  - every remaining non-empty line starts with `|`.
 *
 * @param content Raw text between the opening and closing fence.
 * @returns true when the content should be rendered as a table, not as code.
 */
function isMarkdownTableContent(content: string): boolean {
  const rows = content
    .trim()
    .split('\n')
    .map((row) => row.trim());
  if (rows.length < 2) return false;

  const [header = '', separator = '', ...body] = rows;

  const headerLooksLikeTable =
    header.startsWith('|') && (header.match(/\|/g) || []).length >= 2;
  if (!headerLooksLikeTable) return false;

  const separatorLooksValid =
    separator.startsWith('|') && /^[\s|:-]+$/.test(separator) && separator.includes('-');
  if (!separatorLooksValid) return false;

  // Blank lines between rows are tolerated; anything else must be a table row.
  return body.every((row) => row === '' || row.startsWith('|'));
}

/**
 * Remove the code fences around markdown tables that were wrapped in a
 * language-less fenced block (``` or ~~~), so the table can be parsed as GFM.
 * Fenced blocks whose content is not a table are returned untouched.
 *
 * The closing fence must be followed by a newline or the end of input. That
 * newline is checked with a lookahead instead of being consumed, so a fenced
 * block that starts on the very next line can still be matched; consuming it
 * made the second of two adjacent blocks invisible to the `(^|\n)` prefix.
 *
 * @param src Markdown source (line endings already normalized to `\n`).
 * @returns The source with table-only code blocks unwrapped.
 */
function unwrapTablesFromCodeBlocks(src: string): string {
  return src.replace(
    /(^|\n)(```|~~~)\s*\n([\s\S]*?)\n\2(?=\n|$)/g,
    (match: string, prefix: string, _fence: string, content: string) => {
      if (!isMarkdownTableContent(content)) {
        // Not a table, keep the code block as-is.
        return match;
      }
      // Emit the bare table; the extra newlines on both sides (together with
      // the newline left in place after the closing fence) give GFM the blank
      // lines it needs around a table.
      return `${prefix}\n${content.trim()}\n`;
    }
  );
}
|
|
52
|
-
|
|
53
|
-
/**
 * Replace segments that must survive preprocessing verbatim with opaque
 * placeholders, and return both the masked text and the placeholder table.
 *
 * Three kinds of segment are masked, in this order, sharing one counter:
 *  1. display math `$$...$$`      -> `__DISPLAY_MATH_n__`
 *  2. fenced code (``` or ~~~)    -> `__FENCED_CODE_n__`
 *  3. inline code `` `...` ``     -> `__INLINE_CODE_n__`
 *
 * Because of that order, a later mask's stored text can itself contain an
 * earlier placeholder (e.g. `$$` inside a code fence), so masks must be
 * restored newest-first.
 *
 * @param src Markdown source (line endings already normalized to `\n`).
 * @returns `masked` - the text with placeholders; `masks` - placeholder -> original text.
 */
function maskCodeSegments(src: string) {
  const masks: Record<string, string> = {};
  let i = 0;

  // 1) Display math. Empty blocks ($$$$) are masked too and restored unchanged.
  src = src.replace(/\$\$([\s\S]*?)\$\$/g, (_m: string, body: string) => {
    const key = `__DISPLAY_MATH_${i++}__`;
    masks[key] = `$$${body}$$`;
    return key;
  });

  // 2) Fenced code blocks. The newline (or end of input) after the closing
  //    fence is asserted with a lookahead rather than consumed: consuming it
  //    hid the opening fence of a block that starts on the very next line,
  //    so only the first of two adjacent blocks was masked as fenced code.
  //    The leading newline, when present, is still part of the match and is
  //    stored in the mask.
  src = src.replace(/(^|\n)(```|~~~)([^\n]*)\n([\s\S]*?)\n\2(?=\n|$)/g, (whole: string) => {
    const key = `__FENCED_CODE_${i++}__`;
    masks[key] = whole;
    return key;
  });

  // 3) Inline code spans delimited by single backticks.
  src = src.replace(/`([^`]+)`/g, (whole: string) => {
    const key = `__INLINE_CODE_${i++}__`;
    masks[key] = whole;
    return key;
  });

  return { masked: src, masks };
}
|
|
85
|
-
|
|
86
|
-
/**
 * Swap inline math spans (`$...$` on a single line, not `$$`) for
 * `__INLINE_MATH_n__` placeholders so later passes cannot alter them.
 *
 * A span whose trimmed body is nothing but a currency-style number (optional
 * sign, digits with thousands separators, optional decimals, optional
 * K/M/B-style suffix) is left in the text untouched.
 *
 * Inside a masked body, every backslash that is followed by a character other
 * than a letter or digit is doubled (`\,` becomes `\\,`, `\\` becomes `\\\\`)
 * so that Markdown escaping does not swallow it before the math renderer
 * sees it.
 *
 * @param src Text in which display math and code are already masked.
 * @returns `masked` - text with placeholders; `masks` - placeholder -> `$body$`.
 */
function maskInlineMath(src: string) {
  const inlineMathSpan = /(?<!\$)\$(?!\$)([^$\n]+?)(?<!\$)\$(?!\$)/g;
  const currencyBody =
    /^-?\d{1,3}(?:,\d{3})*(?:\.\d+)?(?:\s?[KMBkmb]|[Kk]ilo|[Mm]illion|[Bb]illion)?$/;
  const escapedPunctuation = /\\([^A-Za-z0-9])/g;

  const masks: Record<string, string> = {};
  let nextId = 0;

  const masked = src.replace(inlineMathSpan, (whole: string, body: string) => {
    // A bare amount between two dollar signs is prose, not math.
    if (currencyBody.test(body.trim())) {
      return whole;
    }

    const protectedBody = body.replace(escapedPunctuation, (_hit: string, char: string) =>
      char === '\\' ? '\\\\\\\\' : '\\\\' + char
    );

    const placeholder = `__INLINE_MATH_${nextId}__`;
    nextId += 1;
    masks[placeholder] = `$${protectedBody}$`;
    return placeholder;
  });

  return { masked, masks };
}
|
|
118
|
-
|
|
119
|
-
/**
 * Make sure every markdown table starts on its own line and is preceded by a
 * blank line, even when the text placed the header row right after other
 * content (e.g. "Table: | Col A | Col B |").
 *
 * Pass 1 splits a line of the form `<text> | h1 | h2 |` into `<text>` and the
 * header row, but only when the NEXT line is a genuine separator row.
 * Block-quote markers and list bullets/numbers in front of the pipe are not
 * treated as text to split off.
 *
 * Pass 2 inserts an empty line before any header row (a line starting with
 * `|`, containing at least two `|`, followed by a separator row) whose
 * previous line is not blank.
 *
 * Both passes use the same separator-row test: the trimmed line starts with
 * `|`, contains only whitespace, `|`, `:` and `-`, and has at least one `-`.
 * Pass 1 previously accepted any line starting with `|` that contained `-`,
 * so a data row such as `| -1 | 2 |` caused an unrelated prose line to be
 * split.
 *
 * @param src Markdown text (code and display math already masked).
 * @returns The text with table headers isolated and blank-line separated.
 */
function normalizeInlineTables(src: string) {
  const countPipes = (text: string): number => (text.match(/\|/g) || []).length;
  const isSeparatorRow = (trimmedRow: string): boolean =>
    trimmedRow.startsWith('|') && /^[\s|:-]+$/.test(trimmedRow) && trimmedRow.includes('-');

  const lines = src.split('\n');

  // Pass 1: split "<text> | header |" lines. `lines` grows while we iterate,
  // so the bound is re-evaluated on every iteration.
  for (let i = 0; i < lines.length - 1; i++) {
    const line = lines[i];
    const nextLine = lines[i + 1] ?? '';
    if (!line || !nextLine) continue;
    if (!isSeparatorRow(nextLine.trim())) continue;

    const firstPipe = line.indexOf('|');
    if (firstPipe <= 0) continue; // Already at start, or no pipe at all

    const prefix = line.slice(0, firstPipe);
    const trimmedPrefix = prefix.trim();
    if (trimmedPrefix === '') continue; // Only indentation before the pipe

    const isBlockQuotePrefix = /^[>]+$/.test(trimmedPrefix.replace(/\s+/g, ''));
    const isListPrefix = /^[-*+]\s*$/.test(trimmedPrefix);
    const isOrderedListPrefix = /^\d+\.\s*$/.test(trimmedPrefix);
    if (isBlockQuotePrefix || isListPrefix || isOrderedListPrefix) continue;

    const tableHeader = line.slice(firstPipe);
    if (countPipes(tableHeader) < 2) continue; // Need at least two pipes for a header row

    const beforeLine = prefix.replace(/\s+$/, '');
    lines.splice(i, 1, beforeLine, tableHeader);
    i++; // Skip past the newly inserted header line
  }

  // Pass 2: guarantee a blank line before each table header.
  const result: string[] = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const lineTrim = line.trim();
    const nextTrim = (lines[i + 1] ?? '').trim();

    const isTableHeader =
      lineTrim.startsWith('|') && countPipes(lineTrim) >= 2 && isSeparatorRow(nextTrim);

    if (isTableHeader && result.length > 0 && result[result.length - 1].trim() !== '') {
      result.push('');
    }

    result.push(line);
  }

  return result.join('\n');
}
|
|
197
|
-
|
|
198
|
-
/**
 * LaTeX environment names that are wrapped in display-math fences when they
 * appear outside of any math delimiters. Names are literal text; they are
 * regex-escaped before being used in a pattern (note the starred variants).
 */
const LATEX_ENVIRONMENTS = [
  'aligned',
  'align',
  'align*',
  'array',
  'cases',
  'pmatrix',
  'bmatrix',
  'vmatrix',
  'Vmatrix',
  'matrix',
  'smallmatrix',
  'gather',
  'gather*',
  'multline',
  'multline*',
  'split',
  'flalign',
  'flalign*',
  'eqnarray',
  'equation',
  'equation*',
];

/**
 * Return the first key of the form `__<prefix>_<n>__` (n = 0, 1, 2, ...) that
 * is not yet an own property of `masks`. The key is not added to `masks`.
 */
function createMaskPlaceholder(masks: Record<string, string>, prefix: string): string {
  let index = 0;
  let key = `__${prefix}_${index}__`;
  while (Object.prototype.hasOwnProperty.call(masks, key)) {
    index++;
    key = `__${prefix}_${index}__`;
  }
  return key;
}

/**
 * Replace each standalone `\begin{env}...\end{env}` (env from
 * LATEX_ENVIRONMENTS) with a `__LATEX_ENV_n__` placeholder whose mask is the
 * environment wrapped in `$$` fences on their own lines. `masks` is mutated.
 *
 * An environment is left untouched when it already sits inside math:
 *  1. `$$` or `\[` directly before it and `$$` or `\]` directly after it
 *     (whitespace allowed in between);
 *  2. the last `\[` before it is not yet closed and a `\]` follows it;
 *  3. an odd number of `$$` precede it and another `$$` follows it;
 *  4. an odd number of unescaped single `$` precede it and a single `$`
 *     follows before any other `$`.
 *
 * Environment names are regex-escaped before being joined into the pattern.
 * Without escaping, `*` in names like `align*` acts as a quantifier and the
 * starred environments can never match.
 *
 * @param src   Text to scan (code and display math already masked).
 * @param masks Placeholder table; new `__LATEX_ENV_n__` entries are added.
 * @returns The text with standalone environments replaced by placeholders.
 */
function wrapLatexEnvironments(src: string, masks: Record<string, string>) {
  if (!LATEX_ENVIRONMENTS.length) return src;

  const envPattern = LATEX_ENVIRONMENTS
    .map((name) => name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');
  const regex = new RegExp(String.raw`\\begin\{(${envPattern})\}([\s\S]*?)\\end\{\1\}`, 'g');

  return src.replace(
    regex,
    (match: string, _env: string, _body: string, offset: number, source: string) => {
      const before = source.slice(0, offset);
      const after = source.slice(offset + match.length);

      // Case 1: display delimiters hug the environment on both sides.
      const hasDisplayStart = /(\$\$|\\\[)\s*$/.test(before);
      const hasDisplayEnd = /^\s*(\$\$|\\\])/.test(after);
      if (hasDisplayStart && hasDisplayEnd) {
        return match;
      }

      // Case 2: somewhere inside an open \[ ... \] block.
      const lastOpenBracket = before.lastIndexOf('\\[');
      const lastCloseBracket = before.lastIndexOf('\\]');
      const isInsideBracketMath = lastOpenBracket !== -1 && lastOpenBracket > lastCloseBracket;
      if (isInsideBracketMath && /\\]/.test(after)) {
        return match;
      }

      // Case 3: somewhere inside an open $$ ... $$ block (odd number of $$ so far).
      const displayCount = (before.match(/\$\$/g) ?? []).length;
      if (displayCount % 2 === 1 && /\$\$/.test(after)) {
        return match;
      }

      // Case 4: inside inline math. Neutralize $$ and escaped \$ first so
      // only genuine single-dollar delimiters are counted.
      const beforeClean = before.replace(/\$\$/g, '__DD__').replace(/\\\$/g, '__ES__');
      const dollarCount = (beforeClean.match(/\$/g) ?? []).length;
      if (dollarCount % 2 === 1) {
        const afterClean = after.replace(/\$\$/g, '__DD__').replace(/\\\$/g, '__ES__');
        if (/^[^$]*\$(?!\$)/.test(afterClean)) {
          // Already inside inline math - leave it alone for KaTeX to handle.
          return match;
        }
      }

      const placeholder = createMaskPlaceholder(masks, 'LATEX_ENV');
      masks[placeholder] = `\n$$\n${match.trim()}\n$$\n`;
      return placeholder;
    }
  );
}
|
|
305
|
-
|
|
306
|
-
/**
 * Put the original text back in place of every placeholder in `src`.
 *
 * Masks are restored newest-first (reverse insertion order). A mask's stored
 * text is captured from the source as it was when the mask was created, so it
 * can contain placeholders of EARLIER masks (for example `$$...$$` inside a
 * fenced code block is masked as display math first and then captured inside
 * the fenced-code mask). Restoring oldest-first left such inner placeholders
 * in the output.
 *
 * Keys are matched as literal text and values are inserted literally (no
 * `$&`-style replacement patterns are interpreted).
 *
 * @param src   Text containing placeholders.
 * @param masks Placeholder -> original text, in creation order.
 * @returns The text with all placeholders, including nested ones, restored.
 */
function unmaskCodeSegments(src: string, masks: Record<string, string>) {
  const newestFirst = Object.entries(masks).reverse();
  for (const [placeholder, original] of newestFirst) {
    src = src.split(placeholder).join(original);
  }
  return src;
}
|
|
313
|
-
|
|
314
|
-
/**
 * Prepare LLM-produced markdown for rendering so that currency amounts and
 * math notation do not clobber each other.
 *
 * Pipeline (order matters):
 *  0a. unwrap tables that were wrapped in language-less code fences;
 *  0b. mask display math, fenced code and inline code;
 *      convert `<br>` tags to newlines; isolate inline tables;
 *      wrap standalone LaTeX environments in display-math fences;
 *  0.5 replace currency amounts (ranges first, then single amounts) with
 *      `__CURRENCY_n__` placeholders, then mask inline math;
 *      auto-wrap a few undelimited math/chemistry patterns in `$...$`;
 *  1.  normalize `\[...\]` to `$$...$$` and `\(...\)` to `$...$`;
 *  2.  escape `$...$` pairs that do not look like math;
 *  2.5 escape a lone `$$` that shares a line with text and has no partner;
 *  3.  restore currency with `$` written as the entity `&#36;` so the math
 *      parser never sees it as a delimiter, and drop a backslash that
 *      preceded such an amount;
 *  5.  unmask code/math segments;
 *  6.  trim trailing whitespace and end with a single newline.
 *
 * @param content Raw markdown text.
 * @returns The preprocessed markdown; `''` for empty or non-string input; the
 *          original `content` if any step throws (a warning is logged).
 */
export const preprocessMarkdown = (content: string): string => {
  if (!content || typeof content !== 'string') return '';

  try {
    // Normalize line endings
    let processed = content.replace(/\r\n/g, '\n').replace(/\r/g, '\n');

    // 0a) Unwrap markdown tables that are incorrectly wrapped in code blocks
    processed = unwrapTablesFromCodeBlocks(processed);

    // 0b) Mask code blocks/inline code and display math FIRST so later steps never touch them
    const { masked, masks } = maskCodeSegments(processed);
    processed = masked;

    // Convert HTML <br> tags into newline characters so they render like real line breaks
    processed = processed.replace(/<br\s*\/?>/gi, '\n');

    // Tables glued to preceding text ("Table: | Col |") must start on a fresh line
    processed = normalizeInlineTables(processed);

    // Wrap standalone LaTeX environments in display math fences (adds entries to `masks`)
    processed = wrapLatexEnvironments(processed, masks);

    // 0.5) Protect currency BEFORE masking inline math to avoid conflicts
    const currencyMap = new Map<string, string>();
    let idx = 0;
    const nextCurrencyPlaceholder = (original: string): string => {
      const placeholder = `__CURRENCY_${idx++}__`;
      currencyMap.set(placeholder, original);
      return placeholder;
    };

    // Ranges like $5-$10 or $5–$10: placeholder for BOTH sides, dash kept as-is
    const currencyCore = String.raw`-?\$\(?\d{1,3}(?:,\d{3})*(?:\.\d+)?\)?(?:\s?(?:[KMBkmb]|[Kk]ilo|[Mm]illion|[Bb]illion))?|\$-?\d+(?:\.\d+)?(?:\s?(?:[KMBkmb]|[Kk]ilo|[Mm]illion|[Bb]illion))?`;
    const rangeRegex = new RegExp(
      String.raw`(${currencyCore})(\s?[–-]\s?)(${currencyCore})`,
      'g'
    );

    processed = processed.replace(rangeRegex, (_m: string, left: string, dash: string, right: string) => {
      const lph = nextCurrencyPlaceholder(left);
      const rph = nextCurrencyPlaceholder(right);
      return `${lph}${dash}${rph}`;
    });

    // Single amounts. The (?!\w) lookahead rejects amounts glued to a word
    // character, so "$10n$" is left for the math passes.
    const singleCurrencyRegex = new RegExp(
      String.raw`-?\$\(?\d{1,3}(?:,\d{3})*(?:\.\d+)?\)?(?:\s?(?:[KMBkmb]|[Kk]ilo|[Mm]illion|[Bb]illion))?(?!\w)|\$-?\d+(?:\.\d+)?(?:\s?(?:[KMBkmb]|[Kk]ilo|[Mm]illion|[Bb]illion))?(?!\w)`,
      'g'
    );
    processed = processed.replace(singleCurrencyRegex, (match: string, offset: number, string: string) => {
      // Defensive double-check: followed by a letter and not ending in a valid
      // suffix means it is probably math. The suffix alternatives are grouped
      // so the end anchor applies to all of them, not only to "illion".
      const afterMatch = string.substring(offset + match.length);
      if (/^[a-zA-Z]/.test(afterMatch) && !/(?:[KMBkmb]|[Kk]ilo|[Mm]illion|[Bb]illion)$/.test(match)) {
        return match;
      }
      return nextCurrencyPlaceholder(match);
    });

    // Now mask inline math (currency is already protected) and merge its masks
    const { masked: mathMasked, masks: mathMasks } = maskInlineMath(processed);
    processed = mathMasked;
    Object.assign(masks, mathMasks);

    // Auto-detect common math patterns that have no delimiters, e.g. "x^2 + y^2 = z^2"
    const mathPatterns = [
      // Equations with equals sign and math operators
      /(?:^|\s)([a-zA-Z0-9]+\s*[\^_]\s*[a-zA-Z0-9{}]+(?:\s*[+\-*/]\s*[a-zA-Z0-9]+\s*[\^_]\s*[a-zA-Z0-9{}]+)*\s*=\s*[^$\n]+)(?:\s|$)/g,
      // Fractions not already wrapped
      /(?:^|\s)(\\frac\{[^}]+\}\{[^}]+\})(?:\s|$)/g,
      // Square roots, integrals, sums not already wrapped
      /(?:^|\s)(\\(?:sqrt|int|sum|prod|lim|log|ln|sin|cos|tan|exp)\b[^$\n]{0,50})(?:\s|$)/g,
      // Chemical formulas (e.g., H2O, CO2, Ca(OH)2)
      // NOTE(review): the capture group is repeated, so `expr` below is only the
      // LAST element token (for "CO2" it is "O2", giving "C$O2$"). Capturing the
      // whole token would also start wrapping words such as "LaTeX" - confirm
      // the intended behaviour before changing it.
      /(?:^|\s)([A-Z][a-z]?(?:\d+)?(?:\([A-Z][a-z]?(?:\d+)?\))?(?:\d+)?(?:[+-]\d*)?)+(?:\s|$)/g,
    ];

    // Wrap detected patterns in $ delimiters if not already wrapped
    mathPatterns.forEach((pattern) => {
      processed = processed.replace(pattern, (match: string, expr: string) => {
        // Check if already wrapped in $ or $$
        if (match.includes('$')) return match;
        // Avoid wrapping single-letter words like "I" that are not math
        const trimmed = String(expr ?? '').trim();
        if (/^[A-Za-z]$/.test(trimmed)) return match;

        // Heuristics: only auto-wrap if it clearly looks like math or chemistry
        const looksLikeMath = /[\\^_+=<>]|\\(?:frac|sqrt|sum|int|lim|log|ln|sin|cos|tan|exp)\b/.test(trimmed);
        const hasDigit = /\d/.test(trimmed);
        const hasParens = /[()]/.test(trimmed);
        const uppercaseCount = (trimmed.match(/[A-Z]/g) || []).length;
        const lowercaseCount = (trimmed.match(/[a-z]/g) || []).length;
        const hasTwoElementTokens = uppercaseCount >= 2;

        const looksLikeChemistry = hasDigit || hasParens || (hasTwoElementTokens && lowercaseCount > 0);

        if (!looksLikeMath && !looksLikeChemistry) return match;

        // Function replacer: the inserted text is used literally.
        return match.replace(expr, () => `$${expr}$`);
      });
    });

    // 1) Normalize LaTeX delimiters: \[...\] -> $$...$$ and \(...\) -> $...$
    processed = processed.replace(/\\\[([\s\S]*?)\\\]/g, (_m: string, p1: string) => `\n$$${p1}$$\n`);
    processed = processed.replace(/\\\(([\s\S]*?)\\\)/g, (_m: string, p1: string) => `$${p1}$`);

    // 2) Escape $...$ pairs that do not look like math (stray dollars in prose)
    processed = processed.replace(
      /(?<!\\)\$(?!\$)([^$\n]*?)(?<!\\)\$(?!\$)/g,
      (m: string, inner: string) => {
        // Skip if this is already a placeholder
        if (m.includes('__CURRENCY_') || m.includes('__INLINE_MATH_') || m.includes('__DISPLAY_MATH_')) {
          return m;
        }

        // Empty math blocks are left as-is
        if (inner.trim() === '') {
          return m;
        }

        // Aggressive math detection - assume math unless it's clearly currency
        const isLikelyCurrency = /^\d+(?:,\d{3})*(?:\.\d{2})?$/.test(inner.trim());
        const hasBackslash = /\\/.test(inner);
        const hasMathOperators = /[+\-*/=<>^_{}()]/.test(inner);
        const hasLettersAndNumbers = /[a-zA-Z].*\d|\d.*[a-zA-Z]/.test(inner);
        const hasGreekLetters = /\\(?:alpha|beta|gamma|delta|epsilon|theta|lambda|mu|pi|sigma|omega)/.test(inner);
        const hasMathFunctions = /\\(?:frac|sqrt|sum|int|lim|log|ln|sin|cos|tan|exp)/.test(inner);
        const isSingleLetterVariable = /^[A-Za-z]$/.test(inner.trim());

        const isProbablyMath = !isLikelyCurrency && (
          hasBackslash ||
          hasMathOperators ||
          hasLettersAndNumbers ||
          hasGreekLetters ||
          hasMathFunctions ||
          isSingleLetterVariable ||
          inner.length > 1
        );

        if (isProbablyMath) return m;
        return `\\$${inner}\\$`;
      }
    );

    // 2.5) Escape a stray $$ that appears inline with text and has no closing pair on its line
    processed = processed
      .split('\n')
      .map((line) => {
        const firstIndex = line.indexOf('$$');
        if (firstIndex === -1) return line;
        if (line.trim() === '$$') return line; // display math fence on its own line
        const secondIndex = line.indexOf('$$', firstIndex + 2);
        if (secondIndex !== -1) return line; // already has a matching pair on the same line
        return line.replace('$$', () => '\\$\\$');
      })
      .join('\n');

    // 3) Restore currency placeholders with '$' written as the HTML entity
    //    &#36; so the math parser never interprets it as a delimiter.
    //    (Replacing '$' with '$' would be a no-op and leave currency exposed.)
    currencyMap.forEach((original, ph) => {
      const entitySafe = original.replace(/\$/g, '&#36;');
      processed = processed.split(ph).join(entitySafe);
    });
    // Strip an escaping backslash the author put in front of a currency dollar.
    processed = processed.replace(/\\&#36;/g, '&#36;');

    // 5) Unmask code segments
    processed = unmaskCodeSegments(processed, masks);

    // 6) Final tidy
    processed = processed.trimEnd() + '\n';
    return processed;
  } catch (err) {
    console.warn('Error preprocessing markdown:', err);
    return content;
  }
};
|
|
506
|
-
|
|
507
|
-
/**
 * Report whether `text` contains something that looks like math notation,
 * ignoring plain currency amounts.
 *
 * Dollar amounts (`$` + optional space + digits with optional thousands
 * groups and decimals) are removed first; the remainder is then checked for
 * `$$...$$`, unescaped `$...$` on one line, `\[...\]`, or `\(...\)`.
 *
 * The `\(...\)` check accepts any characters between the delimiters,
 * including `)`, so expressions such as `\(f(x)\)` are detected. The previous
 * `[^)]` body stopped at the first closing parenthesis and missed them.
 *
 * @param text Text to inspect.
 * @returns true when at least one math-looking span is present.
 */
export const containsMathNotation = (text: string): boolean => {
  const withoutCurrency = text.replace(/\$\s?\d+(?:,\d{3})*(?:\.\d+)?\b/g, '');
  const patterns = [
    /\$\$[\s\S]+?\$\$/,
    /(?<!\\)\$[^$\n]+?(?<!\\)\$/,
    /\\\[[\s\S]+?\\\]/,
    /\\\([\s\S]+?\\\)/,
  ];
  return patterns.some((re) => re.test(withoutCurrency));
};
|