@next-ai-drawio/mcp-server 0.1.2 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +90 -36
- package/dist/history.d.ts +16 -0
- package/dist/history.d.ts.map +1 -0
- package/dist/history.js +48 -0
- package/dist/history.js.map +1 -0
- package/dist/http-server.d.ts +6 -19
- package/dist/http-server.d.ts.map +1 -1
- package/dist/http-server.js +329 -148
- package/dist/http-server.js.map +1 -1
- package/dist/index.js +89 -8
- package/dist/index.js.map +1 -1
- package/dist/xml-validation.d.ts +38 -0
- package/dist/xml-validation.d.ts.map +1 -0
- package/dist/xml-validation.js +785 -0
- package/dist/xml-validation.js.map +1 -0
- package/package.json +1 -1
- package/dist/api-client.d.ts +0 -30
- package/dist/api-client.d.ts.map +0 -1
- package/dist/api-client.js +0 -87
- package/dist/api-client.js.map +0 -1
- package/dist/session.d.ts +0 -59
- package/dist/session.d.ts.map +0 -1
- package/dist/session.js +0 -120
- package/dist/session.js.map +0 -1
|
@@ -0,0 +1,785 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* XML Validation and Auto-Fix for draw.io diagrams
|
|
3
|
+
* Copied from lib/utils.ts to avoid cross-package imports
|
|
4
|
+
*/
|
|
5
|
+
// ============================================================================
|
|
6
|
+
// Constants
|
|
7
|
+
// ============================================================================
|
|
8
|
+
/** Size threshold (1MB) above which validation warns that processing may be slow. */
const MAX_XML_SIZE = 1_000_000;
/** Upper bound on aggressive cell-dropping passes, so that loop always terminates. */
const MAX_DROP_ITERATIONS = 10;
/** draw.io structural attributes that must not be repeated within a single tag. */
const STRUCTURAL_ATTRS = ["edge", "parent", "source", "target", "vertex", "connectable"];
/** Names of the entities accepted by the entity-reference check. */
const VALID_ENTITIES = new Set(["lt", "gt", "amp", "quot", "apos"]);
|
|
23
|
+
/**
 * Scan an XML string and list its element tags, honouring quoted attribute values.
 *
 * A `>` inside a single- or double-quoted attribute value does not terminate the
 * tag. Comments (`<!-- ... -->`) and CDATA sections (`<![CDATA[ ... ]]>`) are
 * skipped as a whole, so markup-looking text inside them is never reported as a
 * tag and stray quotes inside them cannot derail the scan. Constructs whose name
 * does not start with a letter (declarations, processing instructions) are ignored.
 *
 * @param {string} xml - Raw XML text.
 * @returns {Array<{tag: string, tagName: string, isClosing: boolean, isSelfClosing: boolean, startIndex: number, endIndex: number}>}
 *   Tags in document order; `startIndex` and `endIndex` are the offsets of the
 *   tag's `<` and `>` characters in `xml`.
 */
function parseXmlTags(xml) {
    const tags = [];
    const namePattern = /^<(\/?)([a-zA-Z][a-zA-Z0-9:_-]*)/;
    // Non-element constructs that are skipped wholesale: [opener, terminator].
    const opaqueSections = [
        ["<!--", "-->"],
        ["<![CDATA[", "]]>"],
    ];

    // Returns the offset just past a terminated comment/CDATA section starting at
    // `pos`, or -1 when `pos` does not start one (or it is never terminated).
    const skipOpaque = (pos) => {
        for (const [opener, terminator] of opaqueSections) {
            if (xml.startsWith(opener, pos)) {
                const closeAt = xml.indexOf(terminator, pos + opener.length);
                return closeAt === -1 ? -1 : closeAt + terminator.length;
            }
        }
        return -1;
    };

    let i = 0;
    while (i < xml.length) {
        const tagStart = xml.indexOf("<", i);
        if (tagStart === -1) {
            break;
        }
        const afterOpaque = skipOpaque(tagStart);
        if (afterOpaque !== -1) {
            i = afterOpaque;
            continue;
        }
        // Find the matching ">" while tracking quotes, so that a ">" inside an
        // attribute value is not mistaken for the end of the tag.
        let tagEnd = tagStart + 1;
        let quoteChar = "";
        while (tagEnd < xml.length) {
            const c = xml[tagEnd];
            if (quoteChar !== "") {
                if (c === quoteChar) {
                    quoteChar = "";
                }
            } else if (c === '"' || c === "'") {
                quoteChar = c;
            } else if (c === ">") {
                break;
            }
            tagEnd++;
        }
        // Ran off the end without finding ">": the trailing tag is incomplete.
        if (tagEnd >= xml.length) {
            break;
        }
        const tag = xml.substring(tagStart, tagEnd + 1);
        i = tagEnd + 1;
        const tagMatch = namePattern.exec(tag);
        if (!tagMatch) {
            continue;
        }
        tags.push({
            tag,
            tagName: tagMatch[2],
            isClosing: tagMatch[1] === "/",
            isSelfClosing: tag.endsWith("/>"),
            startIndex: tagStart,
            endIndex: tagEnd,
        });
    }
    return tags;
}
|
|
72
|
+
// ============================================================================
|
|
73
|
+
// Validation Helper Functions
|
|
74
|
+
// ============================================================================
|
|
75
|
+
/**
 * Check every tag for structural attributes (see STRUCTURAL_ATTRS) that occur
 * more than once.
 *
 * Attribute names are matched together with their quoted value, so text inside
 * a value that merely looks like `name =` (for example a label such as
 * `value="see parent = root"`) is not counted as an attribute.
 *
 * @param {string} xml - Raw XML text.
 * @returns {string|null} Error message for the first offending tag, or null.
 */
function checkDuplicateAttributes(xml) {
    const structuralSet = new Set(STRUCTURAL_ATTRS);
    // The optional trailing group consumes the quoted value so the scan resumes
    // after it; unquoted/malformed values are simply left unconsumed.
    const attrPattern = /\s([a-zA-Z_:][a-zA-Z0-9_:.-]*)\s*=\s*(?:"[^"]*"|'[^']*')?/g;
    for (const [tag] of xml.matchAll(/<[^>]+>/g)) {
        const counts = new Map();
        for (const [, attrName] of tag.matchAll(attrPattern)) {
            counts.set(attrName, (counts.get(attrName) || 0) + 1);
        }
        const duplicates = [...counts]
            .filter(([name, count]) => count > 1 && structuralSet.has(name))
            .map(([name]) => name);
        if (duplicates.length > 0) {
            return `Invalid XML: Duplicate structural attribute(s): ${duplicates.join(", ")}. Remove duplicate attributes.`;
        }
    }
    return null;
}
|
|
98
|
+
/**
 * Look for `id="..."` values that appear more than once anywhere in the XML.
 *
 * @param {string} xml - Raw XML text.
 * @returns {string|null} Error message naming up to three repeated ids, or null.
 */
function checkDuplicateIds(xml) {
    const occurrences = new Map();
    for (const [, value] of xml.matchAll(/\bid\s*=\s*["']([^"']+)["']/gi)) {
        occurrences.set(value, (occurrences.get(value) || 0) + 1);
    }
    const repeated = [];
    for (const [value, times] of occurrences) {
        if (times > 1) {
            repeated.push(`'${value}' (${times}x)`);
        }
    }
    if (repeated.length === 0) {
        return null;
    }
    // Only the first three offenders are listed to keep the message short.
    return `Invalid XML: Found duplicate ID(s): ${repeated.slice(0, 3).join(", ")}. All id attributes must be unique.`;
}
|
|
115
|
+
/**
 * Verify that opening and closing tags are balanced and properly nested.
 *
 * Comments are stripped first; tag names are compared case-insensitively.
 *
 * @param {string} xml - Raw XML text.
 * @returns {string|null} Description of the first mismatch found, or null.
 */
function checkTagMismatches(xml) {
    const stripped = xml.replace(/<!--[\s\S]*?-->/g, "");
    const openTags = [];
    for (const entry of parseXmlTags(stripped)) {
        const name = entry.tagName;
        if (!entry.isClosing) {
            // Self-closing tags never need a matching close.
            if (!entry.isSelfClosing) {
                openTags.push(name);
            }
            continue;
        }
        if (openTags.length === 0) {
            return `Invalid XML: Closing tag </${name}> without matching opening tag`;
        }
        const expected = openTags.pop();
        if (expected?.toLowerCase() !== name.toLowerCase()) {
            return `Invalid XML: Expected closing tag </${expected}> but found </${name}>`;
        }
    }
    return openTags.length > 0
        ? `Invalid XML: Document has ${openTags.length} unclosed tag(s): ${openTags.join(", ")}`
        : null;
}
|
|
139
|
+
/**
 * Check numeric character references (`&#NNN;` and `&#xHHH;`) for well-formedness.
 *
 * The candidate digits stop at the first character that cannot belong to a
 * reference (whitespace, quote, `<`, `>`, `&`, `;`). A reference that lacks its
 * semicolon is therefore reported on its own rather than together with all text
 * up to the next `;` in the document, and an empty reference such as `&#;` is
 * detected.
 *
 * @param {string} xml - Raw XML text.
 * @returns {string|null} Error message for the first malformed reference, or null.
 */
function checkCharacterReferences(xml) {
    // Groups: optional hex marker, candidate digits, terminating semicolon (if any).
    const charRefPattern = /&#(x?)([^;&<>"'\s]*)(;?)/g;
    for (const [ref, hexMarker, digits, terminator] of xml.matchAll(charRefPattern)) {
        const isHex = hexMarker === "x";
        const kind = isHex ? "hex" : "decimal";
        if (terminator !== ";") {
            return `Invalid XML: Missing semicolon after ${kind} reference: ${ref}`;
        }
        // `+` in both patterns also rejects an empty digit run.
        const wellFormed = isHex ? /^[0-9a-fA-F]+$/ : /^[0-9]+$/;
        if (!wellFormed.test(digits)) {
            return `Invalid XML: Invalid ${kind} character reference: ${ref}`;
        }
    }
    return null;
}
|
|
166
|
+
/**
 * Check for bare `&` characters and for named entities outside VALID_ENTITIES.
 *
 * Comments are ignored. The first check only inspects what follows the `&`
 * (one of lt/gt/amp/quot/apos or `#`), so a reference such as `&nbsp;` is
 * reported as an unescaped `&`; the second check catches names that merely
 * begin with an accepted prefix (e.g. `&ltx;`).
 *
 * @param {string} xml - Raw XML text.
 * @returns {string|null} Error message for the first problem found, or null.
 */
function checkEntityReferences(xml) {
    const xmlWithoutComments = xml.replace(/<!--[\s\S]*?-->/g, "");
    if (/&(?!(?:lt|gt|amp|quot|apos|#))/.test(xmlWithoutComments)) {
        return "Invalid XML: Found unescaped & character(s). Replace & with &amp;";
    }
    for (const [, entityName] of xmlWithoutComments.matchAll(/&([a-zA-Z][a-zA-Z0-9]*);/g)) {
        if (!VALID_ENTITIES.has(entityName)) {
            return `Invalid XML: Invalid entity reference: &${entityName}; - use only valid XML entities (lt, gt, amp, quot, apos)`;
        }
    }
    return null;
}
|
|
182
|
+
/**
 * Regex-based detection of an mxCell opened while another mxCell is still open.
 *
 * Self-closing cells and tags carrying `as="valueLabel"` / `as="geometry"` do
 * not count as open cells.
 *
 * @param {string} xml - Raw XML text.
 * @returns {string|null} Error message when nesting is found, otherwise null.
 */
function checkNestedMxCells(xml) {
    let openCells = 0;
    for (const [tag] of xml.matchAll(/<\/?mxCell[^>]*>/g)) {
        if (tag.startsWith("</mxCell>")) {
            // An unmatched close is tolerated here; depth never goes negative.
            if (openCells > 0) {
                openCells -= 1;
            }
            continue;
        }
        if (tag.endsWith("/>")) {
            continue;
        }
        if (/\sas\s*=\s*["'](valueLabel|geometry)["']/.test(tag)) {
            continue;
        }
        openCells += 1;
        if (openCells > 1) {
            return "Invalid XML: Found nested mxCell tags. Cells should be siblings, not nested inside other mxCell elements.";
        }
    }
    return null;
}
|
|
205
|
+
// ============================================================================
|
|
206
|
+
// Main Validation Function
|
|
207
|
+
// ============================================================================
|
|
208
|
+
/**
 * Validates draw.io XML structure for common issues.
 *
 * When the runtime provides `DOMParser`, the XML is parsed first to catch
 * syntax errors and nested mxCell elements; runtimes without it go straight to
 * the regex-based checks. The regex checks then run in a fixed order and the
 * first failure is returned.
 *
 * @param {string} xml - The XML string to validate
 * @returns {string|null} null if valid, error message string if invalid
 */
export function validateMxCellStructure(xml) {
    // Oversized input is still validated; the warning only flags a likely slowdown.
    if (xml.length > MAX_XML_SIZE) {
        console.warn(`[validateMxCellStructure] XML size (${xml.length}) exceeds ${MAX_XML_SIZE} bytes, may cause performance issues`);
    }
    // 0. First use DOM parser to catch syntax errors (most accurate).
    //    Guarded the same way autoFixXml guards it, so a runtime without
    //    DOMParser does not log a warning on every call.
    if (typeof DOMParser !== "undefined") {
        try {
            const doc = new DOMParser().parseFromString(xml, "text/xml");
            if (doc.querySelector("parsererror")) {
                return `Invalid XML: The XML contains syntax errors (likely unescaped special characters like <, >, & in attribute values). Please escape special characters: use &lt; for <, &gt; for >, &amp; for &, &quot; for ". Regenerate the diagram with properly escaped values.`;
            }
            // DOM-based check for nested mxCell
            for (const cell of doc.querySelectorAll("mxCell")) {
                if (cell.parentElement?.tagName === "mxCell") {
                    const id = cell.getAttribute("id") || "unknown";
                    return `Invalid XML: Found nested mxCell (id="${id}"). Cells should be siblings, not nested inside other mxCell elements.`;
                }
            }
        } catch (error) {
            console.warn("[validateMxCellStructure] DOMParser threw unexpected error, falling back to regex validation:", error);
        }
    }
    // 1. Check for CDATA wrapper (invalid at document root)
    if (/^\s*<!\[CDATA\[/.test(xml)) {
        return "Invalid XML: XML is wrapped in CDATA section - remove <![CDATA[ from start and ]]> from end";
    }
    // 2. Check for duplicate structural attributes
    const dupAttrError = checkDuplicateAttributes(xml);
    if (dupAttrError) {
        return dupAttrError;
    }
    // 3. Check for unescaped < in attribute values: a double-quoted value that
    //    contains a raw "<" and no "&lt;" escape at all.
    for (const [, value] of xml.matchAll(/=\s*"([^"]*)"/g)) {
        if (value.includes("<") && !value.includes("&lt;")) {
            return "Invalid XML: Unescaped < character in attribute values. Replace < with &lt;";
        }
    }
    // 4. Check for duplicate IDs
    const dupIdError = checkDuplicateIds(xml);
    if (dupIdError) {
        return dupIdError;
    }
    // 5. Check for tag mismatches
    const tagMismatchError = checkTagMismatches(xml);
    if (tagMismatchError) {
        return tagMismatchError;
    }
    // 6. Check invalid character references
    const charRefError = checkCharacterReferences(xml);
    if (charRefError) {
        return charRefError;
    }
    // 7. Check for invalid comment syntax (-- inside comments)
    for (const [, commentBody] of xml.matchAll(/<!--([\s\S]*?)-->/g)) {
        if (commentBody.includes("--")) {
            return "Invalid XML: Comment contains -- (double hyphen) which is not allowed";
        }
    }
    // 8. Check for unescaped entity references and invalid entity names
    const entityError = checkEntityReferences(xml);
    if (entityError) {
        return entityError;
    }
    // 9. Check for empty id attributes on mxCell
    if (/<mxCell[^>]*\sid\s*=\s*["']\s*["'][^>]*>/.test(xml)) {
        return "Invalid XML: Found mxCell element(s) with empty id attribute";
    }
    // 10. Check for nested mxCell tags
    const nestedCellError = checkNestedMxCells(xml);
    if (nestedCellError) {
        return nestedCellError;
    }
    return null;
}
|
|
296
|
+
// ============================================================================
|
|
297
|
+
// Auto-Fix Function
|
|
298
|
+
// ============================================================================
|
|
299
|
+
/**
 * Attempts to auto-fix common XML issues in draw.io diagrams.
 *
 * The fixes run as a fixed sequence of text rewrites (numbered in the body);
 * each one that changes something appends a human-readable entry to `fixes`.
 * The final step, which drops mxCell elements a DOM parser still rejects, only
 * runs when the runtime provides `DOMParser`.
 *
 * @param {string} xml - The XML string to fix
 * @returns {{fixed: string, fixes: string[]}} The rewritten XML and the list of fixes applied
 */
export function autoFixXml(xml) {
    let fixed = xml;
    const fixes = [];

    // 0. Fix JSON-escaped XML (attr=\"value\" and literal \n sequences)
    if (/=\\"/.test(fixed)) {
        fixed = fixed.replace(/\\"/g, '"');
        fixed = fixed.replace(/\\n/g, "\n");
        fixes.push("Fixed JSON-escaped XML");
    }

    // 1. Remove CDATA wrapper
    if (/^\s*<!\[CDATA\[/.test(fixed)) {
        fixed = fixed.replace(/^\s*<!\[CDATA\[/, "").replace(/\]\]>\s*$/, "");
        fixes.push("Removed CDATA wrapper");
    }

    // 2. Remove text before XML declaration or root element
    const xmlStart = fixed.search(/<(\?xml|mxGraphModel|mxfile)/i);
    if (xmlStart > 0 && !/^<[a-zA-Z]/.test(fixed.trim())) {
        fixed = fixed.substring(xmlStart);
        fixes.push("Removed text before XML root");
    }

    // 3. Fix duplicate structural attributes (keep the first occurrence per tag)
    let dupAttrFixed = false;
    fixed = fixed.replace(/<[^>]+>/g, (tag) => {
        let newTag = tag;
        for (const attr of STRUCTURAL_ATTRS) {
            const attrRegex = new RegExp(`\\s${attr}\\s*=\\s*["'][^"']*["']`, "gi");
            const matches = tag.match(attrRegex);
            if (matches && matches.length > 1) {
                let firstKept = false;
                newTag = newTag.replace(attrRegex, (m) => {
                    if (!firstKept) {
                        firstKept = true;
                        return m;
                    }
                    dupAttrFixed = true;
                    return "";
                });
            }
        }
        return newTag;
    });
    if (dupAttrFixed) {
        fixes.push("Removed duplicate structural attributes");
    }

    // 4. Fix unescaped & characters (anything that is not a complete, valid reference)
    const ampersandPattern = /&(?!(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);)/g;
    const ampEscaped = fixed.replace(ampersandPattern, "&amp;");
    if (ampEscaped !== fixed) {
        fixed = ampEscaped;
        fixes.push("Escaped unescaped & characters");
    }

    // 5. Fix invalid entity names produced by double-escaping (e.g. &ampquot;).
    // NOTE(review): step 4 has already rewritten the leading "&" of such a
    // sequence to "&amp;", so these patterns may never match at this point;
    // confirm the intended order of steps 4 and 5 before relying on this fix.
    const invalidEntities = [
        { pattern: /&ampquot;/g, replacement: "&quot;", name: "&ampquot;" },
        { pattern: /&amplt;/g, replacement: "&lt;", name: "&amplt;" },
        { pattern: /&ampgt;/g, replacement: "&gt;", name: "&ampgt;" },
        { pattern: /&ampapos;/g, replacement: "&apos;", name: "&ampapos;" },
        { pattern: /&ampamp;/g, replacement: "&amp;", name: "&ampamp;" },
    ];
    for (const { pattern, replacement, name } of invalidEntities) {
        const repaired = fixed.replace(pattern, replacement);
        if (repaired !== fixed) {
            fixed = repaired;
            fixes.push(`Fixed double-escaped entity ${name}`);
        }
    }

    // 6. Fix malformed attribute quotes: attr=&quot;value&quot; was meant to be
    //    attr="value" (the entity was used as the delimiter).
    if (/(\s[a-zA-Z][a-zA-Z0-9_:-]*)=&quot;/.test(fixed)) {
        fixed = fixed.replace(/(\s[a-zA-Z][a-zA-Z0-9_:-]*)=&quot;([^&]*?)&quot;/g, '$1="$2"');
        fixes.push("Fixed malformed attribute quotes");
    }

    // 7. Fix malformed closing tags (</tag/> -> </tag>)
    if (/<\/([a-zA-Z][a-zA-Z0-9]*)\s*\/>/.test(fixed)) {
        fixed = fixed.replace(/<\/([a-zA-Z][a-zA-Z0-9]*)\s*\/>/g, "</$1>");
        fixes.push("Fixed malformed closing tags");
    }

    // 8. Fix missing space between attributes (a="1"b="2" -> a="1" b="2").
    //    The quoted value is anchored on its preceding "=" so that a closing
    //    quote is never paired with the next attribute's opening quote, which
    //    would insert a space inside values such as style="rounded=1".
    const spaced = fixed.replace(/(=\s*"[^"]*")([a-zA-Z][a-zA-Z0-9_:-]*=)/g, "$1 $2");
    if (spaced !== fixed) {
        fixed = spaced;
        fixes.push("Added missing space between attributes");
    }

    // 9. Fix unescaped quotes in style color values (;fillColor="#... -> ;fillColor=#...)
    if (/;([a-zA-Z]*[Cc]olor)="#/.test(fixed)) {
        fixed = fixed.replace(/;([a-zA-Z]*[Cc]olor)="#/g, ";$1=#");
        fixes.push("Removed quotes around color values in style");
    }

    // 10. Fix unescaped < in attribute values
    if (/=\s*"[^"]*<[^"]*"/.test(fixed)) {
        fixed = fixed.replace(/=\s*"([^"]*)"/g, (_match, value) => {
            const escaped = value.replace(/</g, "&lt;");
            return `="${escaped}"`;
        });
        fixes.push("Escaped < characters in attribute values");
    }

    // 11. Fix invalid hex character references (removed outright)
    const invalidHexRefs = [];
    fixed = fixed.replace(/&#x([^;]*);/g, (match, hex) => {
        if (/^[0-9a-fA-F]+$/.test(hex) && hex.length > 0) {
            return match;
        }
        invalidHexRefs.push(match);
        return "";
    });
    if (invalidHexRefs.length > 0) {
        fixes.push(`Removed ${invalidHexRefs.length} invalid hex character reference(s)`);
    }

    // 12. Fix invalid decimal character references (removed outright)
    const invalidDecRefs = [];
    fixed = fixed.replace(/&#([^x][^;]*);/g, (match, dec) => {
        if (/^[0-9]+$/.test(dec) && dec.length > 0) {
            return match;
        }
        invalidDecRefs.push(match);
        return "";
    });
    if (invalidDecRefs.length > 0) {
        fixes.push(`Removed ${invalidDecRefs.length} invalid decimal character reference(s)`);
    }

    // 13. Fix invalid comment syntax (collapse every "--" inside a comment to "-")
    fixed = fixed.replace(/<!--([\s\S]*?)-->/g, (match, content) => {
        if (/--/.test(content)) {
            let fixedContent = content;
            while (/--/.test(fixedContent)) {
                fixedContent = fixedContent.replace(/--/g, "-");
            }
            fixes.push("Fixed invalid comment syntax");
            return `<!--${fixedContent}-->`;
        }
        return match;
    });

    // 14. Fix <Cell> tags to <mxCell>
    if (/<\/?Cell[\s>]/i.test(fixed)) {
        fixed = fixed.replace(/<Cell(\s)/gi, "<mxCell$1");
        fixed = fixed.replace(/<Cell>/gi, "<mxCell>");
        fixed = fixed.replace(/<\/Cell>/gi, "</mxCell>");
        fixes.push("Fixed <Cell> tags to <mxCell>");
    }

    // 15. Fix common closing tag typos (MUST run before foreign tag removal)
    const tagTypos = [
        { wrong: /<\/mxElement>/gi, right: "</mxCell>", name: "</mxElement>" },
        { wrong: /<\/mxcell>/g, right: "</mxCell>", name: "</mxcell>" },
        { wrong: /<\/mxgeometry>/g, right: "</mxGeometry>", name: "</mxgeometry>" },
        { wrong: /<\/mxpoint>/g, right: "</mxPoint>", name: "</mxpoint>" },
        { wrong: /<\/mxgraphmodel>/gi, right: "</mxGraphModel>", name: "</mxgraphmodel>" },
    ];
    for (const { wrong, right, name } of tagTypos) {
        const before = fixed;
        fixed = fixed.replace(wrong, right);
        if (fixed !== before) {
            fixes.push(`Fixed typo ${name} to ${right}`);
        }
    }

    // 16. Remove non-draw.io tags (after typo fixes so lowercase variants are fixed first)
    const validDrawioTags = new Set([
        "mxfile",
        "diagram",
        "mxGraphModel",
        "root",
        "mxCell",
        "mxGeometry",
        "mxPoint",
        "Array",
        "Object",
        "mxRectangle",
    ]);
    const foreignTags = new Set();
    for (const [, tagName] of fixed.matchAll(/<\/?([a-zA-Z][a-zA-Z0-9_]*)[^>]*>/g)) {
        if (!validDrawioTags.has(tagName)) {
            foreignTags.add(tagName);
        }
    }
    if (foreignTags.size > 0) {
        for (const tag of foreignTags) {
            // Exact-case match, and the name must end right after `tag`:
            // otherwise a foreign <m> or <mxcell> would also delete every
            // valid <mxCell ...> tag.
            fixed = fixed.replace(new RegExp(`<${tag}(?![a-zA-Z0-9_])[^>]*>`, "g"), "");
            fixed = fixed.replace(new RegExp(`</${tag}>`, "g"), "");
        }
        fixes.push(`Removed foreign tags: ${Array.from(foreignTags).join(", ")}`);
    }

    // 17. Fix unclosed tags (append missing closers at the end)
    const tagStack = [];
    for (const { tagName, isClosing, isSelfClosing } of parseXmlTags(fixed)) {
        if (isClosing) {
            const lastIdx = tagStack.lastIndexOf(tagName);
            if (lastIdx !== -1) {
                tagStack.splice(lastIdx, 1);
            }
        } else if (!isSelfClosing) {
            tagStack.push(tagName);
        }
    }
    if (tagStack.length > 0) {
        const tagsToClose = [];
        // Innermost first, so the appended closers nest correctly.
        for (const tagName of tagStack.reverse()) {
            const openCount = (fixed.match(new RegExp(`<${tagName}[\\s>]`, "gi")) || []).length;
            const closeCount = (fixed.match(new RegExp(`</${tagName}>`, "gi")) || []).length;
            if (openCount > closeCount) {
                tagsToClose.push(tagName);
            }
        }
        if (tagsToClose.length > 0) {
            const closingTags = tagsToClose.map((t) => `</${t}>`).join("\n");
            fixed = `${fixed.trimEnd()}\n${closingTags}`;
            fixes.push(`Closed ${tagsToClose.length} unclosed tag(s): ${tagsToClose.join(", ")}`);
        }
    }

    // 18. Remove extra closing tags (surplus closers are removed from the end)
    const tagCounts = new Map();
    for (const [fullMatch, tagPart] of fixed.matchAll(/<(\/?[a-zA-Z][a-zA-Z0-9]*)[^>]*>/g)) {
        const isClosing = tagPart.startsWith("/");
        const isSelfClosing = fullMatch.endsWith("/>");
        const tagName = isClosing ? tagPart.slice(1) : tagPart;
        let counts = tagCounts.get(tagName);
        if (!counts) {
            counts = { opens: 0, closes: 0, selfClosing: 0 };
            tagCounts.set(tagName, counts);
        }
        if (isClosing) {
            counts.closes++;
        } else if (isSelfClosing) {
            counts.selfClosing++;
        } else {
            counts.opens++;
        }
    }
    for (const [tagName, counts] of tagCounts) {
        const extraCloses = counts.closes - counts.opens;
        if (extraCloses > 0) {
            let removed = 0;
            const matches = [...fixed.matchAll(new RegExp(`</${tagName}>`, "g"))];
            // Walk backwards so earlier match offsets stay valid while slicing.
            for (let i = matches.length - 1; i >= 0 && removed < extraCloses; i--) {
                const match = matches[i];
                const idx = match.index ?? 0;
                fixed = fixed.slice(0, idx) + fixed.slice(idx + match[0].length);
                removed++;
            }
            if (removed > 0) {
                fixes.push(`Removed ${removed} extra </${tagName}> closing tag(s)`);
            }
        }
    }

    // 19. Remove trailing garbage after last XML tag
    let lastValidTagEnd = -1;
    for (const closingMatch of fixed.matchAll(/<\/[a-zA-Z][a-zA-Z0-9]*>|\/>/g)) {
        lastValidTagEnd = closingMatch.index + closingMatch[0].length;
    }
    if (lastValidTagEnd > 0 && lastValidTagEnd < fixed.length) {
        const trailing = fixed.slice(lastValidTagEnd).trim();
        if (trailing) {
            fixed = fixed.slice(0, lastValidTagEnd);
            fixes.push("Removed trailing garbage after last XML tag");
        }
    }

    // 20. Fix nested mxCell by flattening: two consecutive opening lines with
    //     the same id -> drop the first one and one later stand-alone </mxCell>.
    const flattenInput = fixed.split("\n");
    let newLines = [];
    let nestedFixed = 0;
    let extraClosingToRemove = 0;
    for (let i = 0; i < flattenInput.length; i++) {
        const line = flattenInput[i];
        const nextLine = flattenInput[i + 1];
        if (nextLine &&
            /<mxCell\s/.test(line) &&
            /<mxCell\s/.test(nextLine) &&
            !line.includes("/>") &&
            !nextLine.includes("/>")) {
            const id1 = line.match(/\bid\s*=\s*["']([^"']+)["']/)?.[1];
            const id2 = nextLine.match(/\bid\s*=\s*["']([^"']+)["']/)?.[1];
            if (id1 && id1 === id2) {
                nestedFixed++;
                extraClosingToRemove++;
                continue;
            }
        }
        if (extraClosingToRemove > 0 && /^\s*<\/mxCell>\s*$/.test(line)) {
            extraClosingToRemove--;
            continue;
        }
        newLines.push(line);
    }
    if (nestedFixed > 0) {
        fixed = newLines.join("\n");
        fixes.push(`Flattened ${nestedFixed} duplicate-ID nested mxCell(s)`);
    }

    // 21. Fix true nested mxCell (different IDs): close the outer cell before
    //     the inner one opens, then drop the now-surplus closer later on.
    const nestInput = fixed.split("\n");
    newLines = [];
    let trueNestedFixed = 0;
    let cellDepth = 0;
    let pendingCloseRemoval = 0;
    for (const line of nestInput) {
        const trimmed = line.trim();
        const isOpenCell = /<mxCell\s/.test(trimmed) && !trimmed.endsWith("/>");
        const isCloseCell = trimmed === "</mxCell>";
        if (isOpenCell) {
            if (cellDepth > 0) {
                const indent = line.match(/^(\s*)/)?.[1] || "";
                newLines.push(`${indent}</mxCell>`);
                trueNestedFixed++;
                pendingCloseRemoval++;
            }
            cellDepth = 1;
            newLines.push(line);
        } else if (isCloseCell) {
            if (pendingCloseRemoval > 0) {
                pendingCloseRemoval--;
            } else {
                cellDepth = Math.max(0, cellDepth - 1);
                newLines.push(line);
            }
        } else {
            newLines.push(line);
        }
    }
    if (trueNestedFixed > 0) {
        fixed = newLines.join("\n");
        fixes.push(`Fixed ${trueNestedFixed} true nested mxCell(s)`);
    }

    // 22. Fix duplicate IDs by appending suffix (first occurrence keeps its id)
    const seenIds = new Map();
    for (const [, id] of fixed.matchAll(/\bid\s*=\s*["']([^"']+)["']/gi)) {
        seenIds.set(id, (seenIds.get(id) || 0) + 1);
    }
    const duplicateIds = new Set();
    for (const [id, count] of seenIds) {
        if (count > 1) {
            duplicateIds.add(id);
        }
    }
    if (duplicateIds.size > 0) {
        const idCounters = new Map();
        // The attribute is rebuilt from its captured parts: substituting the id
        // text inside the whole match could hit the attribute name itself
        // (e.g. id="d") or misinterpret "$" sequences in the id.
        fixed = fixed.replace(/(\bid\s*=\s*["'])([^"']+)(["'])/gi, (match, head, id, tail) => {
            if (!duplicateIds.has(id)) {
                return match;
            }
            const count = idCounters.get(id) || 0;
            idCounters.set(id, count + 1);
            if (count === 0) {
                return match;
            }
            return `${head}${id}_dup${count}${tail}`;
        });
        fixes.push(`Renamed ${duplicateIds.size} duplicate ID(s)`);
    }

    // 23. Fix empty id attributes
    let emptyIdCount = 0;
    fixed = fixed.replace(/<mxCell([^>]*)\sid\s*=\s*["']\s*["']([^>]*)>/g, (_match, before, after) => {
        emptyIdCount++;
        const newId = `cell_${Date.now()}_${emptyIdCount}`;
        return `<mxCell${before} id="${newId}"${after}>`;
    });
    if (emptyIdCount > 0) {
        fixes.push(`Generated ${emptyIdCount} missing ID(s)`);
    }

    // 24. Aggressive: drop broken mxCell elements the DOM parser still rejects
    if (typeof DOMParser !== "undefined") {
        let droppedCells = 0;
        let maxIterations = MAX_DROP_ITERATIONS;
        while (maxIterations-- > 0) {
            const doc = new DOMParser().parseFromString(fixed, "text/xml");
            const parseError = doc.querySelector("parsererror");
            if (!parseError) {
                break;
            }
            // The error text is expected to contain "<line>:<col>:".
            const errText = parseError.textContent || "";
            const located = errText.match(/(\d+):\d+:/);
            if (!located) {
                break;
            }
            const errLine = Number.parseInt(located[1], 10) - 1;
            const docLines = fixed.split("\n");
            // A reported line outside the text cannot be mapped to a cell.
            if (errLine < 0 || errLine >= docLines.length) {
                break;
            }
            let cellStart = errLine;
            let cellEnd = errLine;
            while (cellStart > 0 && !docLines[cellStart].includes("<mxCell")) {
                cellStart--;
            }
            while (cellEnd < docLines.length - 1) {
                if (docLines[cellEnd].includes("</mxCell>") ||
                    docLines[cellEnd].trim().endsWith("/>")) {
                    break;
                }
                cellEnd++;
            }
            docLines.splice(cellStart, cellEnd - cellStart + 1);
            fixed = docLines.join("\n");
            droppedCells++;
        }
        if (droppedCells > 0) {
            fixes.push(`Dropped ${droppedCells} unfixable mxCell element(s)`);
        }
    }

    return { fixed, fixes };
}
|
|
734
|
+
// ============================================================================
|
|
735
|
+
// Combined Validation and Fix
|
|
736
|
+
// ============================================================================
|
|
737
|
+
/**
 * Validate the XML and, if it is invalid, run the auto-fixer and validate again.
 *
 * @param {string} xml - The XML string to validate and potentially fix
 * @returns {{valid: boolean, error: string|null, fixed: string|null, fixes: string[]}}
 *   `fixed` is null when the input was already valid, or when it is still
 *   invalid and no fix was applied.
 */
export function validateAndFixXml(xml) {
    const initialError = validateMxCellStructure(xml);
    if (!initialError) {
        return { valid: true, error: null, fixed: null, fixes: [] };
    }

    // Invalid as given: attempt the repairs, then re-validate their result.
    const repair = autoFixXml(xml);
    const remainingError = validateMxCellStructure(repair.fixed);
    if (remainingError) {
        return {
            valid: false,
            error: remainingError,
            fixed: repair.fixes.length > 0 ? repair.fixed : null,
            fixes: repair.fixes,
        };
    }
    return { valid: true, error: null, fixed: repair.fixed, fixes: repair.fixes };
}
|
|
763
|
+
/**
 * Decide whether mxCell XML output looks complete rather than truncated.
 *
 * Trailing wrapper closing tags are peeled off repeatedly; what remains must
 * end with `/>` or `</mxCell>`.
 *
 * @param {string|null|undefined} xml - The XML string to check
 * @returns {boolean} true if the XML appears complete, false if truncated or empty
 */
export function isMxCellXmlComplete(xml) {
    const trailingWrappers = [
        /<\/mxParameter>\s*$/i,
        /<\/invoke>\s*$/i,
        /<\/antml:parameter>\s*$/i,
        /<\/antml:invoke>\s*$/i,
    ];
    let body = xml?.trim() || "";
    if (body === "") {
        return false;
    }
    // Keep stripping until a full pass over the wrappers changes nothing.
    for (;;) {
        const stripped = trailingWrappers
            .reduce((text, wrapper) => text.replace(wrapper, ""), body)
            .trim();
        if (stripped === body) {
            break;
        }
        body = stripped;
    }
    return body.endsWith("/>") || body.endsWith("</mxCell>");
}
|
|
785
|
+
//# sourceMappingURL=xml-validation.js.map
|