@hla4ts/fom-codegen 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,404 @@
1
+ /**
2
+ * XSD Parser
3
+ *
4
+ * Parses XML Schema Definition (XSD) files to extract type information
5
+ * for generating TypeScript types.
6
+ */
7
+
8
/**
 * Minimal in-memory representation of a single parsed XML element.
 */
interface XmlElement {
  /** Tag name of the element. */
  name: string;
  /** Attribute values keyed by attribute name. */
  attributes: Record<string, string>;
  /** Child elements nested directly inside this element. */
  children: XmlElement[];
  /** Text content recorded for the element, if any. */
  text?: string;
}
14
+
15
/**
 * Description of one type extracted from an XSD document.
 */
interface XsdType {
  /** Name under which the type is known. */
  name: string;
  /** Category of the type: complex, plain simple, or enumeration. */
  kind: "complex" | "simple" | "enum";
  /** Name of the type this one is based on, when one was recorded. */
  baseType?: string;
  /** Child element declarations of the type. */
  elements: XsdElement[];
  /** Attribute declarations of the type. */
  attributes: XsdAttribute[];
  /** Allowed literal values, present for enumerations. */
  enumValues?: string[];
  /** Documentation text attached to the type, if any. */
  documentation?: string;
}
24
+
25
/**
 * Description of one element declaration extracted from an XSD document.
 */
interface XsdElement {
  /** Name of the element. */
  name: string;
  /** Name of the element's type. */
  type: string;
  /** Minimum number of occurrences. */
  minOccurs: number;
  /** Maximum number of occurrences, or "unbounded" when there is no limit. */
  maxOccurs: number | "unbounded";
  /** Whether the element may be omitted. */
  optional: boolean;
  /** Whether the element may occur more than once. */
  array: boolean;
  /** Documentation text attached to the element, if any. */
  documentation?: string;
}
34
+
35
/**
 * Description of one attribute declaration extracted from an XSD document.
 */
interface XsdAttribute {
  /** Name of the attribute. */
  name: string;
  /** Name of the attribute's type. */
  type: string;
  /** Whether the attribute must be present. */
  required: boolean;
  /** Value recorded as the attribute's default, if any. */
  defaultValue?: string;
}
41
+
42
/**
 * Result of parsing an XSD document.
 */
interface XsdSchema {
  /** Value of the schema's `targetNamespace` attribute ("" when absent). */
  targetNamespace: string;
  /** Extracted types, keyed by type name. */
  types: Map<string, XsdType>;
  /** Extracted top-level elements, keyed by element name. */
  elements: Map<string, XsdElement>;
  /** Name of the element treated as the document root, if one was found. */
  rootElement?: string;
}
48
+
49
/**
 * Parse an XSD document and extract its type and element declarations.
 *
 * Types are registered in this order: named complex types, inline complex
 * types of top-level elements (named `<ElementName>Type`), then named simple
 * types. Every named top-level element whose type can be resolved is added to
 * `elements`; this includes elements that declare their type inline, which
 * would otherwise be unreachable from the result.
 *
 * `rootElement` is the last top-level element carrying an explicit `type`
 * attribute. When no such element exists, it falls back to the last top-level
 * element with an inline type.
 *
 * @param xsdContent - Text of the XSD document.
 * @returns The extracted schema description.
 * @throws Error when the document contains no `schema` element.
 */
export function parseXsd(xsdContent: string): XsdSchema {
  const root = parseXml(xsdContent);

  // The parsed root is either the schema element itself or a wrapper that
  // directly contains it.
  const schemaElement = root.name === "schema" ? root : findChild(root, "schema");
  if (!schemaElement) {
    throw new Error("No schema element found in XSD");
  }

  const schema: XsdSchema = {
    targetNamespace: schemaElement.attributes["targetNamespace"] ?? "",
    types: new Map(),
    elements: new Map(),
  };

  // Named complex types.
  for (const complexType of getChildren(schemaElement, "complexType")) {
    const typeName = complexType.attributes["name"];
    if (!typeName) continue;
    schema.types.set(typeName, parseComplexType(complexType, schema));
  }

  // Top-level elements: register inline complex types and the elements
  // themselves in a single pass.
  let typedRoot: string | undefined;
  let inlineRoot: string | undefined;
  for (const element of getChildren(schemaElement, "element")) {
    const elementName = element.attributes["name"];
    if (!elementName) continue;

    let elementType: string | undefined = element.attributes["type"];
    if (elementType) {
      typedRoot = elementName;
    } else {
      const complexType = findChild(element, "complexType");
      if (!complexType) continue;

      const simpleContent = findChild(complexType, "simpleContent");
      if (simpleContent) {
        // simpleContent types are not registered; the element is typed by
        // the base of the extension instead.
        elementType = findChild(simpleContent, "extension")?.attributes["base"];
      } else {
        const typeName = `${capitalize(elementName)}Type`;
        const type = parseComplexType(complexType, schema);
        type.name = typeName;
        schema.types.set(typeName, type);
        elementType = typeName;
      }

      // No resolvable type: leave the element unregistered.
      if (!elementType) continue;
      inlineRoot = elementName;
    }

    schema.elements.set(elementName, {
      name: elementName,
      type: elementType,
      minOccurs: 1,
      maxOccurs: 1,
      optional: false,
      array: false,
    });
  }

  // Named simple types (including enumerations).
  for (const simpleType of getChildren(schemaElement, "simpleType")) {
    const typeName = simpleType.attributes["name"];
    if (!typeName) continue;
    schema.types.set(typeName, parseSimpleType(simpleType));
  }

  // Explicitly typed elements keep precedence as the root.
  const rootName = typedRoot ?? inlineRoot;
  if (rootName !== undefined) {
    schema.rootElement = rootName;
  }

  return schema;
}
127
+
128
/**
 * Build an `XsdType` of kind "complex" from a `complexType` node.
 *
 * Elements are read from the first `sequence`, `all` or `choice` child of the
 * node, and from the same compositors inside a `complexContent` extension.
 * Only `element` declarations that are direct children of the compositor are
 * read. Attributes are read from the node itself and from the `extension` of
 * `simpleContent` / `complexContent`, which is where a derived type declares
 * them.
 *
 * @param node - The `complexType` element to read.
 * @param schema - Schema being built; passed on to `parseElement`.
 * @returns The extracted type. `name` is "" for an anonymous type.
 */
function parseComplexType(node: XmlElement, schema: XsdSchema): XsdType {
  const type: XsdType = {
    name: node.attributes["name"] || "",
    kind: "complex",
    elements: [],
    attributes: [],
  };

  // Documentation from annotation/documentation, when present and non-empty.
  const annotation = findChild(node, "annotation");
  const doc = annotation ? findChild(annotation, "documentation") : undefined;
  if (doc && doc.text) {
    type.documentation = doc.text.trim();
  }

  // Append the element declarations of the container's compositor, if any.
  const addElementsFrom = (container: XmlElement): void => {
    const group =
      findChild(container, "sequence") ||
      findChild(container, "all") ||
      findChild(container, "choice");
    if (!group) return;
    for (const element of getChildren(group, "element")) {
      type.elements.push(parseElement(element, schema));
    }
  };

  // Append the attribute declarations directly inside the container.
  const addAttributesFrom = (container: XmlElement): void => {
    for (const attr of getChildren(container, "attribute")) {
      type.attributes.push(parseAttribute(attr));
    }
  };

  addElementsFrom(node);
  addAttributesFrom(node);

  // simpleContent: the type extends a simple type and may add attributes.
  const simpleContent = findChild(node, "simpleContent");
  const simpleExtension = simpleContent ? findChild(simpleContent, "extension") : undefined;
  if (simpleExtension) {
    type.baseType = simpleExtension.attributes["base"];
    addAttributesFrom(simpleExtension);
  }

  // complexContent: the type extends a complex type and may add elements
  // and attributes.
  const complexContent = findChild(node, "complexContent");
  const complexExtension = complexContent ? findChild(complexContent, "extension") : undefined;
  if (complexExtension) {
    type.baseType = complexExtension.attributes["base"];
    addElementsFrom(complexExtension);
    addAttributesFrom(complexExtension);
  }

  return type;
}
186
+
187
/**
 * Build an `XsdType` from a `simpleType` node.
 *
 * - A `union` with a non-empty `memberTypes` list yields a "simple" type
 *   whose `baseType` is the first listed member.
 * - A `restriction` with at least one `enumeration` yields an "enum" type.
 *   Empty-string enumeration values are kept.
 * - Any other `restriction` yields a "simple" type that keeps the
 *   restriction's `base`, so the underlying type is not lost.
 * - Anything else yields a "simple" type without a base.
 *
 * @param node - The `simpleType` element to read.
 * @returns The extracted type. `name` is "" for an anonymous type.
 */
function parseSimpleType(node: XmlElement): XsdType {
  const typeName = node.attributes["name"] || "";

  // Union (e.g. an enum type combined with a free-form string).
  const union = node.children.find((child) => child.name === "union");
  if (union) {
    const members = (union.attributes["memberTypes"] || "").split(/\s+/).filter(Boolean);
    if (members.length > 0) {
      return {
        name: typeName,
        kind: "simple",
        baseType: members[0],
        elements: [],
        attributes: [],
      };
    }
  }

  const restriction = node.children.find((child) => child.name === "restriction");
  const base = restriction?.attributes["base"];

  if (restriction) {
    // Compare against undefined rather than testing truthiness: "" is a
    // legitimate enumeration value.
    const enumValues = restriction.children
      .filter((child) => child.name === "enumeration")
      .map((child) => child.attributes["value"])
      .filter((value): value is string => value !== undefined);

    if (enumValues.length > 0) {
      return {
        name: typeName,
        kind: "enum",
        baseType: base,
        enumValues,
        elements: [],
        attributes: [],
      };
    }
  }

  const plain: XsdType = {
    name: typeName,
    kind: "simple",
    elements: [],
    attributes: [],
  };
  if (base) {
    plain.baseType = base;
  }
  return plain;
}
241
+
242
/**
 * Build an `XsdElement` from an `element` node.
 *
 * When the node has no `type` attribute the type is derived from its inline
 * definition:
 * - inline `complexType` with `simpleContent`: the base of the extension
 *   ("string" when the extension has no base), or a generated type name when
 *   there is no extension;
 * - other inline `complexType`: a generated type name; the type is parsed and
 *   registered in `schema` when a schema is supplied;
 * - inline `simpleType`: a generated type name; the type is parsed and
 *   registered in `schema` when a schema is supplied, so the name does not
 *   refer to a type that was never recorded.
 *
 * Generated names are the capitalized element name with a "Type" suffix,
 * unless the name already ends with "Type".
 *
 * @param node - The `element` node to read.
 * @param schema - Optional schema that receives inline type definitions.
 * @returns The extracted element description.
 */
function parseElement(node: XmlElement, schema?: XsdSchema): XsdElement {
  const name = node.attributes["name"] || "";
  let type = node.attributes["type"] || "";

  // Occurrence bounds default to 1, including when the attribute is not a number.
  const parseOccurs = (raw: string | undefined): number => {
    const parsed = parseInt(raw || "1", 10);
    return Number.isNaN(parsed) ? 1 : parsed;
  };
  const minOccurs = parseOccurs(node.attributes["minOccurs"]);
  const rawMaxOccurs = node.attributes["maxOccurs"] || "1";
  const unbounded = rawMaxOccurs === "unbounded";
  const maxOccursNum = unbounded ? Infinity : parseOccurs(rawMaxOccurs);

  if (!type) {
    // One naming rule for every kind of inline type.
    const inlineTypeName = (): string => {
      const baseName = capitalize(name);
      return baseName.endsWith("Type") ? baseName : `${baseName}Type`;
    };

    const complexType = findChild(node, "complexType");
    if (complexType) {
      const simpleContent = findChild(complexType, "simpleContent");
      if (simpleContent) {
        // Extends a simple type: use that type directly.
        const extension = findChild(simpleContent, "extension");
        type = extension ? extension.attributes["base"] || "string" : inlineTypeName();
      } else {
        type = inlineTypeName();
        if (schema) {
          const inlineType = parseComplexType(complexType, schema);
          inlineType.name = type;
          schema.types.set(type, inlineType);
        }
      }
    } else {
      const simpleType = findChild(node, "simpleType");
      if (simpleType) {
        type = inlineTypeName();
        if (schema) {
          const inlineType = parseSimpleType(simpleType);
          inlineType.name = type;
          schema.types.set(type, inlineType);
        }
      }
    }
  }

  // Documentation from annotation/documentation, when present and non-empty.
  let documentation: string | undefined;
  const annotation = findChild(node, "annotation");
  const doc = annotation ? findChild(annotation, "documentation") : undefined;
  if (doc && doc.text) {
    documentation = doc.text.trim();
  }

  return {
    name,
    type,
    minOccurs,
    maxOccurs: unbounded ? "unbounded" : maxOccursNum,
    optional: minOccurs === 0,
    array: unbounded || maxOccursNum > 1,
    documentation,
  };
}
304
+
305
/**
 * Return `str` with its first character upper-cased.
 *
 * An empty string is returned unchanged. `charAt` is used instead of `str[0]`
 * because indexing an empty string yields `undefined`, on which
 * `toUpperCase` would throw.
 *
 * @param str - Text to capitalize.
 * @returns The capitalized text.
 */
function capitalize(str: string): string {
  return str.charAt(0).toUpperCase() + str.slice(1);
}
308
+
309
/**
 * Build an `XsdAttribute` from an `attribute` node.
 *
 * - `type` falls back to "xs:string" when the node declares none.
 * - `required` is true only when `use="required"`.
 * - `defaultValue` is the `default` attribute, or the `fixed` attribute when
 *   no `default` is declared. An explicit empty-string default is kept.
 *
 * @param node - The `attribute` node to read.
 * @returns The extracted attribute description.
 */
function parseAttribute(node: XmlElement): XsdAttribute {
  const { attributes } = node;

  return {
    name: attributes["name"] || "",
    type: attributes["type"] || "xs:string",
    required: (attributes["use"] || "optional") === "required",
    // `??` rather than `||`: default="" is a real default, not a missing one.
    defaultValue: attributes["default"] ?? attributes["fixed"],
  };
}
328
+ }
329
+
330
/**
 * Simple XML parser (reusing approach from xml-parser.ts).
 *
 * Comments are removed and CDATA sections are unwrapped before the text is
 * split into tag and text tokens. The XML declaration, processing
 * instructions (`<?...?>`) and markup declarations such as `<!DOCTYPE ...>`
 * are skipped. They are not elements and have no closing tag, so treating
 * them as elements would make them the document root.
 *
 * NOTE(review): because CDATA is unwrapped before tokenizing, a `<` inside a
 * CDATA section is tokenized as markup.
 *
 * @param xml - XML text to parse.
 * @returns The first element of the document, with its descendants attached.
 * @throws Error when the text contains no element.
 */
function parseXml(xml: string): XmlElement {
  const cleaned = xml
    .replace(/<!--[\s\S]*?-->/g, "")
    .replace(/<!\[CDATA\[[\s\S]*?\]\]>/g, (match) => match.slice(9, -3))
    .trim();
  const tokens = cleaned.match(/<[^>]+>|[^<]+/g) ?? [];
  const stack: XmlElement[] = [];
  let root: XmlElement | undefined;

  for (const token of tokens) {
    if (!token.startsWith("<")) {
      // Text token: append it to the innermost open element, if any.
      const text = token.trim();
      const current = stack[stack.length - 1];
      if (text && current) {
        current.text = (current.text ?? "") + text;
      }
      continue;
    }

    // Prolog and declarations are not elements.
    if (token.startsWith("<?") || token.startsWith("<!")) {
      continue;
    }

    if (token.startsWith("</")) {
      stack.pop();
      continue;
    }

    const selfClosing = token.endsWith("/>");
    const tagBody = token.slice(1, token.length - (selfClosing ? 2 : 1)).trim();
    const [rawName, ...attrParts] = tagBody.split(/\s+/);
    const element: XmlElement = {
      name: stripNamespace(rawName),
      attributes: parseAttributes(attrParts.join(" ")),
      children: [],
      text: "",
    };

    if (!root) {
      root = element;
    }
    const parent = stack[stack.length - 1];
    if (parent) {
      parent.children.push(element);
    }
    if (!selfClosing) {
      stack.push(element);
    }
  }

  if (!root) {
    throw new Error("Failed to parse XML: no root element found.");
  }
  return root;
}
376
+
377
/**
 * Extract `name="value"` / `name='value'` pairs from the attribute portion of
 * a tag.
 *
 * Attribute names have their namespace prefix removed. When the same name
 * occurs more than once, the last value wins.
 *
 * @param text - Attribute portion of a tag, without the tag name.
 * @returns Attribute values keyed by un-prefixed attribute name.
 */
function parseAttributes(text: string): Record<string, string> {
  const result: Record<string, string> = {};
  const pattern = /([^\s=]+)\s*=\s*"(.*?)"|([^\s=]+)\s*=\s*'(.*?)'/g;

  // Groups 1/2 are filled for double-quoted values, groups 3/4 for single-quoted ones.
  for (let found = pattern.exec(text); found !== null; found = pattern.exec(text)) {
    const key = stripNamespace(found[1] ?? found[3] ?? "");
    if (!key) {
      continue;
    }
    result[key] = found[2] ?? found[4] ?? "";
  }

  return result;
}
390
+
391
/**
 * Remove a namespace prefix from a qualified name.
 *
 * @param name - Name that may contain one or more `:` separators.
 * @returns The part after the last `:`, or the whole name when it has none.
 */
function stripNamespace(name: string): string {
  const lastColon = name.lastIndexOf(":");
  if (lastColon === -1) {
    return name;
  }
  return name.slice(lastColon + 1);
}
395
+
396
/**
 * Return the first direct child of `parent` with the given tag name.
 *
 * @param parent - Element whose children are searched.
 * @param name - Tag name to look for.
 * @returns The first matching child, or `undefined` when there is none.
 */
function findChild(parent: XmlElement, name: string): XmlElement | undefined {
  for (const candidate of parent.children) {
    if (candidate.name === name) {
      return candidate;
    }
  }
  return undefined;
}
399
+
400
/**
 * Return every direct child of `parent` with the given tag name.
 *
 * @param parent - Element whose children are searched.
 * @param name - Tag name to look for.
 * @returns A new array of the matching children, in their original order.
 */
function getChildren(parent: XmlElement, name: string): XmlElement[] {
  const matches: XmlElement[] = [];
  for (const candidate of parent.children) {
    if (candidate.name === name) {
      matches.push(candidate);
    }
  }
  return matches;
}
403
+
404
+ export type { XsdSchema, XsdType, XsdElement, XsdAttribute };