nx-md-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,197 @@
1
+ # Quick Start - Your Exact Use Case
2
+
3
+ ## Installation
4
+
5
+ ```bash
6
+ npm install nx-helpers
7
+ # Copy the markdown-transformer.ts file to your project
8
+ ```
9
+
10
+ ## Your Exact Example
11
+
12
+ Here's how to transform your markdown example to JSON:
13
+
14
+ ### Input Markdown
15
+
16
+ ```markdown
17
+ ### Short Answer
18
+ The asset is a server named server1 with private IP 192.168.1.1. Next steps include documenting it in the CMDB, identifying its OS/role, and performing baseline security and inventory checks.
19
+
20
+ ### Full Answer
21
+ The input specifies a single asset with the following attributes: assetType = server, assetName = server1, assetIp = 192.168.1.1. The IP address falls within a private RFC1918 range...
22
+
23
+ ### Assumptions
24
+ - The asset is intended to be tracked in an internal asset management system (CMDB/CMR).
25
+ - The IP 192.168.1.1 is an internal address and not publicly routable.
26
+
27
+ ### Unknowns
28
+ - Operating system and version running on server1.
29
+ - Physical vs. virtual server status and its exact location/topology.
30
+
31
+ ### Evidence
32
+ 1. Asset details provided: assetType = "server", assetName = "server1", assetIp = "192.168.1.1".
33
+ 2. The IP address 192.168.1.1 is within the private address space (RFC1918).
34
+ ```
35
+
36
+ ### Code
37
+
38
+ ```typescript
39
+ import { JSONTransformer, Schema } from './markdown-transformer';
40
+
41
+ // Define your desired JSON schema
42
+ const desiredSchema = Schema.object({
43
+ shortAnswer: Schema.string(),
44
+ fullAnswer: Schema.string(),
45
+ assumptions: Schema.array(Schema.string()),
46
+ unknowns: Schema.array(Schema.string()),
47
+ evidence: Schema.array(Schema.string()),
48
+ });
49
+
50
+ // Your markdown text
51
+ const markdownText = `...`; // Your markdown from above
52
+
53
+ // Transform it
54
+ const transformer = new JSONTransformer(desiredSchema);
55
+ const result = transformer.transformMarkdown(markdownText);
56
+
57
+ // Check the result
58
+ console.log(result.status); // "fixed" or "validated"
59
+ console.log(JSON.stringify(result.result, null, 2));
60
+ ```
61
+
62
+ ### Output
63
+
64
+ ```json
65
+ {
66
+ "status": "fixed",
67
+ "result": {
68
+ "shortAnswer": "The asset is a server named server1 with private IP 192.168.1.1. Next steps include documenting it in the CMDB, identifying its OS/role, and performing baseline security and inventory checks.",
69
+ "fullAnswer": "The input specifies a single asset with the following attributes: assetType = server, assetName = server1, assetIp = 192.168.1.1. The IP address falls within a private RFC1918 range...",
70
+ "assumptions": [
71
+ "The asset is intended to be tracked in an internal asset management system (CMDB/CMR).",
72
+ "The IP 192.168.1.1 is an internal address and not publicly routable."
73
+ ],
74
+ "unknowns": [
75
+ "Operating system and version running on server1.",
76
+ "Physical vs. virtual server status and its exact location/topology."
77
+ ],
78
+ "evidence": [
79
+ "Asset details provided: assetType = \"server\", assetName = \"server1\", assetIp = \"192.168.1.1\".",
80
+ "The IP address 192.168.1.1 is within the private address space (RFC1918)."
81
+ ]
82
+ }
83
+ }
84
+ ```
85
+
86
+ ## Alternative: Nested Structure
87
+
88
+ If you prefer a more structured output:
89
+
90
+ ```typescript
91
+ const nestedSchema = Schema.object({
92
+ asset: Schema.object({
93
+ type: Schema.string(),
94
+ name: Schema.string(),
95
+ ip: Schema.string(),
96
+ }),
97
+ analysis: Schema.object({
98
+ shortAnswer: Schema.string(),
99
+ fullAnswer: Schema.string(),
100
+ }),
101
+ metadata: Schema.object({
102
+ assumptions: Schema.array(Schema.string()),
103
+ unknowns: Schema.array(Schema.string()),
104
+ evidence: Schema.array(Schema.string()),
105
+ }),
106
+ });
107
+ ```
108
+
109
+ ## Using with nx-helpers
110
+
111
+ Merge multiple transformation results:
112
+
113
+ ```typescript
114
+ import { mergeNoRedundancy } from 'nx-helpers';
115
+
116
+ const base = transformer1.transformMarkdown(markdown1).result;
117
+ const override = transformer2.transformMarkdown(markdown2).result;
118
+
119
+ const merged = mergeNoRedundancy(base, override);
120
+ // Arrays are merged as UNION (deduplicated)
121
+ // Objects are deep-merged
122
+ // Primitives: override wins
123
+ ```
124
+
125
+ ## Key Features You'll Love
126
+
127
+ ✅ **Automatic List Detection**: Bullet points and numbered lists → arrays
128
+ ✅ **Typo Correction**: "Assumtions" → "assumptions"
129
+ ✅ **Case Handling**: "Short Answer" → "shortAnswer"
130
+ ✅ **Type Conversion**: String numbers → actual numbers
131
+ ✅ **Smart Defaults**: Missing fields get sensible defaults
132
+ ✅ **Detailed Fixes**: See exactly what was changed
133
+
134
+ ## Schema Types Reference
135
+
136
+ ```typescript
137
+ // Primitives
138
+ Schema.string()
139
+ Schema.number()
140
+ Schema.boolean()
141
+
142
+ // Arrays
143
+ Schema.array(Schema.string())
144
+ Schema.array(Schema.number())
145
+ Schema.array(Schema.object({ ... }))
146
+
147
+ // Objects (can be nested infinitely)
148
+ Schema.object({
149
+ name: Schema.string(),
150
+ nested: Schema.object({
151
+ deeplyNested: Schema.object({
152
+ value: Schema.number()
153
+ })
154
+ })
155
+ })
156
+ ```
157
+
158
+ ## Integration Example
159
+
160
+ ```typescript
161
+ // In your NX project
162
+ import { JSONTransformer, Schema } from './utils/markdown-transformer';
163
+ import { mergeNoRedundancy } from 'nx-helpers';
164
+
165
+ export function processMarkdownAnalysis(markdown: string) {
166
+ const schema = Schema.object({
167
+ shortAnswer: Schema.string(),
168
+ fullAnswer: Schema.string(),
169
+ assumptions: Schema.array(Schema.string()),
170
+ unknowns: Schema.array(Schema.string()),
171
+ evidence: Schema.array(Schema.string()),
172
+ });
173
+
174
+ const transformer = new JSONTransformer(schema);
175
+ const result = transformer.transformMarkdown(markdown);
176
+
177
+ if (result.status === 'failed') {
178
+ console.error('Transformation failed:', result.errors);
179
+ return null;
180
+ }
181
+
182
+ if (result.status === 'fixed') {
183
+ console.log('Applied fixes:', result.fixes);
184
+ }
185
+
186
+ return result.result;
187
+ }
188
+ ```
189
+
190
+ ## Run the Example
191
+
192
+ ```bash
193
+ npm install
194
+ npm run example
195
+ ```
196
+
197
+ This will run all the examples from `markdown-example.ts` including your exact use case.
package/docs/README.md ADDED
@@ -0,0 +1,366 @@
1
+ # Markdown to JSON Transformer
2
+
3
+ A powerful TypeScript library that parses markdown text and transforms it into JSON objects matching a desired schema. Features intelligent auto-fixing for typos, case mismatches, type conversions, and structural reorganization.
4
+
5
+ ## Features
6
+
7
+ ✨ **Markdown Parsing**
8
+ - Automatically parse markdown sections based on headings
9
+ - Extract lists, key-value pairs, and text content
10
+ - Handle nested structures
11
+
12
+ 🔧 **Smart Auto-Fixing**
13
+ - Fix typos in property names using fuzzy matching
14
+ - Handle case mismatches (camelCase, snake_case, Title Case)
15
+ - Convert types automatically (string → number, string → boolean, etc.)
16
+ - Restructure flat objects into nested schemas
17
+ - Add missing properties with sensible defaults
18
+
19
+ 🎯 **Schema Validation**
20
+ - Define schemas using a clean, intuitive syntax
21
+ - Support for string, number, boolean, array, and object types
22
+ - Nested object support with unlimited depth
23
+ - Returns validation status: `validated`, `fixed`, or `failed`
24
+
25
+ 🔄 **nx-helpers Integration**
26
+ - Uses `mergeNoRedundancy` for intelligent object merging
27
+ - Deduplicates array items by deep equality when merging
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ npm install markdown-json-transformer nx-helpers
33
+ ```
34
+
35
+ ## Quick Start
36
+
37
+ ```typescript
38
+ import { JSONTransformer, Schema } from 'markdown-json-transformer';
39
+
40
+ // 1. Define your desired JSON schema
41
+ const schema = Schema.object({
42
+ title: Schema.string(),
43
+ tags: Schema.array(Schema.string()),
44
+ metadata: Schema.object({
45
+ author: Schema.string(),
46
+ date: Schema.string(),
47
+ }),
48
+ });
49
+
50
+ // 2. Create transformer
51
+ const transformer = new JSONTransformer(schema);
52
+
53
+ // 3. Transform markdown to JSON
54
+ const markdown = `
55
+ ### Title
56
+ My Project
57
+
58
+ ### Tags
59
+ - TypeScript
60
+ - Markdown
61
+
62
+ ### Metadata
63
+ author: John Doe
64
+ date: 2024-01-01
65
+ `;
66
+
67
+ const result = transformer.transformMarkdown(markdown);
68
+
69
+ console.log(result.status); // "validated" or "fixed"
70
+ console.log(result.result); // Your JSON object
71
+ ```
72
+
73
+ ## Your Example Use Case
74
+
75
+ Transform complex markdown with sections into structured JSON:
76
+
77
+ ```typescript
78
+ const markdownInput = `### Short Answer
79
+ The asset is a server named server1 with private IP 192.168.1.1.
80
+
81
+ ### Full Answer
82
+ The input specifies a single asset with the following attributes...
83
+
84
+ ### Assumptions
85
+ - The asset is intended to be tracked in an internal asset management system.
86
+ - The IP 192.168.1.1 is an internal address.
87
+
88
+ ### Evidence
89
+ 1. Asset details provided: assetType = "server"
90
+ 2. The IP address is within private address space.
91
+ `;
92
+
93
+ const schema = Schema.object({
94
+ shortAnswer: Schema.string(),
95
+ fullAnswer: Schema.string(),
96
+ assumptions: Schema.array(Schema.string()),
97
+ evidence: Schema.array(Schema.string()),
98
+ });
99
+
100
+ const transformer = new JSONTransformer(schema);
101
+ const result = transformer.transformMarkdown(markdownInput);
102
+
103
+ // Output:
104
+ {
105
+ "status": "fixed",
106
+ "result": {
107
+ "shortAnswer": "The asset is a server named server1...",
108
+ "fullAnswer": "The input specifies a single asset...",
109
+ "assumptions": [
110
+ "The asset is intended to be tracked...",
111
+ "The IP 192.168.1.1 is an internal address."
112
+ ],
113
+ "evidence": [
114
+ "Asset details provided: assetType = \"server\"",
115
+ "The IP address is within private address space."
116
+ ]
117
+ }
118
+ }
119
+ ```
120
+
121
+ ## Schema Definition API
122
+
123
+ ### Basic Types
124
+
125
+ ```typescript
126
+ Schema.string() // String type
127
+ Schema.number() // Number type
128
+ Schema.boolean() // Boolean type
129
+ ```
130
+
131
+ ### Complex Types
132
+
133
+ ```typescript
134
+ // Array of strings
135
+ Schema.array(Schema.string())
136
+
137
+ // Array of objects
138
+ Schema.array(
139
+ Schema.object({
140
+ name: Schema.string(),
141
+ value: Schema.number(),
142
+ })
143
+ )
144
+
145
+ // Nested objects
146
+ Schema.object({
147
+ user: Schema.object({
148
+ profile: Schema.object({
149
+ name: Schema.string(),
150
+ age: Schema.number(),
151
+ }),
152
+ }),
153
+ })
154
+ ```
155
+
156
+ ## Transform Results
157
+
158
+ Every transformation returns a `TransformResult`:
159
+
160
+ ```typescript
161
+ interface TransformResult<T = any> {
162
+ status: 'validated' | 'fixed' | 'failed';
163
+ result: T | null;
164
+ errors?: string[]; // Only present if failed
165
+ fixes?: string[]; // Only present if fixed
166
+ }
167
+ ```
168
+
169
+ ### Status Values
170
+
171
+ - **`validated`**: Input perfectly matched the schema (no changes needed)
172
+ - **`fixed`**: Input was transformed to match the schema (see `fixes` for details)
173
+ - **`failed`**: Could not transform input to match schema (see `errors`)
174
+
175
+ ## Advanced Features
176
+
177
+ ### Fuzzy Matching Threshold
178
+
179
+ Control how aggressively the transformer matches property names:
180
+
181
+ ```typescript
182
+ // More permissive matching (default: 0.7)
183
+ const transformer = new JSONTransformer(schema, 0.6);
184
+
185
+ // Will match:
186
+ // "assumtions" → "assumptions" ✓
187
+ // "usr" → "user" ✓
188
+ // "meta" → "metadata" ✓
189
+ ```
190
+
191
+ ### Direct Object Transformation
192
+
193
+ Transform plain objects (not just markdown):
194
+
195
+ ```typescript
196
+ const input = {
197
+ "cats": "some text",
198
+ "color": "white"
199
+ };
200
+
201
+ const schema = Schema.object({
202
+ cat: Schema.string(),
203
+ data: Schema.object({
204
+ color: Schema.string(),
205
+ }),
206
+ });
207
+
208
+ const result = new JSONTransformer(schema).transform(input);
209
+
210
+ // Result:
211
+ {
212
+ "cat": "some text",
213
+ "data": {
214
+ "color": "white"
215
+ }
216
+ }
217
+ ```
218
+
219
+
220
+
221
+ ### Merging Multiple Results (Optional)
222
+
223
+ For advanced use cases where you need to merge results from multiple transformations:
224
+
225
+ **Option 1: Use the built-in helper (recommended)**
226
+ ```typescript
227
+ import { mergeTransformResults } from 'markdown-json-transformer';
228
+
229
+ const result1 = transformer1.transform(input1);
230
+ const result2 = transformer2.transform(input2);
231
+
232
+ const merged = mergeTransformResults(result1, result2);
233
+ ```
234
+
235
+ **Option 2: Use nx-helpers directly**
236
+ ```typescript
237
+ import { mergeNoRedundancy } from 'nx-helpers';
238
+
239
+ const result1 = transformer1.transform(input1);
240
+ const result2 = transformer2.transform(input2);
241
+
242
+ const merged = mergeNoRedundancy(result1.result, result2.result);
243
+ ```
244
+
245
+ ## Markdown Parsing Details
246
+
247
+ ### Section Detection
248
+
249
+ The parser recognizes markdown headings (H1-H6):
250
+
251
+ ```markdown
252
+ # Heading 1
253
+ ## Heading 2
254
+ ### Heading 3
255
+ ```
256
+
257
+ Each heading becomes a property of the resulting JSON object; the heading text is converted to camelCase to form the property name (e.g. "Short Answer" → `shortAnswer`).
258
+
259
+ ### Content Types
260
+
261
+ **Lists** → Arrays:
262
+ ```markdown
263
+ ### Tags
264
+ - TypeScript
265
+ - Node.js
266
+ ```
267
+ → `{ tags: ["TypeScript", "Node.js"] }`
268
+
269
+ **Key-Value Pairs** → Objects:
270
+ ```markdown
271
+ ### Metadata
272
+ author: John Doe
273
+ date: 2024-01-01
274
+ ```
275
+ → `{ metadata: { author: "John Doe", date: "2024-01-01" } }`
276
+
277
+ **Plain Text** → Strings:
278
+ ```markdown
279
+ ### Description
280
+ This is a description.
281
+ ```
282
+ → `{ description: "This is a description." }`
283
+
284
+ ## Type Conversions
285
+
286
+ The transformer automatically converts values to the type the schema expects:
287
+
288
+ ```typescript
289
+ // String to Number
290
+ "42" → 42
291
+ "3.14" → 3.14
292
+ "1,234" → 1234
293
+
294
+ // String to Boolean
295
+ "true" → true
296
+ "yes" → true
297
+ "1" → true
298
+ "false" → false
299
+
300
+ // Boolean to Number
301
+ true → 1
302
+ false → 0
303
+
304
+ // Any to String
305
+ 42 → "42"
306
+ {a: 1} → '{"a":1}'
307
+ ```
308
+
309
+ ## Examples
310
+
311
+ See `markdown-example.ts` for comprehensive examples including:
312
+
313
+ 1. Your exact use case (markdown sections to JSON)
314
+ 2. Nested schema structures
315
+ 3. Simple markdown parsing
316
+ 4. Direct markdown section parsing
317
+ 5. Fuzzy matching with typos
318
+
319
+ Run examples:
320
+
321
+ ```bash
322
+ npm run example
323
+ ```
324
+
325
+ ## API Reference
326
+
327
+ ### `JSONTransformer`
328
+
329
+ ```typescript
330
+ class JSONTransformer {
331
+ constructor(schema: SchemaType, fuzzyMatchThreshold?: number);
332
+
333
+ transformMarkdown(markdown: string): TransformResult;
334
+ transform(input: any): TransformResult;
335
+ }
336
+ ```
337
+
338
+ ### `MarkdownParser`
339
+
340
+ ```typescript
341
+ class MarkdownParser {
342
+ static parseSections(markdown: string): MarkdownSection[];
343
+ static sectionsToObject(sections: MarkdownSection[]): Record<string, any>;
344
+ static parseContent(content: string): any;
345
+ }
346
+ ```
347
+
348
+ ### `Schema`
349
+
350
+ ```typescript
351
+ declare const Schema: {
352
+ string(): SchemaType;
353
+ number(): SchemaType;
354
+ boolean(): SchemaType;
355
+ array(items: SchemaType): SchemaType;
356
+ object(properties: Record<string, SchemaType>): SchemaType;
357
+ };
358
+ ```
359
+
360
+ ## License
361
+
362
+ ISC
363
+
364
+ ## Contributing
365
+
366
+ Contributions are welcome! Please feel free to submit a Pull Request.