flex-md 4.2.7 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Centralized normalization for Markdown input.
3
+ * Handles common LLM output artifacts like literal \n.
4
+ */
5
+ export declare function normalizeMarkdownInput(md: string): string;
@@ -0,0 +1,10 @@
/**
 * Centralized normalization for Markdown input.
 *
 * LLM output delivered through JSON is often double-escaped, so line breaks
 * arrive as the two-character sequence backslash + "n" (optionally preceded
 * by backslash + "r") instead of a real newline. This turns those escape
 * sequences into real "\n" characters.
 *
 * Every occurrence is replaced, wherever it appears in the text.
 *
 * @param md Raw Markdown text. Any falsy value (empty string, null,
 *           undefined) yields the empty string.
 * @returns The text with literal `\r\n` and `\n` escape sequences replaced
 *          by a single newline each.
 */
export function normalizeMarkdownInput(md) {
    if (!md) {
        return "";
    }
    // The escaped CRLF alternative is listed first so the pair collapses into
    // ONE newline instead of leaving a stray literal "\r" at the end of the line.
    return md.replace(/\\r\\n|\\n/g, "\n");
}
package/dist/md/parse.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { toCamelCase } from "nx-helpers";
2
+ import { normalizeMarkdownInput } from "./normalize.js";
/**
 * Canonicalizes a name for comparison: surrounding whitespace is removed,
 * every internal run of whitespace becomes a single space, and the result
 * is lower-cased.
 *
 * @param s Name to canonicalize.
 * @returns The canonical form of `s`.
 */
export function normalizeName(s) {
    // After trimming there is no leading/trailing whitespace, so splitting on
    // whitespace runs and re-joining with one space collapses the runs.
    const words = s.trim().split(/\s+/);
    return words.join(" ").toLowerCase();
}
@@ -157,12 +158,33 @@ export function isIssuesEnvelopeCheck(md) {
157
158
  }
/**
 * Converts Markdown into a plain object keyed by camel-cased section heading.
 *
 * For every section returned by `parseHeadingsAndSections`, the value is the
 * array produced by `extractBullets` when that array is non-empty; otherwise
 * it is the trimmed section body as a string. Tables are not parsed here and
 * stay as raw text.
 *
 * @param md Markdown text; it is passed through `normalizeMarkdownInput` first.
 * @returns Object mapping `toCamelCase(section heading name)` to either a
 *          bullet list or the section's text.
 */
export function markdownToJson(md) {
    // Robustly handle both actual newlines and literal \n (common in LLM JSON outputs)
    const normalizedMd = normalizeMarkdownInput(md);
    // Collect bullet lines that look like headers ("- Name"): a marker at the
    // start of a line followed by 2-50 characters containing no colon or em dash.
    // The gap after the marker is restricted to same-line whitespace
    // ([^\S\r\n]) so an empty bullet cannot swallow the line break and capture
    // the NEXT line as its name.
    const bulletNames = [];
    const bulletLinesRx = /^[-*+][^\S\r\n]+([^—:\n\r]{2,50})$/gm;
    for (let m = bulletLinesRx.exec(normalizedMd); m !== null; m = bulletLinesRx.exec(normalizedMd)) {
        const name = m[1].trim();
        // A capture made only of whitespace trims to "" and is not a usable name.
        if (name) {
            bulletNames.push(name);
        }
    }
    // Use Flex-MD's native parser, telling it which bullet lines act as headings.
    const sections = parseHeadingsAndSections(normalizedMd, { bulletNames });
    const result = {};
    for (const sec of sections) {
        const key = toCamelCase(sec.heading.name);
        const body = sec.body.trim();
        // Lists become arrays; everything else (including tables) is kept as text.
        // TODO: parse table bodies into structured rows instead of raw text.
        const bullets = extractBullets(body);
        result[key] = bullets.length > 0 ? bullets : body;
    }
    return result;
}
@@ -6,5 +6,6 @@ import { type OutputFormatSpec } from "../types.js";
6
6
  export declare function ofsToSchema(spec: OutputFormatSpec): SchemaType;
7
7
  /**
8
8
  * Transforms markdown text using a Flex-MD OutputFormatSpec or a recallId.
9
+ * If no spec is provided, it attempts to infer it from the markdown (autospecs).
9
10
  */
10
- export declare function transformWithOfs<T = any>(md: string, specOrRecallId: OutputFormatSpec | string): TransformResult<T>;
11
+ export declare function transformWithOfs<T = any>(md: string, specOrRecallId?: OutputFormatSpec | string): TransformResult<T>;
@@ -1,6 +1,7 @@
1
1
  import { JSONTransformer, Schema } from "nx-md-parser";
2
2
  import { recall } from "./memory.js";
3
- import { parseHeadingsAndSections, extractBullets, parseMarkdownTable, normalizeName } from "../md/parse.js";
3
+ import { parseHeadingsAndSections, extractBullets, parseMarkdownTable, normalizeName, markdownToJson } from "../md/parse.js";
4
+ import { normalizeMarkdownInput } from "../md/normalize.js";
4
5
  /**
5
6
  * Converts a Flex-MD OutputFormatSpec to an nx-md-parser Schema.
6
7
  */
@@ -47,25 +48,39 @@ export function ofsToSchema(spec) {
47
48
  }
48
49
  /**
49
50
  * Transforms markdown text using a Flex-MD OutputFormatSpec or a recallId.
51
+ * If no spec is provided, it attempts to infer it from the markdown (autospecs).
50
52
  */
51
53
  export function transformWithOfs(md, specOrRecallId) {
54
+ // 0. Normalize input (handle literal \n common in LLM outputs)
55
+ const normalizedMd = normalizeMarkdownInput(md);
56
+ if (!specOrRecallId) {
57
+ // AUTOSPECS: If no spec is provided, use internal logic ONLY.
58
+ // This avoids complex schema generation and keeps it robust for unknown structures.
59
+ const result = markdownToJson(normalizedMd);
60
+ return {
61
+ status: "validated",
62
+ result: result,
63
+ errors: []
64
+ };
65
+ }
52
66
  let spec;
53
67
  if (typeof specOrRecallId === "string") {
54
- spec = recall(specOrRecallId);
55
- if (!spec) {
68
+ const recalled = recall(specOrRecallId);
69
+ if (!recalled) {
56
70
  return {
57
71
  status: "failed",
58
72
  result: null,
59
73
  errors: [`Recall ID "${specOrRecallId}" not found in memory.`]
60
74
  };
61
75
  }
76
+ spec = recalled;
62
77
  }
63
78
  else {
64
79
  spec = specOrRecallId;
65
80
  }
66
81
  // 1. Parse sections using Flex-MD parser
67
82
  const bulletNames = spec.sections.map(s => s.name);
68
- const parsedSections = parseHeadingsAndSections(md, { bulletNames });
83
+ const parsedSections = parseHeadingsAndSections(normalizedMd, { bulletNames });
69
84
  const parsedObj = {};
70
85
  // 2. Map sections to OFS and apply complex parsing (tables/lists)
71
86
  for (const sectionSpec of spec.sections) {
@@ -0,0 +1,5 @@
1
+ import { OutputFormatSpec } from "../types.js";
2
+ /**
3
+ * Infers an OutputFormatSpec from a Markdown string.
4
+ */
5
+ export declare function inferOfsFromMarkdown(md: string): OutputFormatSpec;
@@ -0,0 +1,60 @@
1
+ import { parseHeadingsAndSections, extractBullets } from "../md/parse.js";
/**
 * Infers an OutputFormatSpec from a Markdown string.
 *
 * Bullet lines of the form "- Name" (no colon or em dash) are collected and
 * handed to `parseHeadingsAndSections` as candidate section names. Each
 * resulting section is then classified, in this order:
 *   1. "list"  - `extractBullets` finds at least one bullet in the body;
 *   2. "table" - the first non-empty line starts with "|" and the second is
 *                made only of "|", "-", ":" and whitespace; column names are
 *                the non-empty cells of the first line;
 *   3. "text"  - anything else.
 * Every inferred section is marked `required: true`.
 *
 * @param md Markdown text; a falsy value is treated as the empty string.
 * @returns The inferred spec, with `sectionOrderMatters: false`,
 *          `tablesOptional: true`, no standalone tables and "None" as the
 *          empty-section value.
 */
export function inferOfsFromMarkdown(md) {
    // Tolerate null/undefined input instead of throwing on `.split`.
    const source = md || "";
    // Built once, outside the loops. Match "- Name" but NOT "- Name: more text".
    const headerBulletRx = /^[-*+]\s+([^—:\n]+)$/;
    // "-" is placed last in the class so it is an unambiguous literal rather
    // than a range operator next to the \s escape.
    const separatorRowRx = /^[|\s:-]+$/;
    // Collect all bullet names that look like headers ("- Name")
    const bulletNames = [];
    for (const line of source.split("\n")) {
        const m = line.match(headerBulletRx);
        const candidate = m ? m[1].trim() : "";
        // A capture made only of whitespace trims to "" and is not a usable name.
        if (candidate) {
            bulletNames.push(candidate);
        }
    }
    const sections = parseHeadingsAndSections(source, { bulletNames });
    const specSections = [];
    for (const sec of sections) {
        const name = sec.heading.name;
        const body = sec.body.trim();
        // 1. Detect list
        if (extractBullets(body).length > 0) {
            specSections.push({ name, kind: "list", required: true });
            continue;
        }
        // 2. Detect table (basic check): header row followed by a separator row
        const rows = body.split("\n").map(l => l.trim()).filter(Boolean);
        if (rows.length >= 2 && rows[0].startsWith("|") && separatorRowRx.test(rows[1])) {
            // Column names are the non-empty cells of the header row.
            const cols = rows[0].split("|").map(c => c.trim()).filter(Boolean);
            specSections.push({ name, kind: "table", columns: cols, required: true });
            continue;
        }
        // 3. Default to text
        specSections.push({ name, kind: "text", required: true });
    }
    return {
        descriptorType: "output_format_spec",
        format: "markdown",
        sectionOrderMatters: false,
        sections: specSections,
        tablesOptional: true,
        tables: [],
        emptySectionValue: "None"
    };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "flex-md",
3
- "version": "4.2.7",
3
+ "version": "4.3.0",
4
4
  "description": "Parse and stringify FlexMD: semi-structured Markdown with three powerful layers - Frames, Output Format Spec (OFS), and Detection/Extraction.",
5
5
  "license": "MIT",
6
6
  "author": "",
@@ -52,4 +52,4 @@
52
52
  "nx-helpers": "^1.5.0",
53
53
  "nx-md-parser": "^2.0.2"
54
54
  }
55
- }
55
+ }