flex-md 4.2.0 → 4.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -564,6 +564,38 @@ if (status === 'validated' || status === 'fixed') {
564
564
  2. **Type Enforcement**: Lists and Tables are automatically converted to JSON arrays and objects.
565
565
  3. **Single Source of Truth**: Use the same spec to guide the LLM AND parse its response.
566
566
 
567
+ ## Advanced AI Features (via NX-MD-Parser 1.4.0)
568
+
569
+ Flex-MD utilizes the full power of `nx-md-parser` v1.4.0, providing enterprise-grade AI transformation capabilities.
570
+
571
+ ### 🤖 Multi-Algorithm Fuzzy Matching
572
+ The engine uses a weighted combination of four powerful algorithms to find the best match for your headings and keys:
573
+ - **Jaro-Winkler**: Character-level similarity (40%)
574
+ - **Jaccard Tokens**: Token-based similarity (30%)
575
+ - **Dice Coefficient**: N-gram similarity (20%)
576
+ - **Levenshtein Ratio**: Edit distance (10%)
577
+
578
+ ### 🧠 Machine Learning (Learn Aliases)
579
+ You can let the system learn from your data to improve matching over time.
580
+
581
+ ```typescript
582
+ import { learnAliasesFromTransformations } from 'flex-md';
583
+
584
+ const learningResult = learnAliasesFromTransformations([
585
+ {
586
+ input: { "Projct Name": "Test" },
587
+ output: { title: "Test" },
588
+ schema: yourSchema
589
+ }
590
+ ]);
591
+ // System now knows "Projct Name" is an alias for "title"
592
+ ```
593
+
594
+ ### ⚙️ Intelligent Auto-Fixing
595
+ - **Typo Correction**: Automatically fixes property name typos.
596
+ - **Structural Repair**: Restructures flat objects into nested schemas.
597
+ - **Smart Conversion**: Automatically handles `string -> number`, `string -> boolean`, and wrapper types.
598
+
567
599
  ## Spec Memory: Remember & Recall
568
600
 
569
601
  Flex-MD includes an in-memory storage feature that allows you to "remember" an Output Format Spec and later reuse it by a unique `recallId`. This is especially useful for maintaining state within a single execution environment.
@@ -3,7 +3,25 @@ import { parseHeadingsAndSections, extractBullets, normalizeName } from "../md/p
3
3
  * Extracts sections, lists, and tables from Markdown based on the OFS.
4
4
  */
5
5
  export function extractFromMarkdown(md, spec) {
6
- const parsed = parseHeadingsAndSections(md);
6
+ // 0. Robustness: check for fenced block that might contain the target content
7
+ // Highly relevant for LLM responses where the model occasionally wraps everything in a container
8
+ // even if not strictly asked, or if the user provided unframed content but we have L2+ expectations elsewhere.
9
+ const rxFence = /```(?:markdown|flexmd)?\s*\n([\s\S]*?)\n```/gi;
10
+ const matches = Array.from(md.matchAll(rxFence));
11
+ let workingContent = md;
12
+ if (matches.length === 1) {
13
+ const content = matches[0][1];
14
+ // If the fenced content matches at least as many spec sections as the whole document (and at least one), use it
15
+ const parsedOutside = parseHeadingsAndSections(md);
16
+ const parsedInside = parseHeadingsAndSections(content);
17
+ const specNorms = new Set(spec.sections.map(s => normalizeName(s.name)));
18
+ const countOutside = parsedOutside.filter(p => specNorms.has(normalizeName(p.heading.name))).length;
19
+ const countInside = parsedInside.filter(p => specNorms.has(normalizeName(p.heading.name))).length;
20
+ if (countInside >= countOutside && countInside > 0) {
21
+ workingContent = content;
22
+ }
23
+ }
24
+ const parsed = parseHeadingsAndSections(workingContent);
7
25
  const sectionsByName = {};
8
26
  const tables = [];
9
27
  const specMap = new Map(spec.sections.map(s => [normalizeName(s.name), s]));
package/dist/md/parse.js CHANGED
@@ -29,8 +29,10 @@ export function extractFencedBlocks(text) {
29
29
  return blocks;
30
30
  }
31
31
  export function parseHeadingsAndSections(md) {
32
- // Standard headings #... and alternative ===key
33
- const rx = /^((?:#{1,6})\s+(.+?)\s*|===(.+?)\s*)$/gm;
32
+ // Standard headings #... and alternative ===key.
33
+ // Use [ \t] instead of \s around the heading text: \s also matches line breaks, which lets a heading match run past the end of its own line.
34
+ // No explicit \r? is needed for CRLF input: with the m flag, $ already matches before \r as well as before \n.
35
+ const rx = /^((?:#{1,6})[ \t]+(.+?)[ \t]*|===(.+?)[ \t]*)$/gm;
34
36
  const headings = [];
35
37
  let m;
36
38
  while ((m = rx.exec(md)) !== null) {
@@ -42,7 +44,8 @@ export function parseHeadingsAndSections(md) {
42
44
  name = (m[3] ?? "").trim();
43
45
  }
44
46
  else {
45
- const hashes = (full.match(/^#+/) ?? [""])[0];
47
+ const hashesMatch = full.match(/^#+/);
48
+ const hashes = hashesMatch ? hashesMatch[0] : "";
46
49
  level = hashes.length;
47
50
  name = (m[2] ?? "").trim();
48
51
  }
@@ -1,3 +1,4 @@
1
+ import { normalizeName } from "../md/parse.js";
1
2
  /**
2
3
  * Validate a format specification.
3
4
  * Returns detailed validation results.
@@ -138,6 +139,18 @@ export function parseOutputFormatSpec(md, opts = {}) {
138
139
  }
139
140
  continue;
140
141
  }
142
+ // heading items (e.g. ### Short Answer)
143
+ const headingMatch = line.match(/^#{1,6}\s+(.+)$/);
144
+ if (headingMatch) {
145
+ const name = headingMatch[1].trim();
146
+ // Don't re-parse "Output format" itself if it somehow gets in here
147
+ if (normalizeName(name) !== "output format") {
148
+ const s = { name, kind: "text" };
149
+ sections.push(s);
150
+ currentSection = s;
151
+ }
152
+ continue;
153
+ }
141
154
  // If not a bullet and we have a current section, it's an instruction
142
155
  if (currentSection && line.length > 0) {
143
156
  // Support "Columns: A, B, C" in instructions for tables
@@ -2,8 +2,11 @@ import { isIssuesEnvelopeCheck } from "../md/parse.js";
2
2
  export function detectResponseKind(text, spec) {
3
3
  const issuesResult = isIssuesEnvelopeCheck(text);
4
4
  const hasIssues = issuesResult.isIssuesEnvelope;
5
+ // Use more robust detection: check for both #+ Name and ===Name
5
6
  const hasSections = spec.sections.some(s => {
6
- const rx = new RegExp(`^#+\\s+${s.name}`, "im");
7
+ // Escape special chars in name but match case-insensitively and with flexible whitespace
8
+ const escapedName = s.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
9
+ const rx = new RegExp(`^((?:#{1,6}\\s+${escapedName})|(?:===${escapedName}))\\s*$`, "im");
7
10
  return rx.test(text);
8
11
  });
9
12
  const isRawJson = /^\s*(\{|\[)/.test(text.trim()) && /\s*(\}|\])$/.test(text.trim());
@@ -248,13 +248,21 @@ export function validateMarkdownAgainstOfs(input, spec, level, policyOverride) {
248
248
  }
249
249
  }
250
250
  }
251
+ // Compute detectedKind more robustly: if we found more than zero sections, it's at least sectioned
252
+ let detectedKind = "markdown";
253
+ if (level >= 2) {
254
+ detectedKind = fencesAll.length > 0 ? "fenced" : (parsed.length > 0 ? "sectioned" : "markdown");
255
+ }
256
+ else {
257
+ detectedKind = parsed.length > 0 ? "sectioned" : "markdown";
258
+ }
251
259
  const ok = !issues.some(i => i.severity === "error");
252
260
  return {
253
261
  ok,
254
262
  level,
255
263
  issues,
256
264
  stats: {
257
- detectedKind: level >= 2 ? (fencesAll.length ? "fenced" : "markdown") : (parsed.length ? "sectioned" : "markdown"),
265
+ detectedKind,
258
266
  sectionCount: occurrences.size,
259
267
  missingRequired,
260
268
  duplicates,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "flex-md",
3
- "version": "4.2.0",
3
+ "version": "4.2.2",
4
4
  "description": "Parse and stringify FlexMD: semi-structured Markdown with three powerful layers - Frames, Output Format Spec (OFS), and Detection/Extraction.",
5
5
  "license": "MIT",
6
6
  "author": "",