flex-md 4.4.7 → 4.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Helpers for "framed vs frameless" Markdown handling before Flex-MD.
|
|
3
|
+
*
|
|
4
|
+
* Goals:
|
|
5
|
+
* 1) Detect framed markdown (single fenced block) and extract its inner content.
|
|
6
|
+
* 2) If NOT markdown-ish, force-wrap into a minimal markdown shape so Flex-MD can still parse.
|
|
7
|
+
*
|
|
8
|
+
* Notes:
|
|
9
|
+
* - This is heuristic by design; tune thresholds as you learn your data.
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Result of the heuristic Markdown detection: the verdict, the frame
 * information, the explanations behind the verdict and the raw signal counts.
 */
export type MarkdownDetection = {
    /** Overall verdict: true when the heuristics consider the text to be Markdown. */
    isMarkdownLikely: boolean;
    /** True when the whole payload was recognised as one single fenced block. */
    isFramed: boolean;
    /** Language tag of the opening fence; null when the payload is not framed or carries no tag. */
    frameLanguage?: string | null;
    /** Human-readable explanations of the signals that drove the verdict. */
    reasons: string[];
    /** Raw signal counts gathered from the text. */
    stats: {
        /** Total number of headings (ATX plus setext). */
        headings: number;
        /** Lines of the form `# text` with one to six hashes. */
        atxHeadings: number;
        /** Non-empty lines followed by an underline made of `=` or `-` characters. */
        setextHeadings: number;
        /** Lines that start with a `-`, `*` or `+` bullet. */
        unorderedListLines: number;
        /** Lines that start with a number followed by `.` or `)`. */
        orderedListLines: number;
        /** Lines that start with `|` and contain at least two pipes. */
        tableRows: number;
        /** Number of fence markers (three backticks at the start of a line), opening and closing alike. */
        codeFences: number;
        /** Single-line spans wrapped in backticks. */
        inlineCodeSpans: number;
        /** Inline links of the form `[text](target)`. */
        mdLinks: number;
        /** Bold and italic spans written with `*` or `_`. */
        emphasisTokens: number;
    };
};
|
|
29
|
+
/**
 * Heuristically decides whether `text` looks like Markdown.
 *
 * Strings are analysed as given; any other value is converted to text first.
 *
 * @param text - The payload to inspect (typically raw LLM output).
 * @returns The verdict together with frame information, the reasons behind
 *          the verdict and the raw signal counts.
 */
export declare function detectMarkdown(text: unknown): MarkdownDetection;
|
|
30
|
+
/**
 * Unwraps a payload that consists of exactly one fenced block.
 *
 * When the whole input is a single fence (with or without a language tag
 * such as `md`, `markdown` or `json`), the inner content is returned.
 * In every other case the original input is handed back untouched.
 *
 * @param input - The text that may be wrapped in a fence.
 * @returns `stripped` is the inner content (or the original input),
 *          `wasFramed` tells whether a fence was removed, and `language`
 *          is the fence's language tag or null.
 */
export declare function stripSingleFence(input: string): {
    stripped: string;
    wasFramed: boolean;
    language: string | null;
};
|
|
41
|
+
/**
 * Turns plain text into a minimal Markdown document: one heading line
 * followed by the text. This lets Flex-MD / nx-md-parser operate even when
 * the model returned no Markdown structure at all.
 *
 * Pick a heading that exists in your OFS to maximize alignment.
 *
 * @param plainText - The text to place under the heading.
 * @param opts - Optional settings:
 *   - `heading`: heading text; defaults to "Full Answer" (common sink section).
 *   - `level`: heading depth; defaults to 3.
 *   - `preserveLeadingWhitespace`: when true the text is not trimmed.
 * @returns The heading line, the body and a trailing newline. An empty body
 *          is written as the placeholder "None".
 */
export declare function forceWrapAsMarkdown(plainText: string, opts?: {
    heading?: string;
    level?: 1 | 2 | 3 | 4 | 5 | 6;
    preserveLeadingWhitespace?: boolean;
}): string;
|
|
53
|
+
/**
 * One-stop preparation of arbitrary model output for Flex-MD. The first
 * rule that applies wins:
 *   1. framed payload      -> the fence is stripped (Flex-MD sees pure markdown)
 *   2. markdown-likely     -> the text is kept as-is
 *   3. anything else       -> the text is wrapped under a fallback heading
 *
 * @param input - The payload to normalize.
 * @param opts - Optional settings:
 *   - `fallbackHeading`: heading used when wrapping; defaults to "Full Answer".
 *   - `fallbackHeadingLevel`: depth of that heading; defaults to 3.
 * @returns `normalizedText` is the text to hand to Flex-MD, `detection` is
 *          the detection result for the input, `wasStripped` /
 *          `stripLanguage` describe a removed fence, and `wasWrapped` tells
 *          whether the fallback heading was applied.
 */
export declare function normalizeForFlexMd(input: unknown, opts?: {
    fallbackHeading?: string;
    fallbackHeadingLevel?: 1 | 2 | 3 | 4 | 5 | 6;
}): {
    normalizedText: string;
    detection: MarkdownDetection;
    wasStripped: boolean;
    stripLanguage: string | null;
    wasWrapped: boolean;
};
|
|
69
|
+
/**
|
|
70
|
+
* Example integration with Flex-MD:
|
|
71
|
+
*
|
|
72
|
+
* import { parseOutputFormatSpec, transformWithOfs } from 'flex-md';
|
|
73
|
+
*
|
|
74
|
+
* const spec = parseOutputFormatSpec(ofsMarkdown);
|
|
75
|
+
* const prep = normalizeForFlexMd(llmText, { fallbackHeading: "Full Answer" });
|
|
76
|
+
* const out = transformWithOfs(prep.normalizedText, spec);
|
|
77
|
+
*/
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Helpers for "framed vs frameless" Markdown handling before Flex-MD.
|
|
3
|
+
*
|
|
4
|
+
* Goals:
|
|
5
|
+
* 1) Detect framed markdown (single fenced block) and extract its inner content.
|
|
6
|
+
* 2) If NOT markdown-ish, force-wrap into a minimal markdown shape so Flex-MD can still parse.
|
|
7
|
+
*
|
|
8
|
+
* Notes:
|
|
9
|
+
* - This is heuristic by design; tune thresholds as you learn your data.
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Matches a payload that consists of exactly one fenced block.
 * Group 1: optional language tag, group 2: rest of the info string,
 * group 3: inner content.
 */
const SINGLE_FENCE_BLOCK_RE = /^```([a-zA-Z0-9_-]+)?([^\n]*)\n([\s\S]*?)\n```$/;
/** Matches every fence marker that sits at the start of a line (opening and closing alike). */
const FENCE_OPEN_RE = /(^|\n)```/g;
/**
 * Converts arbitrary input into the text that gets analysed.
 *
 * Strings pass through, null/undefined become the empty string, and every
 * other value is JSON-serialised. Values JSON cannot represent fall back to
 * `String(value)` instead of crashing the detection.
 */
function toAnalyzableText(value) {
    if (typeof value === "string")
        return value;
    if (value == null)
        return "";
    try {
        // JSON.stringify yields undefined for functions and symbols.
        return JSON.stringify(value) ?? String(value);
    }
    catch {
        // JSON.stringify throws on circular structures and BigInt values.
        return String(value);
    }
}
/**
 * Heuristically decides whether `text` looks like Markdown.
 *
 * The text is scanned for headings, list lines, table rows, fences, inline
 * code, links and emphasis. The verdict is positive when the payload is a
 * single fenced block, when it starts with an ATX heading, or when enough
 * structural signals are present (see the decision rules below).
 *
 * @param text Payload to inspect; non-string values are converted to text first.
 * @returns The verdict (`isMarkdownLikely`), frame information (`isFramed`,
 *          `frameLanguage`), the `reasons` behind the verdict and the raw
 *          signal counts in `stats`.
 */
export function detectMarkdown(text) {
    const reasons = [];
    const s = toAnalyzableText(text).replace(/\r\n/g, "\n");
    const lines = s.split("\n");
    const countMatches = (re) => (s.match(re) ?? []).length;
    const countLines = (predicate) => lines.filter(predicate).length;
    // Counts fence markers (lines starting with three backticks), not blocks.
    const codeFences = [...s.matchAll(FENCE_OPEN_RE)].length;
    // Framed detection (single fenced block). Surrounding whitespace is
    // ignored so that the result agrees with stripSingleFence, which trims
    // its input before matching.
    const candidate = s.trim();
    const m = candidate.match(SINGLE_FENCE_BLOCK_RE);
    const isFramed = !!m && [...candidate.matchAll(FENCE_OPEN_RE)].length === 2;
    const frameLanguage = isFramed ? (m?.[1] ?? null) : null;
    if (isFramed)
        reasons.push("Single fenced code block detected (framed payload).");
    else if (codeFences > 1)
        reasons.push("Multiple fenced code blocks detected.");
    const atxHeadings = countLines((l) => /^#{1,6}\s+\S/.test(l.trim()));
    // A setext heading is a non-empty line followed by an underline of = or -.
    let setextHeadings = 0;
    for (let i = 0; i < lines.length - 1; i++) {
        const cur = lines[i].trim();
        const nxt = lines[i + 1].trim();
        if (cur.length > 0 && (/^={2,}$/.test(nxt) || /^-{2,}$/.test(nxt)))
            setextHeadings++;
    }
    const headings = atxHeadings + setextHeadings;
    const unorderedListLines = countLines((l) => /^\s*[-*+]\s+\S/.test(l));
    const orderedListLines = countLines((l) => /^\s*\d{1,3}([.)])\s+\S/.test(l));
    const tableRows = countLines((l) => {
        const t = l.trim();
        if (!t.startsWith("|") || t.length < 3)
            return false;
        return (t.match(/\|/g) ?? []).length >= 2;
    });
    const inlineCodeSpans = countMatches(/`[^`\n]+`/g);
    const mdLinks = countMatches(/\[[^\]]+\]\([^)]+\)/g);
    const emphasisTokens = countMatches(/\*\*[^*\n]+\*\*/g) +
        countMatches(/__[^_\n]+__/g) +
        countMatches(/(^|[^*])\*[^*\n]+\*([^*]|$)/g) +
        countMatches(/(^|[^_])_[^_\n]+_([^_]|$)/g);
    // Heuristic decision rules
    const hasList = unorderedListLines + orderedListLines >= 2;
    const hasTable = tableRows >= 2;
    const hasOtherSignals = inlineCodeSpans + mdLinks + emphasisTokens >= 2;
    const startsWithHeading = /^\s*#{1,6}\s+\S/.test(lines[0] ?? "");
    let isMarkdownLikely = false;
    if (isFramed) {
        isMarkdownLikely = true;
    }
    else if (headings >= 2) {
        isMarkdownLikely = true;
        reasons.push(`Detected ${headings} markdown heading(s) (>=2).`);
    }
    else if (headings >= 1 && (hasList || hasTable)) {
        isMarkdownLikely = true;
        reasons.push(`Detected heading(s) plus ${hasList ? "list" : "table"} structure.`);
    }
    else if ((hasList && hasTable) || (hasTable && hasOtherSignals) || (hasList && hasOtherSignals)) {
        isMarkdownLikely = true;
        reasons.push("Detected multiple markdown structural signals (lists/tables/links/code/emphasis).");
    }
    else if (!startsWithHeading) {
        // Only report "plain text" when the leading-heading rule below does
        // not overturn the verdict; otherwise the reasons contradict it.
        reasons.push("Insufficient markdown structure signals; treating as plain text.");
    }
    if (startsWithHeading) {
        reasons.push("Text starts with an ATX heading (#...).");
        isMarkdownLikely = true;
    }
    return {
        isMarkdownLikely,
        isFramed,
        frameLanguage,
        reasons,
        stats: {
            headings,
            atxHeadings,
            setextHeadings,
            unorderedListLines,
            orderedListLines,
            tableRows,
            codeFences,
            inlineCodeSpans,
            mdLinks,
            emphasisTokens,
        },
    };
}
|
|
97
|
+
/**
 * Unwraps a payload that consists of exactly one fenced block.
 *
 * The input is normalised (CRLF -> LF, surrounding whitespace removed) and
 * then checked: it must contain exactly two fence markers and match the
 * single-fence pattern as a whole. Language tags such as md, markdown or
 * json on the opening fence are accepted.
 *
 * @param input Text that may be wrapped in a fence.
 * @returns On success the trimmed inner content with `wasFramed: true` and
 *          the fence's language tag (or null). Otherwise the original,
 *          unmodified input with `wasFramed: false` and `language: null`.
 */
export function stripSingleFence(input) {
    const untouched = { stripped: input, wasFramed: false, language: null };
    const normalized = input.replace(/\r\n/g, "\n").trim();
    // One opening plus one closing marker; anything else is not a single frame.
    const markerCount = Array.from(normalized.matchAll(FENCE_OPEN_RE)).length;
    const framed = markerCount === 2 ? normalized.match(SINGLE_FENCE_BLOCK_RE) : null;
    if (framed === null) {
        return untouched;
    }
    // Capture 1 is the language tag, capture 3 the content between the fences.
    const [, languageTag, , content] = framed;
    return {
        stripped: (content ?? "").trim(),
        wasFramed: true,
        language: languageTag ?? null,
    };
}
|
|
116
|
+
/**
 * Force-wrap non-markdown text into a minimal heading-based markdown document.
 * This helps Flex-MD / nx-md-parser operate even when the model returns plain text.
 *
 * Choose a heading that exists in your OFS to maximize alignment.
 *
 * @param plainText Text to place under the heading; CRLF line endings are
 *                  converted to LF.
 * @param opts Optional settings:
 *   - `heading`: heading text. Default: "Full Answer" (common sink section).
 *   - `level`: heading depth. Default: 3. Values outside 1-6 are clamped into
 *     that range and non-numeric values fall back to 3, because only one to
 *     six hashes form a Markdown heading.
 *   - `preserveLeadingWhitespace`: when true the text is not trimmed.
 * @returns `<hashes> <heading>\n<body>\n`. A body that is empty or consists
 *          only of whitespace is written as the placeholder "None".
 */
export function forceWrapAsMarkdown(plainText, opts) {
    const heading = opts?.heading ?? "Full Answer";
    // Guard the repeat() call: a negative count throws a RangeError and
    // 0 / NaN / >6 would not produce a heading at all.
    const requestedLevel = Number(opts?.level ?? 3);
    const level = Number.isFinite(requestedLevel)
        ? Math.min(6, Math.max(1, Math.trunc(requestedLevel)))
        : 3;
    const hashes = "#".repeat(level);
    const raw = plainText.replace(/\r\n/g, "\n");
    const body = opts?.preserveLeadingWhitespace ? raw : raw.trim();
    // If empty, keep it explicit. The check ignores whitespace so that an
    // untrimmed, blank body still gets the placeholder.
    const safeBody = body.trim().length ? body : "None";
    return `${hashes} ${heading}\n${safeBody}\n`;
}
|
|
133
|
+
/**
 * End-to-end "normalize input for Flex-MD" helper:
 * - If framed: strip the fence (so Flex-MD sees pure markdown)
 * - Else: if markdown-likely: keep as-is
 * - Else: wrap as markdown under a chosen heading
 *
 * Input handling: strings are used as given, null/undefined are treated as
 * empty text, and every other value is JSON-serialised. Values JSON cannot
 * represent (functions, symbols, BigInt, circular structures) fall back to
 * `String(input)` instead of raising an error.
 *
 * @param input Payload to normalize (typically raw LLM output).
 * @param opts Optional settings:
 *   - `fallbackHeading`: heading used when wrapping. Default: "Full Answer".
 *   - `fallbackHeadingLevel`: depth of that heading. Default: 3.
 * @returns `normalizedText` (text to hand to Flex-MD), `detection` (result
 *          of detectMarkdown for the input text), `wasStripped` and
 *          `stripLanguage` (describe a removed fence) and `wasWrapped`
 *          (whether the fallback heading was applied).
 */
export function normalizeForFlexMd(input, opts) {
    let raw;
    if (typeof input === "string") {
        raw = input;
    }
    else if (input == null) {
        // Missing input means "no text"; serialising it would produce the
        // literal two-character text `""`.
        raw = "";
    }
    else {
        try {
            // JSON.stringify yields undefined for functions and symbols.
            raw = JSON.stringify(input) ?? String(input);
        }
        catch {
            // JSON.stringify throws on circular structures and BigInt values.
            raw = String(input);
        }
    }
    const detection = detectMarkdown(raw);
    // 1) Strip if framed
    const { stripped, wasFramed, language } = stripSingleFence(raw);
    if (wasFramed) {
        return {
            normalizedText: stripped,
            detection,
            wasStripped: true,
            stripLanguage: language,
            wasWrapped: false,
        };
    }
    // 2) Keep if markdown-likely
    if (detection.isMarkdownLikely) {
        return {
            normalizedText: raw,
            detection,
            wasStripped: false,
            stripLanguage: null,
            wasWrapped: false,
        };
    }
    // 3) Wrap if not markdown-likely
    const wrapped = forceWrapAsMarkdown(raw, {
        heading: opts?.fallbackHeading ?? "Full Answer",
        level: opts?.fallbackHeadingLevel ?? 3,
    });
    return {
        normalizedText: wrapped,
        detection,
        wasStripped: false,
        stripLanguage: null,
        wasWrapped: true,
    };
}
|
|
176
|
+
/**
|
|
177
|
+
* Example integration with Flex-MD:
|
|
178
|
+
*
|
|
179
|
+
* import { parseOutputFormatSpec, transformWithOfs } from 'flex-md';
|
|
180
|
+
*
|
|
181
|
+
* const spec = parseOutputFormatSpec(ofsMarkdown);
|
|
182
|
+
* const prep = normalizeForFlexMd(llmText, { fallbackHeading: "Full Answer" });
|
|
183
|
+
* const out = transformWithOfs(prep.normalizedText, spec);
|
|
184
|
+
*/
|
|
@@ -2,6 +2,7 @@ import { DetectJsonAllResult } from "./types.js";
|
|
|
2
2
|
export * from "./types.js";
|
|
3
3
|
export { detectJsonIntent } from "./detectIntent.js";
|
|
4
4
|
export { detectJsonContainers, detectJsonPresence } from "./detectPresence.js";
|
|
5
|
+
export * from "./detectMarkdown.js";
|
|
5
6
|
export declare function detectJsonAll(textOrMd: string, opts?: {
|
|
6
7
|
parseJson?: boolean;
|
|
7
8
|
}): DetectJsonAllResult;
|
|
@@ -3,6 +3,7 @@ import { detectJsonContainers, detectJsonPresence } from "./detectPresence.js";
|
|
|
3
3
|
export * from "./types.js";
|
|
4
4
|
export { detectJsonIntent } from "./detectIntent.js";
|
|
5
5
|
export { detectJsonContainers, detectJsonPresence } from "./detectPresence.js";
|
|
6
|
+
export * from "./detectMarkdown.js";
|
|
6
7
|
export function detectJsonAll(textOrMd, opts) {
|
|
7
8
|
return {
|
|
8
9
|
intent: detectJsonIntent(textOrMd),
|
package/package.json
CHANGED