flex-md 1.1.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -29
- package/SPEC.md +559 -0
- package/dist/__tests__/validate.test.d.ts +1 -0
- package/dist/__tests__/validate.test.js +108 -0
- package/dist/detect/json/detectIntent.d.ts +2 -0
- package/dist/detect/json/detectIntent.js +79 -0
- package/dist/detect/json/detectPresence.d.ts +6 -0
- package/dist/detect/json/detectPresence.js +191 -0
- package/dist/detect/json/index.d.ts +7 -0
- package/dist/detect/json/index.js +12 -0
- package/dist/detect/json/types.d.ts +43 -0
- package/dist/detect/json/types.js +1 -0
- package/dist/detection/detector.d.ts +6 -0
- package/dist/detection/detector.js +104 -0
- package/dist/detection/extractor.d.ts +10 -0
- package/dist/detection/extractor.js +54 -0
- package/dist/extract/extract.d.ts +5 -0
- package/dist/extract/extract.js +50 -0
- package/dist/extract/types.d.ts +11 -0
- package/dist/extract/types.js +1 -0
- package/dist/index.d.ts +13 -3
- package/dist/index.js +20 -3
- package/dist/issues/build.d.ts +26 -0
- package/dist/issues/build.js +62 -0
- package/dist/md/lists.d.ts +14 -0
- package/dist/md/lists.js +33 -0
- package/dist/md/match.d.ts +12 -0
- package/dist/md/match.js +44 -0
- package/dist/md/outline.d.ts +6 -0
- package/dist/md/outline.js +67 -0
- package/dist/md/parse.d.ts +29 -0
- package/dist/md/parse.js +105 -0
- package/dist/md/tables.d.ts +25 -0
- package/dist/md/tables.js +72 -0
- package/dist/ofs/enricher.d.ts +16 -0
- package/dist/ofs/enricher.js +77 -0
- package/dist/ofs/extractor.d.ts +9 -0
- package/dist/ofs/extractor.js +75 -0
- package/dist/ofs/issues.d.ts +14 -0
- package/dist/ofs/issues.js +92 -0
- package/dist/ofs/issuesEnvelope.d.ts +15 -0
- package/dist/ofs/issuesEnvelope.js +71 -0
- package/dist/ofs/parser.d.ts +9 -0
- package/dist/ofs/parser.js +133 -0
- package/dist/ofs/stringify.d.ts +5 -0
- package/dist/ofs/stringify.js +32 -0
- package/dist/ofs/validator.d.ts +10 -0
- package/dist/ofs/validator.js +91 -0
- package/dist/outline/builder.d.ts +10 -0
- package/dist/outline/builder.js +85 -0
- package/dist/outline/renderer.d.ts +6 -0
- package/dist/outline/renderer.js +23 -0
- package/dist/parser.js +58 -10
- package/dist/parsers/lists.d.ts +6 -0
- package/dist/parsers/lists.js +36 -0
- package/dist/parsers/tables.d.ts +10 -0
- package/dist/parsers/tables.js +58 -0
- package/dist/pipeline/enforce.d.ts +10 -0
- package/dist/pipeline/enforce.js +46 -0
- package/dist/pipeline/kind.d.ts +16 -0
- package/dist/pipeline/kind.js +24 -0
- package/dist/pipeline/repair.d.ts +14 -0
- package/dist/pipeline/repair.js +112 -0
- package/dist/strictness/container.d.ts +14 -0
- package/dist/strictness/container.js +46 -0
- package/dist/strictness/processor.d.ts +5 -0
- package/dist/strictness/processor.js +29 -0
- package/dist/strictness/types.d.ts +77 -0
- package/dist/strictness/types.js +106 -0
- package/dist/test-pipeline.d.ts +1 -0
- package/dist/test-pipeline.js +53 -0
- package/dist/test-runner.d.ts +1 -0
- package/dist/test-runner.js +331 -0
- package/dist/test-strictness.d.ts +1 -0
- package/dist/test-strictness.js +213 -0
- package/dist/types.d.ts +140 -22
- package/dist/validate/policy.d.ts +10 -0
- package/dist/validate/policy.js +17 -0
- package/dist/validate/types.d.ts +11 -0
- package/dist/validate/types.js +1 -0
- package/dist/validate/validate.d.ts +2 -0
- package/dist/validate/validate.js +308 -0
- package/docs/mdflex-compliance.md +216 -0
- package/package.json +15 -6
package/README.md
CHANGED
|
@@ -1,44 +1,90 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Flex-MD (v3.0) — Markdown Output Contract
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Flex-MD is a TypeScript library for building and enforcing **Markdown Output Contracts** with LLMs. It treats Markdown as a semi-structured data format, allowing you to define required sections, list types, and tables while maintaining 100% standard Markdown compatibility.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Version 3.0 introduces the **Detect-Repair-Enforce** pipeline, ensuring LLM responses are coerced into compliant structures before being parsed.
|
|
6
|
+
|
|
7
|
+
## Key Features
|
|
8
|
+
|
|
9
|
+
- **Standard Markdown**: No proprietary tags. Pure headings, lists, and tables.
|
|
10
|
+
- **Strictness Levels (L0–L3)**: From loose guidance to rigid structural enforcement.
|
|
11
|
+
- **Deterministic Repair**: Auto-fixes misformatted LLM output (merged fences, missing headings, format conversion).
|
|
12
|
+
- **Issues Envelope**: A structured failure format for when repairs fail, allowing safe fallbacks.
|
|
13
|
+
- **Tax-Aware Prompts**: Generates minimal, relevant instructions to save tokens.
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
6
16
|
|
|
7
17
|
```bash
|
|
8
|
-
npm
|
|
18
|
+
npm install flex-md
|
|
9
19
|
```
|
|
10
20
|
|
|
11
|
-
##
|
|
21
|
+
## Quick Start
|
|
12
22
|
|
|
13
|
-
|
|
14
|
-
import { parseFlexMd, stringifyFlexMd } from "flex-md";
|
|
23
|
+
### 1. Define your Output Format Spec (OFS)
|
|
15
24
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
Hello
|
|
25
|
+
```typescript
|
|
26
|
+
import { parseOutputFormatSpec } from 'flex-md';
|
|
19
27
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
28
|
+
const spec = parseOutputFormatSpec(`
|
|
29
|
+
## Output format
|
|
30
|
+
- Short answer — text (required)
|
|
31
|
+
- Reasoning — ordered list (required)
|
|
32
|
+
- Assumptions — list (optional)
|
|
33
|
+
|
|
34
|
+
empty sections:
|
|
35
|
+
- If a section is empty, write \`None\`.
|
|
36
|
+
`);
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### 2. Generate Prompt Guidance
|
|
40
|
+
|
|
41
|
+
```typescript
|
|
42
|
+
import { buildMarkdownGuidance } from 'flex-md';
|
|
43
|
+
|
|
44
|
+
const guidance = buildMarkdownGuidance(spec, { level: 1 });
|
|
45
|
+
// Output: "Reply in Markdown. Include these headings... If a section is empty, write 'None'."
|
|
46
|
+
```
|
|
25
47
|
|
|
26
|
-
|
|
27
|
-
console.log(doc.frames[0]?.type); // "message"
|
|
48
|
+
### 3. Enforce the Contract
|
|
28
49
|
|
|
29
|
-
|
|
30
|
-
|
|
50
|
+
```typescript
|
|
51
|
+
import { enforceFlexMd } from 'flex-md';
|
|
31
52
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
if (
|
|
36
|
-
console.log("
|
|
53
|
+
const llmResponse = "I think... ## Short answer \n Yes. ## Reasoning \n 1. Logic";
|
|
54
|
+
const result = enforceFlexMd(llmResponse, spec, { level: 1 });
|
|
55
|
+
|
|
56
|
+
if (result.ok) {
|
|
57
|
+
console.log(result.extracted.sectionsByName["Short answer"].md);
|
|
58
|
+
} else {
|
|
59
|
+
// result.outputText will contain the Issues Envelope if strictness level >= 1
|
|
60
|
+
console.log(result.outputText);
|
|
37
61
|
}
|
|
38
62
|
```
|
|
39
63
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
64
|
+
## Strictness Levels
|
|
65
|
+
|
|
66
|
+
| Level | Goal | Guidance | Enforcement |
|
|
67
|
+
| :--- | :--- | :--- | :--- |
|
|
68
|
+
| **L0** | Plain Markdown | "Reply in Markdown." | None. Accept as-is. |
|
|
69
|
+
| **L1** | Sectioned MD | "Include these headings..." | Headings must exist. |
|
|
70
|
+
| **L2** | Fenced Container | "Return inside a single block..." | Exactly one fenced block. |
|
|
71
|
+
| **L3** | Typed Structure | "Reasoning is an ordered list..." | Enforce list/table kinds. |
|
|
72
|
+
|
|
73
|
+
## The Repair Pipeline
|
|
74
|
+
|
|
75
|
+
Flex-MD doesn't just validate; it **repairs**. Our deterministic 9-step plan handles, among other things:
|
|
76
|
+
1. **Container Normalization**: Wrapping or merging multiple fenced blocks.
|
|
77
|
+
2. **Heading Standardization**: Case-insensitive matching and naming cleanup.
|
|
78
|
+
3. **Missing Headings**: Adding required sections as `None`.
|
|
79
|
+
4. **Stray Content**: Moving text outside headings into a default section.
|
|
80
|
+
5. **Format Conversion**: Transforming bullets to numbered lists (and vice-versa) based on spec.
|
|
81
|
+
|
|
82
|
+
## Documentation
|
|
83
|
+
|
|
84
|
+
Detailed guides and specs can be found in the [docs](./docs) folder:
|
|
85
|
+
- [MDFlex Compliance Spec](./docs/mdflex-compliance.md)
|
|
86
|
+
- [OFS Syntax Guide](./SPEC.md)
|
|
87
|
+
|
|
88
|
+
## License
|
|
89
|
+
|
|
90
|
+
MIT
|
package/SPEC.md
ADDED
|
@@ -0,0 +1,559 @@
|
|
|
1
|
+
# flex-md — End-to-end Spec (v1.1)
|
|
2
|
+
|
|
3
|
+
## 0) What this package does
|
|
4
|
+
|
|
5
|
+
`flex-md` provides **three complementary layers**:
|
|
6
|
+
|
|
7
|
+
### Layer A — FlexMD Frames (semi-structured Markdown)
|
|
8
|
+
|
|
9
|
+
A tiny set of anchors (frames, meta, payload binding) on top of Markdown to reliably round-trip to/from JSON.
|
|
10
|
+
|
|
11
|
+
### Layer B — Plain Markdown "Output Format Spec" (OFS)
|
|
12
|
+
|
|
13
|
+
A **Markdown-native contract** that any LLM can follow without knowing FlexMD.
|
|
14
|
+
From OFS, the package can:
|
|
15
|
+
|
|
16
|
+
* generate minimal LLM guidance (enricher)
|
|
17
|
+
* validate the response format
|
|
18
|
+
* extract content to JSON, building a **nested tree** from heading levels
|
|
19
|
+
|
|
20
|
+
### Layer C — Detection & Extraction from arbitrary text
|
|
21
|
+
|
|
22
|
+
Find and parse:
|
|
23
|
+
|
|
24
|
+
* fenced ` ```flexmd ` objects (best)
|
|
25
|
+
* fenced JSON "FlexDocument" objects
|
|
26
|
+
* raw/unframed FlexMD (best-effort)
|
|
27
|
+
* optional generic Markdown snippets (opaque or lightly structured)
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## 1) Core principles (hard requirements)
|
|
32
|
+
|
|
33
|
+
1. **Markdown-first**: All guidance is written in plain Markdown concepts (headings, lists, tables, fenced blocks).
|
|
34
|
+
2. **Section order never matters**.
|
|
35
|
+
3. **Order matters only when the content type says it does**:
|
|
36
|
+
* `ordered list` ⇒ numbered list
|
|
37
|
+
* `ordered table` ⇒ `#` column with `1..N`
|
|
38
|
+
4. **Structure comes from heading levels**: accept any heading level (`#..######`) and build nested JSON from it.
|
|
39
|
+
5. **Internal keys/ids/paths are never rendered back to Markdown** (unless debug mode).
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## 2) Data models (TypeScript)
|
|
44
|
+
|
|
45
|
+
### 2.1 FlexMD Frames (Layer A)
|
|
46
|
+
|
|
47
|
+
```ts
|
|
48
|
+
export type FlexMetaValue = string | string[] | number | boolean | null;
|
|
49
|
+
|
|
50
|
+
export interface FlexPayload {
|
|
51
|
+
lang?: string; // e.g. "json", "table"
|
|
52
|
+
raw: string; // always preserved
|
|
53
|
+
value: unknown; // parsed JSON for json; parsed table structure for table; otherwise string
|
|
54
|
+
parseError?: string;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export interface FlexFrame {
|
|
58
|
+
type: string; // e.g. "message", "section", ...
|
|
59
|
+
role?: string; // user|assistant|system|tool|...
|
|
60
|
+
id?: string;
|
|
61
|
+
ts?: string;
|
|
62
|
+
|
|
63
|
+
meta?: Record<string, FlexMetaValue>;
|
|
64
|
+
title?: string;
|
|
65
|
+
body_md?: string;
|
|
66
|
+
|
|
67
|
+
payloads?: Record<string, FlexPayload>;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
export interface FlexDocument {
|
|
71
|
+
title?: string;
|
|
72
|
+
meta?: Record<string, FlexMetaValue>;
|
|
73
|
+
frames: FlexFrame[];
|
|
74
|
+
}
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 2.2 Output Format Spec (OFS) (Layer B)
|
|
78
|
+
|
|
79
|
+
```ts
|
|
80
|
+
export type SectionKind = "prose" | "list" | "ordered_list";
|
|
81
|
+
|
|
82
|
+
export interface OfsSection {
|
|
83
|
+
name: string; // "Short answer"
|
|
84
|
+
kind: SectionKind; // prose/list/ordered_list
|
|
85
|
+
hint?: string; // optional text after delimiter, not required
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
export type TableKind = "table" | "ordered_table";
|
|
89
|
+
|
|
90
|
+
export interface OfsTable {
|
|
91
|
+
columns: string[]; // ["property1","property2"]
|
|
92
|
+
kind: TableKind;
|
|
93
|
+
by?: string; // informational dimension, not mandatory sorting
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
export interface OutputFormatSpec {
|
|
97
|
+
descriptorType: "output_format_spec";
|
|
98
|
+
format: "markdown";
|
|
99
|
+
sectionOrderMatters: false;
|
|
100
|
+
|
|
101
|
+
sections: OfsSection[];
|
|
102
|
+
|
|
103
|
+
tablesOptional: boolean; // default true
|
|
104
|
+
tables: OfsTable[];
|
|
105
|
+
|
|
106
|
+
emptySectionValue?: string; // default "None"
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### 2.3 Markdown Outline Tree (nested headings)
|
|
111
|
+
|
|
112
|
+
```ts
|
|
113
|
+
export interface MdNode {
|
|
114
|
+
title: string; // heading text, cleaned
|
|
115
|
+
level: number; // 1..6
|
|
116
|
+
key: string; // slugified internal key
|
|
117
|
+
id?: string; // optional internal
|
|
118
|
+
content_md: string;
|
|
119
|
+
children: MdNode[];
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
export interface MdOutline {
|
|
123
|
+
type: "md_outline";
|
|
124
|
+
nodes: MdNode[];
|
|
125
|
+
}
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### 2.4 Extracted Result (sections + structure)
|
|
129
|
+
|
|
130
|
+
```ts
|
|
131
|
+
export interface ListItem {
|
|
132
|
+
text: string;
|
|
133
|
+
index?: number; // for ordered lists
|
|
134
|
+
children: ListItem[];
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
export interface ParsedList {
|
|
138
|
+
kind: "list";
|
|
139
|
+
ordered: boolean;
|
|
140
|
+
items: ListItem[];
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
export interface ParsedTable {
|
|
144
|
+
kind: "table" | "ordered_table";
|
|
145
|
+
by?: string;
|
|
146
|
+
columns: string[]; // includes "#" first column for ordered_table
|
|
147
|
+
rows: string[][];
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
export interface ExtractedResult {
|
|
151
|
+
outline: MdOutline;
|
|
152
|
+
sectionsByName: Record<string, {
|
|
153
|
+
nodeKey: string;
|
|
154
|
+
nodeLevel: number;
|
|
155
|
+
md: string; // raw content markdown
|
|
156
|
+
list?: ParsedList; // only if section kind requires list parsing (or enabled)
|
|
157
|
+
}>;
|
|
158
|
+
tables: ParsedTable[];
|
|
159
|
+
}
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## 3) Layer A — FlexMD Frames format
|
|
165
|
+
|
|
166
|
+
### 3.1 Frame header (two accepted forms)
|
|
167
|
+
|
|
168
|
+
**Bracket line**
|
|
169
|
+
|
|
170
|
+
```md
|
|
171
|
+
[[message role=user id=m1 ts=...]]
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
**Markdown heading with brackets**
|
|
175
|
+
|
|
176
|
+
```md
|
|
177
|
+
## [[message role=user id=m1]]
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### 3.2 Meta lines
|
|
181
|
+
|
|
182
|
+
Immediately after a frame header (meta block), lines like:
|
|
183
|
+
|
|
184
|
+
```md
|
|
185
|
+
@tags: a, b
|
|
186
|
+
@priority: high
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Default rule: meta is only recognized before the first non-meta body line (unless `metaAnywhere=true`).
|
|
190
|
+
|
|
191
|
+
### 3.3 Payload binding
|
|
192
|
+
|
|
193
|
+
````md
|
|
194
|
+
@payload:name: input
|
|
195
|
+
```json
|
|
196
|
+
{"a":1}
|
|
197
|
+
```
|
|
198
|
+
````
|
|
199
|
+
|
|
200
|
+
- The line `@payload:name: X` binds the **next fenced block** to payload `X`.
|
|
201
|
+
- `json` is parsed; parsing errors go to `parseError`.
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## 4) Layer B — Output Format Spec (OFS): the LLM-facing descriptor
|
|
206
|
+
|
|
207
|
+
### 4.1 Canonical OFS block (example)
|
|
208
|
+
|
|
209
|
+
```md
|
|
210
|
+
## Output format (Markdown)
|
|
211
|
+
Include these sections somewhere (order does not matter):
|
|
212
|
+
|
|
213
|
+
- Short answer — prose
|
|
214
|
+
- Long answer — prose
|
|
215
|
+
- Reasoning — ordered list
|
|
216
|
+
- Assumptions — list
|
|
217
|
+
- Unknowns — list
|
|
218
|
+
|
|
219
|
+
Tables (only if needed):
|
|
220
|
+
- (property1, property2, property3 — table)
|
|
221
|
+
- (property1, property2, property3 — ordered table, by property2)
|
|
222
|
+
|
|
223
|
+
Empty sections:
|
|
224
|
+
- If a section is empty, write `None`.
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
### 4.2 Meaning of kinds (hard rules)
|
|
228
|
+
|
|
229
|
+
* `prose`: any Markdown text; lists allowed but not required.
|
|
230
|
+
* `list`: must be `None` OR contain at least one bullet line `- ` (nested allowed).
|
|
231
|
+
* `ordered list`: must be `None` OR contain at least one numbered line `^\d+\.` (nested allowed).
|
|
232
|
+
* `table`: Markdown pipe table with listed columns.
|
|
233
|
+
* `ordered table`: same but includes first column **exactly** `#` and rows numbered `1..N`.
|
|
234
|
+
|
|
235
|
+
> "by property2" is informational; do **not** enforce sorting unless the task explicitly requests sorting.
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
## 5) Instruction Enricher (feature-driven, minimal)
|
|
240
|
+
|
|
241
|
+
Given an `OutputFormatSpec`, generate only relevant guidance.
|
|
242
|
+
|
|
243
|
+
### 5.1 Enricher output template (generated)
|
|
244
|
+
|
|
245
|
+
Always include (if emptySectionValue is set):
|
|
246
|
+
|
|
247
|
+
* ``If a section is empty, write `None`.``
|
|
248
|
+
|
|
249
|
+
If any `list` sections:
|
|
250
|
+
|
|
251
|
+
* `List sections must use '-' bullets (nested allowed).`
|
|
252
|
+
|
|
253
|
+
If any `ordered_list` sections:
|
|
254
|
+
|
|
255
|
+
* `Ordered-list sections must use numbered items (nested allowed).`
|
|
256
|
+
|
|
257
|
+
If any `table` declared:
|
|
258
|
+
|
|
259
|
+
* `Tables must be Markdown pipe tables with the specified columns.`
|
|
260
|
+
|
|
261
|
+
If any `ordered_table` declared:
|
|
262
|
+
|
|
263
|
+
* `Ordered tables must add a first column named '#' with rows numbered 1..N.`
|
|
264
|
+
|
|
265
|
+
### 5.2 Example implementation (TypeScript)
|
|
266
|
+
|
|
267
|
+
```ts
|
|
268
|
+
export function enrichInstructions(spec: OutputFormatSpec): string {
|
|
269
|
+
const lines: string[] = [];
|
|
270
|
+
|
|
271
|
+
if (spec.emptySectionValue) {
|
|
272
|
+
lines.push(`- If a section is empty, write \`${spec.emptySectionValue}\`.`);
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
const hasList = spec.sections.some(s => s.kind === "list");
|
|
276
|
+
const hasOrderedList = spec.sections.some(s => s.kind === "ordered_list");
|
|
277
|
+
const hasTable = spec.tables.length > 0;
|
|
278
|
+
const hasOrderedTable = spec.tables.some(t => t.kind === "ordered_table");
|
|
279
|
+
|
|
280
|
+
if (hasList) lines.push(`- List sections must use '-' bullets (nested allowed).`);
|
|
281
|
+
if (hasOrderedList) lines.push(`- Ordered-list sections must use numbered items (nested allowed).`);
|
|
282
|
+
if (hasTable) lines.push(`- Tables must be Markdown pipe tables with the specified columns.`);
|
|
283
|
+
if (hasOrderedTable) lines.push(`- Ordered tables must add a first column named '#' with rows numbered 1..N.`);
|
|
284
|
+
|
|
285
|
+
return lines.length ? `Rules:\n${lines.map(l => `- ${l.replace(/^- /, "")}`).join("\n")}\n` : "";
|
|
286
|
+
}
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
---
|
|
290
|
+
|
|
291
|
+
## 6) Accept any heading level and build nested JSON outline
|
|
292
|
+
|
|
293
|
+
### 6.1 Outline build algorithm (stack)
|
|
294
|
+
|
|
295
|
+
**Parsing rule**
|
|
296
|
+
|
|
297
|
+
* Any heading `#..######` is accepted.
|
|
298
|
+
* Heading levels define parent/child relationships.
|
|
299
|
+
|
|
300
|
+
**Tree construction**
|
|
301
|
+
|
|
302
|
+
* Use a stack of nodes.
|
|
303
|
+
* New node at level `L` becomes:
|
|
304
|
+
* child of the nearest previous node with level `< L`,
|
|
305
|
+
* otherwise a root node.
|
|
306
|
+
|
|
307
|
+
### 6.2 Rendering rule (tree → Markdown/FlexMD)
|
|
308
|
+
|
|
309
|
+
When converting outline back to Markdown:
|
|
310
|
+
|
|
311
|
+
* Render headings using `level` and `title`
|
|
312
|
+
* Append `content_md`
|
|
313
|
+
* Render children recursively
|
|
314
|
+
* **Do not render** `key`, `id`, `path`, dedup suffixes (internal only)
|
|
315
|
+
|
|
316
|
+
---
|
|
317
|
+
|
|
318
|
+
## 7) Nested lists (sub-items) parsing & rendering
|
|
319
|
+
|
|
320
|
+
### 7.1 Parsing lists into a tree
|
|
321
|
+
|
|
322
|
+
* Unordered item: `^\s*-\s+`
|
|
323
|
+
* Ordered item: `^\s*\d+\.\s+`
|
|
324
|
+
* Nesting is indentation-based.
|
|
325
|
+
|
|
326
|
+
**Output structure**
|
|
327
|
+
|
|
328
|
+
* `items[]` with `children[]`
|
|
329
|
+
|
|
330
|
+
---
|
|
331
|
+
|
|
332
|
+
## 8) Tables + ordered tables
|
|
333
|
+
|
|
334
|
+
### 8.1 Parsing GFM pipe tables (minimum)
|
|
335
|
+
|
|
336
|
+
* Header row + separator row required
|
|
337
|
+
* Alignment markers optional
|
|
338
|
+
* Cells are strings
|
|
339
|
+
|
|
340
|
+
### 8.2 Ordered table rule (hard)
|
|
341
|
+
|
|
342
|
+
If a table is declared as `ordered table`:
|
|
343
|
+
|
|
344
|
+
* first column header must be `#`
|
|
345
|
+
* rows must have `#` values `1..N`
|
|
346
|
+
|
|
347
|
+
---
|
|
348
|
+
|
|
349
|
+
## 9) Datatype handling
|
|
350
|
+
|
|
351
|
+
### 9.1 Meta values (FlexMD Frames meta and doc meta)
|
|
352
|
+
|
|
353
|
+
Add parse option:
|
|
354
|
+
|
|
355
|
+
* `metaTypeMode: "strings" | "infer" | "schema"`
|
|
356
|
+
|
|
357
|
+
**strings (default)**
|
|
358
|
+
All meta values are strings (except configured array keys like `tags`, `refs`).
|
|
359
|
+
|
|
360
|
+
**infer**
|
|
361
|
+
Safely infer:
|
|
362
|
+
|
|
363
|
+
* `true/false` → boolean
|
|
364
|
+
* `null` → null
|
|
365
|
+
* integers/floats → number (avoid leading-zero pitfalls like `"0012"` unless `0` or `0.xxx`)
|
|
366
|
+
|
|
367
|
+
**schema**
|
|
368
|
+
User provides types per key; schema wins.
|
|
369
|
+
|
|
370
|
+
### 9.2 Tables
|
|
371
|
+
|
|
372
|
+
Default: all table cells remain strings.
|
|
373
|
+
Typed tables are an optional future feature; keep v1.1 simple unless you explicitly need them.
|
|
374
|
+
|
|
375
|
+
### 9.3 JSON payloads
|
|
376
|
+
|
|
377
|
+
If payload fence is `json`:
|
|
378
|
+
|
|
379
|
+
* parse JSON → native types
|
|
380
|
+
* on parse error: keep raw + parseError
|
|
381
|
+
|
|
382
|
+
---
|
|
383
|
+
|
|
384
|
+
## 10) Detection & extraction from arbitrary text
|
|
385
|
+
|
|
386
|
+
### 10.1 Supported "objects" to detect
|
|
387
|
+
|
|
388
|
+
1. ` ```flexmd ` fenced blocks (highest confidence)
|
|
389
|
+
2. ` ```json ` fenced blocks that match FlexDocument shape (`{frames:[...]}`)
|
|
390
|
+
3. Raw/unframed FlexMD markers (best effort)
|
|
391
|
+
4. Optional generic Markdown snippets (opaque or lightly structured)
|
|
392
|
+
|
|
393
|
+
### 10.2 Detection tiers
|
|
394
|
+
|
|
395
|
+
* Tier A: ` ```flexmd `
|
|
396
|
+
* Tier B: ` ```json ` + shape match
|
|
397
|
+
* Tier C: raw sniff (at least 2 strong markers within first N lines): `[[...]]`, `@key:`, `@payload:name:`
|
|
398
|
+
|
|
399
|
+
### 10.3 API (spec)
|
|
400
|
+
|
|
401
|
+
```ts
|
|
402
|
+
export type DetectedKind =
|
|
403
|
+
| "flexmd_fence"
|
|
404
|
+
| "flexdoc_json_fence"
|
|
405
|
+
| "raw_flexmd"
|
|
406
|
+
| "markdown_snippet"
|
|
407
|
+
| "none";
|
|
408
|
+
|
|
409
|
+
export interface DetectedObject {
|
|
410
|
+
kind: DetectedKind;
|
|
411
|
+
confidence: number;
|
|
412
|
+
start: number;
|
|
413
|
+
end: number;
|
|
414
|
+
raw: string;
|
|
415
|
+
inner?: string; // for fenced blocks
|
|
416
|
+
}
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
---
|
|
420
|
+
|
|
421
|
+
## 11) End-to-end pipeline (recommended)
|
|
422
|
+
|
|
423
|
+
### 11.1 Creating a call (you know desired output)
|
|
424
|
+
|
|
425
|
+
1. Build OFS block based on required sections/tables.
|
|
426
|
+
2. Enrich with only relevant rules (lists/tables/None).
|
|
427
|
+
3. Send to LLM as the "Output format" section.
|
|
428
|
+
|
|
429
|
+
### 11.2 Receiving a response
|
|
430
|
+
|
|
431
|
+
1. Detect and extract structured objects:
|
|
432
|
+
* if ` ```flexmd ` exists → parse FlexMD Frames
|
|
433
|
+
* else treat response as Markdown and apply OFS validator/extractor
|
|
434
|
+
2. Build outline tree from headings.
|
|
435
|
+
3. Match required sections by name (case-insensitive, ignore `:`).
|
|
436
|
+
4. For each section:
|
|
437
|
+
* extract `content_md`
|
|
438
|
+
* if `kind=list|ordered_list` parse nested lists
|
|
439
|
+
5. Extract tables (if needed) and validate ordered tables (`#` column).
|
|
440
|
+
|
|
441
|
+
### 11.3 Duplicates (safe rule)
|
|
442
|
+
|
|
443
|
+
If a required section title appears multiple times:
|
|
444
|
+
|
|
445
|
+
* choose the match at the **highest level** (smallest heading level number)
|
|
446
|
+
* if multiple at same highest level: merge content in appearance order
|
|
447
|
+
|
|
448
|
+
Nested occurrences remain as children in outline.
|
|
449
|
+
|
|
450
|
+
---
|
|
451
|
+
|
|
452
|
+
## 12) Practical "public API" exports (what you ship)
|
|
453
|
+
|
|
454
|
+
### Layer A
|
|
455
|
+
|
|
456
|
+
* `parseFlexMd(text, options) -> FlexDocument`
|
|
457
|
+
* `stringifyFlexMd(doc, options) -> string`
|
|
458
|
+
|
|
459
|
+
### Layer B
|
|
460
|
+
|
|
461
|
+
* `parseOutputFormatSpec(md) -> OutputFormatSpec`
|
|
462
|
+
* `stringifyOutputFormatSpec(spec) -> string`
|
|
463
|
+
* `enrichInstructions(spec) -> string`
|
|
464
|
+
* `buildOutline(md) -> MdOutline`
|
|
465
|
+
* `validateOutput(md, spec) -> { ok: boolean; errors: ...; warnings: ... }`
|
|
466
|
+
* `extractOutput(md, spec, opts) -> ExtractedResult`
|
|
467
|
+
* `renderOutline(outline) -> string`
|
|
468
|
+
|
|
469
|
+
### Layer C
|
|
470
|
+
|
|
471
|
+
* `detectObjects(text) -> DetectedObject[]`
|
|
472
|
+
* `parseAny(text) -> { flexDocs: FlexDocument[]; markdownSnippets: string[]; remainder: string }`
|
|
473
|
+
|
|
474
|
+
---
|
|
475
|
+
|
|
476
|
+
## Appendix A — A canonical OFS generator (example)
|
|
477
|
+
|
|
478
|
+
```ts
|
|
479
|
+
export function makeDefaultOfs(): OutputFormatSpec {
|
|
480
|
+
return {
|
|
481
|
+
descriptorType: "output_format_spec",
|
|
482
|
+
format: "markdown",
|
|
483
|
+
sectionOrderMatters: false,
|
|
484
|
+
sections: [
|
|
485
|
+
{ name: "Short answer", kind: "prose" },
|
|
486
|
+
{ name: "Long answer", kind: "prose" },
|
|
487
|
+
{ name: "Reasoning", kind: "ordered_list" },
|
|
488
|
+
{ name: "Assumptions", kind: "list" },
|
|
489
|
+
{ name: "Unknowns", kind: "list" },
|
|
490
|
+
],
|
|
491
|
+
tablesOptional: true,
|
|
492
|
+
tables: [],
|
|
493
|
+
emptySectionValue: "None",
|
|
494
|
+
};
|
|
495
|
+
}
|
|
496
|
+
```
|
|
497
|
+
|
|
498
|
+
---
|
|
499
|
+
|
|
500
|
+
## Appendix B — Validator essentials (skeleton)
|
|
501
|
+
|
|
502
|
+
```ts
|
|
503
|
+
export function validateOutput(md: string, spec: OutputFormatSpec) {
|
|
504
|
+
const outline = buildOutline(md);
|
|
505
|
+
|
|
506
|
+
// index nodes by normalized title
|
|
507
|
+
const matches = collectMatches(outline);
|
|
508
|
+
|
|
509
|
+
const errors: string[] = [];
|
|
510
|
+
|
|
511
|
+
for (const s of spec.sections) {
|
|
512
|
+
const key = normalizeTitle(s.name);
|
|
513
|
+
const nodes = matches.get(key) ?? [];
|
|
514
|
+
if (nodes.length === 0) {
|
|
515
|
+
errors.push(`missing_section:${s.name}`);
|
|
516
|
+
continue;
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
const chosen = chooseBestNode(nodes); // highest-level; merge if needed
|
|
520
|
+
const body = chosen.content_md.trim();
|
|
521
|
+
|
|
522
|
+
if (spec.emptySectionValue && body === "") {
|
|
523
|
+
errors.push(`empty_section_without_none:${s.name}`);
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
if (spec.emptySectionValue && normalizeNone(body) === true) continue;
|
|
527
|
+
|
|
528
|
+
if (s.kind === "list") {
|
|
529
|
+
if (!/^\s*-\s+/.test(body)) errors.push(`section_not_bullets:${s.name}`);
|
|
530
|
+
}
|
|
531
|
+
if (s.kind === "ordered_list") {
|
|
532
|
+
if (!/^\s*\d+\.\s+/.test(body)) errors.push(`section_not_numbered:${s.name}`);
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
return { ok: errors.length === 0, errors };
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
function normalizeTitle(t: string) {
|
|
540
|
+
return t.trim().replace(/[:\-–—]\s*$/, "").trim().toLowerCase();
|
|
541
|
+
}
|
|
542
|
+
function normalizeNone(body: string) {
|
|
543
|
+
return body.trim().toLowerCase() === "none";
|
|
544
|
+
}
|
|
545
|
+
```
|
|
546
|
+
|
|
547
|
+
---
|
|
548
|
+
|
|
549
|
+
## Appendix C — What we ignore when rendering tree → Markdown
|
|
550
|
+
|
|
551
|
+
When converting outline JSON back to Markdown/FlexMD:
|
|
552
|
+
|
|
553
|
+
* ignore: `id`, `key`, `path`, array indexes
|
|
554
|
+
* render:
|
|
555
|
+
* heading level
|
|
556
|
+
* title
|
|
557
|
+
* content
|
|
558
|
+
* ordered list numbering (when kind requires)
|
|
559
|
+
* ordered table `#` column (when required)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|