flex-md 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +312 -6
- package/SPEC.md +559 -0
- package/dist/detection/detector.d.ts +6 -0
- package/dist/detection/detector.js +104 -0
- package/dist/detection/extractor.d.ts +10 -0
- package/dist/detection/extractor.js +54 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +17 -0
- package/dist/ofs/enricher.d.ts +6 -0
- package/dist/ofs/enricher.js +29 -0
- package/dist/ofs/extractor.d.ts +9 -0
- package/dist/ofs/extractor.js +75 -0
- package/dist/ofs/parser.d.ts +21 -0
- package/dist/ofs/parser.js +64 -0
- package/dist/ofs/stringify.d.ts +5 -0
- package/dist/ofs/stringify.js +30 -0
- package/dist/ofs/validator.d.ts +10 -0
- package/dist/ofs/validator.js +91 -0
- package/dist/outline/builder.d.ts +10 -0
- package/dist/outline/builder.js +85 -0
- package/dist/outline/renderer.d.ts +6 -0
- package/dist/outline/renderer.js +23 -0
- package/dist/parser.js +58 -10
- package/dist/parsers/lists.d.ts +6 -0
- package/dist/parsers/lists.js +36 -0
- package/dist/parsers/tables.d.ts +10 -0
- package/dist/parsers/tables.js +58 -0
- package/dist/test-runner.d.ts +1 -0
- package/dist/test-runner.js +328 -0
- package/dist/types.d.ts +91 -0
- package/dist/validator.d.ts +2 -0
- package/dist/validator.js +80 -0
- package/package.json +20 -6
package/SPEC.md
ADDED
|
@@ -0,0 +1,559 @@
|
|
|
1
|
+
# flex-md — End-to-end Spec (v1.1)
|
|
2
|
+
|
|
3
|
+
## 0) What this package does
|
|
4
|
+
|
|
5
|
+
`flex-md` provides **three complementary layers**:
|
|
6
|
+
|
|
7
|
+
### Layer A — FlexMD Frames (semi-structured Markdown)
|
|
8
|
+
|
|
9
|
+
A tiny set of anchors (frames, meta, payload binding) on top of Markdown to reliably round-trip to/from JSON.
|
|
10
|
+
|
|
11
|
+
### Layer B — Plain Markdown "Output Format Spec" (OFS)
|
|
12
|
+
|
|
13
|
+
A **Markdown-native contract** that any LLM can follow without knowing FlexMD.
|
|
14
|
+
From OFS, the package can:
|
|
15
|
+
|
|
16
|
+
* generate minimal LLM guidance (enricher)
|
|
17
|
+
* validate the response format
|
|
18
|
+
* extract content to JSON, building a **nested tree** from heading levels
|
|
19
|
+
|
|
20
|
+
### Layer C — Detection & Extraction from arbitrary text
|
|
21
|
+
|
|
22
|
+
Find and parse:
|
|
23
|
+
|
|
24
|
+
* fenced ` ```flexmd ` objects (best)
|
|
25
|
+
* fenced JSON "FlexDocument" objects
|
|
26
|
+
* raw/unframed FlexMD (best-effort)
|
|
27
|
+
* optional generic Markdown snippets (opaque or lightly structured)
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## 1) Core principles (hard requirements)
|
|
32
|
+
|
|
33
|
+
1. **Markdown-first**: All guidance is written in plain Markdown concepts (headings, lists, tables, fenced blocks).
|
|
34
|
+
2. **Section order never matters**.
|
|
35
|
+
3. **Order matters only when the content type says it does**:
|
|
36
|
+
* `ordered list` ⇒ numbered list
|
|
37
|
+
* `ordered table` ⇒ `#` column with `1..N`
|
|
38
|
+
4. **Structure comes from heading levels**: accept any heading level (`#..######`) and build nested JSON from it.
|
|
39
|
+
5. **Internal keys/ids/paths are never rendered back to Markdown** (unless debug mode).
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## 2) Data models (TypeScript)
|
|
44
|
+
|
|
45
|
+
### 2.1 FlexMD Frames (Layer A)
|
|
46
|
+
|
|
47
|
+
```ts
|
|
48
|
+
export type FlexMetaValue = string | string[] | number | boolean | null;
|
|
49
|
+
|
|
50
|
+
export interface FlexPayload {
|
|
51
|
+
lang?: string; // e.g. "json", "table"
|
|
52
|
+
raw: string; // always preserved
|
|
53
|
+
value: unknown; // parsed JSON for json; parsed table structure for table; otherwise string
|
|
54
|
+
parseError?: string;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export interface FlexFrame {
|
|
58
|
+
type: string; // e.g. "message", "section", ...
|
|
59
|
+
role?: string; // user|assistant|system|tool|...
|
|
60
|
+
id?: string;
|
|
61
|
+
ts?: string;
|
|
62
|
+
|
|
63
|
+
meta?: Record<string, FlexMetaValue>;
|
|
64
|
+
title?: string;
|
|
65
|
+
body_md?: string;
|
|
66
|
+
|
|
67
|
+
payloads?: Record<string, FlexPayload>;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
export interface FlexDocument {
|
|
71
|
+
title?: string;
|
|
72
|
+
meta?: Record<string, FlexMetaValue>;
|
|
73
|
+
frames: FlexFrame[];
|
|
74
|
+
}
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 2.2 Output Format Spec (OFS) (Layer B)
|
|
78
|
+
|
|
79
|
+
```ts
|
|
80
|
+
export type SectionKind = "prose" | "list" | "ordered_list";
|
|
81
|
+
|
|
82
|
+
export interface OfsSection {
|
|
83
|
+
name: string; // "Short answer"
|
|
84
|
+
kind: SectionKind; // prose/list/ordered_list
|
|
85
|
+
hint?: string; // optional text after delimiter, not required
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
export type TableKind = "table" | "ordered_table";
|
|
89
|
+
|
|
90
|
+
export interface OfsTable {
|
|
91
|
+
columns: string[]; // ["property1","property2"]
|
|
92
|
+
kind: TableKind;
|
|
93
|
+
by?: string; // informational dimension, not mandatory sorting
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
export interface OutputFormatSpec {
|
|
97
|
+
descriptorType: "output_format_spec";
|
|
98
|
+
format: "markdown";
|
|
99
|
+
sectionOrderMatters: false;
|
|
100
|
+
|
|
101
|
+
sections: OfsSection[];
|
|
102
|
+
|
|
103
|
+
tablesOptional: boolean; // default true
|
|
104
|
+
tables: OfsTable[];
|
|
105
|
+
|
|
106
|
+
emptySectionValue?: string; // default "None"
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### 2.3 Markdown Outline Tree (nested headings)
|
|
111
|
+
|
|
112
|
+
```ts
|
|
113
|
+
export interface MdNode {
|
|
114
|
+
title: string; // heading text, cleaned
|
|
115
|
+
level: number; // 1..6
|
|
116
|
+
key: string; // slugified internal key
|
|
117
|
+
id?: string; // optional internal
|
|
118
|
+
content_md: string;
|
|
119
|
+
children: MdNode[];
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
export interface MdOutline {
|
|
123
|
+
type: "md_outline";
|
|
124
|
+
nodes: MdNode[];
|
|
125
|
+
}
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### 2.4 Extracted Result (sections + structure)
|
|
129
|
+
|
|
130
|
+
```ts
|
|
131
|
+
export interface ListItem {
|
|
132
|
+
text: string;
|
|
133
|
+
index?: number; // for ordered lists
|
|
134
|
+
children: ListItem[];
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
export interface ParsedList {
|
|
138
|
+
kind: "list";
|
|
139
|
+
ordered: boolean;
|
|
140
|
+
items: ListItem[];
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
export interface ParsedTable {
|
|
144
|
+
kind: "table" | "ordered_table";
|
|
145
|
+
by?: string;
|
|
146
|
+
columns: string[]; // includes "#" first column for ordered_table
|
|
147
|
+
rows: string[][];
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
export interface ExtractedResult {
|
|
151
|
+
outline: MdOutline;
|
|
152
|
+
sectionsByName: Record<string, {
|
|
153
|
+
nodeKey: string;
|
|
154
|
+
nodeLevel: number;
|
|
155
|
+
md: string; // raw content markdown
|
|
156
|
+
list?: ParsedList; // only if section kind requires list parsing (or enabled)
|
|
157
|
+
}>;
|
|
158
|
+
tables: ParsedTable[];
|
|
159
|
+
}
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## 3) Layer A — FlexMD Frames format
|
|
165
|
+
|
|
166
|
+
### 3.1 Frame header (two accepted forms)
|
|
167
|
+
|
|
168
|
+
**Bracket line**
|
|
169
|
+
|
|
170
|
+
```md
|
|
171
|
+
[[message role=user id=m1 ts=...]]
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
**Markdown heading with brackets**
|
|
175
|
+
|
|
176
|
+
```md
|
|
177
|
+
## [[message role=user id=m1]]
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### 3.2 Meta lines
|
|
181
|
+
|
|
182
|
+
Immediately after a frame header (meta block), lines like:
|
|
183
|
+
|
|
184
|
+
```md
|
|
185
|
+
@tags: a, b
|
|
186
|
+
@priority: high
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Default rule: meta is only recognized before the first non-meta body line (unless `metaAnywhere=true`).
|
|
190
|
+
|
|
191
|
+
### 3.3 Payload binding
|
|
192
|
+
|
|
193
|
+
````md
|
|
194
|
+
@payload:name: input
|
|
195
|
+
```json
|
|
196
|
+
{"a":1}
|
|
197
|
+
```
|
|
198
|
+
````
|
|
199
|
+
|
|
200
|
+
- The line `@payload:name: X` binds the **next fenced block** to payload `X`.
|
|
201
|
+
- `json` is parsed; parsing errors go to `parseError`.
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## 4) Layer B — Output Format Spec (OFS): the LLM-facing descriptor
|
|
206
|
+
|
|
207
|
+
### 4.1 Canonical OFS block (example)
|
|
208
|
+
|
|
209
|
+
```md
|
|
210
|
+
## Output format (Markdown)
|
|
211
|
+
Include these sections somewhere (order does not matter):
|
|
212
|
+
|
|
213
|
+
- Short answer — prose
|
|
214
|
+
- Long answer — prose
|
|
215
|
+
- Reasoning — ordered list
|
|
216
|
+
- Assumptions — list
|
|
217
|
+
- Unknowns — list
|
|
218
|
+
|
|
219
|
+
Tables (only if needed):
|
|
220
|
+
- (property1, property2, property3 — table)
|
|
221
|
+
- (property1, property2, property3 — ordered table, by property2)
|
|
222
|
+
|
|
223
|
+
Empty sections:
|
|
224
|
+
- If a section is empty, write `None`.
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
### 4.2 Meaning of kinds (hard rules)
|
|
228
|
+
|
|
229
|
+
* `prose`: any Markdown text; lists allowed but not required.
|
|
230
|
+
* `list`: must be `None` OR contain at least one bullet line `- ` (nested allowed).
|
|
231
|
+
* `ordered list`: must be `None` OR contain at least one numbered line `^\d+\.` (nested allowed).
|
|
232
|
+
* `table`: Markdown pipe table with listed columns.
|
|
233
|
+
* `ordered table`: same but includes first column **exactly** `#` and rows numbered `1..N`.
|
|
234
|
+
|
|
235
|
+
> "by property2" is informational; do **not** enforce sorting unless the task explicitly requests sorting.
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
## 5) Instruction Enricher (feature-driven, minimal)
|
|
240
|
+
|
|
241
|
+
Given an `OutputFormatSpec`, generate only relevant guidance.
|
|
242
|
+
|
|
243
|
+
### 5.1 Enricher output template (generated)
|
|
244
|
+
|
|
245
|
+
Always include (if emptySectionValue is set):
|
|
246
|
+
|
|
247
|
+
* ``If a section is empty, write `None`.``
|
|
248
|
+
|
|
249
|
+
If any `list` sections:
|
|
250
|
+
|
|
251
|
+
* `List sections must use '-' bullets (nested allowed).`
|
|
252
|
+
|
|
253
|
+
If any `ordered_list` sections:
|
|
254
|
+
|
|
255
|
+
* `Ordered-list sections must use numbered items (nested allowed).`
|
|
256
|
+
|
|
257
|
+
If any `table` declared:
|
|
258
|
+
|
|
259
|
+
* `Tables must be Markdown pipe tables with the specified columns.`
|
|
260
|
+
|
|
261
|
+
If any `ordered_table` declared:
|
|
262
|
+
|
|
263
|
+
* `Ordered tables must add a first column named '#' with rows numbered 1..N.`
|
|
264
|
+
|
|
265
|
+
### 5.2 Example implementation (TypeScript)
|
|
266
|
+
|
|
267
|
+
```ts
|
|
268
|
+
/**
 * Build the minimal "Rules:" guidance block for an Output Format Spec.
 *
 * Only rules that are relevant to the given spec are emitted:
 * - the empty-section rule when `spec.emptySectionValue` is set,
 * - the bullet rule when any section has kind `list`,
 * - the numbering rule when any section has kind `ordered_list`,
 * - the pipe-table rule when any table is declared,
 * - the `#` column rule when any table has kind `ordered_table`.
 *
 * @param spec - The output format spec to describe.
 * @returns A `Rules:` header followed by one `- ` bullet per rule and a
 *   trailing newline, or an empty string when no rule applies.
 */
export function enrichInstructions(spec: OutputFormatSpec): string {
  // Rules are stored without a bullet prefix; the prefix is added once below.
  const rules: string[] = [];

  if (spec.emptySectionValue) {
    rules.push(`If a section is empty, write \`${spec.emptySectionValue}\`.`);
  }
  if (spec.sections.some(s => s.kind === "list")) {
    rules.push(`List sections must use '-' bullets (nested allowed).`);
  }
  if (spec.sections.some(s => s.kind === "ordered_list")) {
    rules.push(`Ordered-list sections must use numbered items (nested allowed).`);
  }
  // Ordered tables are pipe tables too, so any declared table triggers this rule.
  if (spec.tables.length > 0) {
    rules.push(`Tables must be Markdown pipe tables with the specified columns.`);
  }
  if (spec.tables.some(t => t.kind === "ordered_table")) {
    rules.push(`Ordered tables must add a first column named '#' with rows numbered 1..N.`);
  }

  if (rules.length === 0) return "";
  return `Rules:\n${rules.map(rule => `- ${rule}`).join("\n")}\n`;
}
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
---
|
|
290
|
+
|
|
291
|
+
## 6) Accept any heading level and build nested JSON outline
|
|
292
|
+
|
|
293
|
+
### 6.1 Outline build algorithm (stack)
|
|
294
|
+
|
|
295
|
+
**Parsing rule**
|
|
296
|
+
|
|
297
|
+
* Any heading `#..######` is accepted.
|
|
298
|
+
* Heading levels define parent/child relationships.
|
|
299
|
+
|
|
300
|
+
**Tree construction**
|
|
301
|
+
|
|
302
|
+
* Use a stack of nodes.
|
|
303
|
+
* New node at level `L` becomes:
|
|
304
|
+
* child of the nearest previous node with level `< L`,
|
|
305
|
+
* otherwise a root node.
|
|
306
|
+
|
|
307
|
+
### 6.2 Rendering rule (tree → Markdown/FlexMD)
|
|
308
|
+
|
|
309
|
+
When converting outline back to Markdown:
|
|
310
|
+
|
|
311
|
+
* Render headings using `level` and `title`
|
|
312
|
+
* Append `content_md`
|
|
313
|
+
* Render children recursively
|
|
314
|
+
* **Do not render** `key`, `id`, `path`, dedup suffixes (internal only)
|
|
315
|
+
|
|
316
|
+
---
|
|
317
|
+
|
|
318
|
+
## 7) Nested lists (sub-items) parsing & rendering
|
|
319
|
+
|
|
320
|
+
### 7.1 Parsing lists into a tree
|
|
321
|
+
|
|
322
|
+
* Unordered item: `^\s*-\s+`
|
|
323
|
+
* Ordered item: `^\s*\d+\.\s+`
|
|
324
|
+
* Nesting is indentation-based.
|
|
325
|
+
|
|
326
|
+
**Output structure**
|
|
327
|
+
|
|
328
|
+
* `items[]` with `children[]`
|
|
329
|
+
|
|
330
|
+
---
|
|
331
|
+
|
|
332
|
+
## 8) Tables + ordered tables
|
|
333
|
+
|
|
334
|
+
### 8.1 Parsing GFM pipe tables (minimum)
|
|
335
|
+
|
|
336
|
+
* Header row + separator row required
|
|
337
|
+
* Alignment markers optional
|
|
338
|
+
* Cells are strings
|
|
339
|
+
|
|
340
|
+
### 8.2 Ordered table rule (hard)
|
|
341
|
+
|
|
342
|
+
If a table is declared as `ordered table`:
|
|
343
|
+
|
|
344
|
+
* first column header must be `#`
|
|
345
|
+
* rows must have `#` values `1..N`
|
|
346
|
+
|
|
347
|
+
---
|
|
348
|
+
|
|
349
|
+
## 9) Datatype handling
|
|
350
|
+
|
|
351
|
+
### 9.1 Meta values (FlexMD Frames meta and doc meta)
|
|
352
|
+
|
|
353
|
+
Add parse option:
|
|
354
|
+
|
|
355
|
+
* `metaTypeMode: "strings" | "infer" | "schema"`
|
|
356
|
+
|
|
357
|
+
**strings (default)**
|
|
358
|
+
All meta values are strings (except configured array keys like `tags`, `refs`).
|
|
359
|
+
|
|
360
|
+
**infer**
|
|
361
|
+
Safely infer:
|
|
362
|
+
|
|
363
|
+
* `true/false` → boolean
|
|
364
|
+
* `null` → null
|
|
365
|
+
* integers/floats → number (avoid leading-zero pitfalls like `"0012"` unless `0` or `0.xxx`)
|
|
366
|
+
|
|
367
|
+
**schema**
|
|
368
|
+
User provides types per key; schema wins.
|
|
369
|
+
|
|
370
|
+
### 9.2 Tables
|
|
371
|
+
|
|
372
|
+
Default: all table cells remain strings.
|
|
373
|
+
Typed tables are an optional future feature; keep v1.1 simple unless you explicitly need them.
|
|
374
|
+
|
|
375
|
+
### 9.3 JSON payloads
|
|
376
|
+
|
|
377
|
+
If payload fence is `json`:
|
|
378
|
+
|
|
379
|
+
* parse JSON → native types
|
|
380
|
+
* on parse error: keep raw + parseError
|
|
381
|
+
|
|
382
|
+
---
|
|
383
|
+
|
|
384
|
+
## 10) Detection & extraction from arbitrary text
|
|
385
|
+
|
|
386
|
+
### 10.1 Supported "objects" to detect
|
|
387
|
+
|
|
388
|
+
1. ` ```flexmd ` fenced blocks (highest confidence)
|
|
389
|
+
2. ` ```json ` fenced blocks that match FlexDocument shape (`{frames:[...]}`)
|
|
390
|
+
3. Raw/unframed FlexMD markers (best effort)
|
|
391
|
+
4. Optional generic Markdown snippets (opaque or lightly structured)
|
|
392
|
+
|
|
393
|
+
### 10.2 Detection tiers
|
|
394
|
+
|
|
395
|
+
* Tier A: ` ```flexmd `
|
|
396
|
+
* Tier B: ` ```json ` + shape match
|
|
397
|
+
* Tier C: raw sniff (at least 2 strong markers within first N lines): `[[...]]`, `@key:`, `@payload:name:`
|
|
398
|
+
|
|
399
|
+
### 10.3 API (spec)
|
|
400
|
+
|
|
401
|
+
```ts
|
|
402
|
+
export type DetectedKind =
|
|
403
|
+
| "flexmd_fence"
|
|
404
|
+
| "flexdoc_json_fence"
|
|
405
|
+
| "raw_flexmd"
|
|
406
|
+
| "markdown_snippet"
|
|
407
|
+
| "none";
|
|
408
|
+
|
|
409
|
+
export interface DetectedObject {
|
|
410
|
+
kind: DetectedKind;
|
|
411
|
+
confidence: number;
|
|
412
|
+
start: number;
|
|
413
|
+
end: number;
|
|
414
|
+
raw: string;
|
|
415
|
+
inner?: string; // for fenced blocks
|
|
416
|
+
}
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
---
|
|
420
|
+
|
|
421
|
+
## 11) End-to-end pipeline (recommended)
|
|
422
|
+
|
|
423
|
+
### 11.1 Creating a call (you know desired output)
|
|
424
|
+
|
|
425
|
+
1. Build OFS block based on required sections/tables.
|
|
426
|
+
2. Enrich with only relevant rules (lists/tables/None).
|
|
427
|
+
3. Send to LLM as the "Output format" section.
|
|
428
|
+
|
|
429
|
+
### 11.2 Receiving a response
|
|
430
|
+
|
|
431
|
+
1. Detect and extract structured objects:
|
|
432
|
+
* if ` ```flexmd ` exists → parse FlexMD Frames
|
|
433
|
+
* else treat response as Markdown and apply OFS validator/extractor
|
|
434
|
+
2. Build outline tree from headings.
|
|
435
|
+
3. Match required sections by name (case-insensitive, ignoring a trailing `:` or dash).
|
|
436
|
+
4. For each section:
|
|
437
|
+
* extract `content_md`
|
|
438
|
+
* if `kind=list|ordered_list` parse nested lists
|
|
439
|
+
5. Extract tables (if needed) and validate ordered tables (`#` column).
|
|
440
|
+
|
|
441
|
+
### 11.3 Duplicates (safe rule)
|
|
442
|
+
|
|
443
|
+
If required section title appears multiple times:
|
|
444
|
+
|
|
445
|
+
* choose the match at the **highest level** (smallest heading level number)
|
|
446
|
+
* if multiple at same highest level: merge content in appearance order
|
|
447
|
+
|
|
448
|
+
Nested occurrences remain as children in outline.
|
|
449
|
+
|
|
450
|
+
---
|
|
451
|
+
|
|
452
|
+
## 12) Practical "public API" exports (what you ship)
|
|
453
|
+
|
|
454
|
+
### Layer A
|
|
455
|
+
|
|
456
|
+
* `parseFlexMd(text, options) -> FlexDocument`
|
|
457
|
+
* `stringifyFlexMd(doc, options) -> string`
|
|
458
|
+
|
|
459
|
+
### Layer B
|
|
460
|
+
|
|
461
|
+
* `parseOutputFormatSpec(md) -> OutputFormatSpec`
|
|
462
|
+
* `stringifyOutputFormatSpec(spec) -> string`
|
|
463
|
+
* `enrichInstructions(spec) -> string`
|
|
464
|
+
* `buildOutline(md) -> MdOutline`
|
|
465
|
+
* `validateOutput(md, spec) -> { ok: boolean; errors: ...; warnings: ... }`
|
|
466
|
+
* `extractOutput(md, spec, opts) -> ExtractedResult`
|
|
467
|
+
* `renderOutline(outline) -> string`
|
|
468
|
+
|
|
469
|
+
### Layer C
|
|
470
|
+
|
|
471
|
+
* `detectObjects(text) -> DetectedObject[]`
|
|
472
|
+
* `parseAny(text) -> { flexDocs: FlexDocument[]; markdownSnippets: string[]; remainder: string }`
|
|
473
|
+
|
|
474
|
+
---
|
|
475
|
+
|
|
476
|
+
## Appendix A — A canonical OFS generator (example)
|
|
477
|
+
|
|
478
|
+
```ts
|
|
479
|
+
/**
 * Create the canonical default Output Format Spec.
 *
 * The default asks for two prose sections ("Short answer", "Long answer"),
 * an ordered-list "Reasoning" section and two bullet-list sections
 * ("Assumptions", "Unknowns"). It declares no tables, marks tables as
 * optional, and uses `None` as the empty-section value.
 *
 * @returns A freshly built spec object on every call.
 */
export function makeDefaultOfs(): OutputFormatSpec {
  // Small local factory so the section list reads as a table of name/kind pairs.
  const section = (name: string, kind: "prose" | "list" | "ordered_list") => ({ name, kind });

  const defaults: OutputFormatSpec = {
    descriptorType: "output_format_spec",
    format: "markdown",
    sectionOrderMatters: false,
    sections: [
      section("Short answer", "prose"),
      section("Long answer", "prose"),
      section("Reasoning", "ordered_list"),
      section("Assumptions", "list"),
      section("Unknowns", "list"),
    ],
    tablesOptional: true,
    tables: [],
    emptySectionValue: "None",
  };
  return defaults;
}
|
|
496
|
+
```
|
|
497
|
+
|
|
498
|
+
---
|
|
499
|
+
|
|
500
|
+
## Appendix B — Validator essentials (skeleton)
|
|
501
|
+
|
|
502
|
+
```ts
|
|
503
|
+
/**
 * Validate a Markdown response against an Output Format Spec (skeleton).
 *
 * For every section declared in `spec.sections`, the heading with the same
 * normalized title is looked up in the outline, the best match is chosen and
 * its body is checked against the declared kind.
 *
 * Error codes pushed to `errors`:
 * - `missing_section:<name>`: no heading matches the section name.
 * - `empty_section_without_none:<name>`: the body is empty although the spec
 *   defines an empty-section value.
 * - `section_not_bullets:<name>`: a `list` section contains no `- ` line.
 * - `section_not_numbered:<name>`: an `ordered_list` section contains no
 *   `1.`-style line.
 *
 * @param md - Markdown response to validate.
 * @param spec - Format spec the response must follow.
 * @returns `ok` is true when `errors` is empty. `warnings` is part of the
 *   documented result shape; this skeleton never adds to it.
 */
export function validateOutput(md: string, spec: OutputFormatSpec) {
  const outline = buildOutline(md);

  // index nodes by normalized title
  const matches = collectMatches(outline);

  const errors: string[] = [];
  const warnings: string[] = [];

  for (const s of spec.sections) {
    const key = normalizeTitle(s.name);
    const nodes = matches.get(key) ?? [];
    if (nodes.length === 0) {
      errors.push(`missing_section:${s.name}`);
      continue;
    }

    const chosen = chooseBestNode(nodes); // highest-level; merge if needed
    const body = chosen.content_md.trim();

    const emptyValue = spec.emptySectionValue;
    if (emptyValue) {
      if (body === "") {
        // Report the empty body once; the kind checks below would only
        // repeat the same problem under a second error code.
        errors.push(`empty_section_without_none:${s.name}`);
        continue;
      }
      // Compare against the configured value, not a hard-coded "None".
      if (normalizeNone(body, emptyValue)) continue;
    }

    // The `m` flag lets `^` match at any line start: a section only has to
    // contain at least one list line, it need not begin with one.
    if (s.kind === "list") {
      if (!/^\s*-\s+/m.test(body)) errors.push(`section_not_bullets:${s.name}`);
    }
    if (s.kind === "ordered_list") {
      if (!/^\s*\d+\.\s+/m.test(body)) errors.push(`section_not_numbered:${s.name}`);
    }
  }

  return { ok: errors.length === 0, errors, warnings };
}

/**
 * Normalize a heading or section name for matching: trim it, drop one
 * trailing `:`, `-`, en dash or em dash (with any whitespace after it),
 * trim again and lower-case the result.
 */
function normalizeTitle(t: string) {
  return t.trim().replace(/[:\-–—]\s*$/, "").trim().toLowerCase();
}

/**
 * Tell whether a section body is exactly the empty-section marker.
 *
 * @param body - Section body.
 * @param emptyValue - Marker to compare against; defaults to `"None"`.
 * @returns True when both values are equal after trimming and lower-casing.
 */
function normalizeNone(body: string, emptyValue: string = "None") {
  return body.trim().toLowerCase() === emptyValue.trim().toLowerCase();
}
|
|
545
|
+
```
|
|
546
|
+
|
|
547
|
+
---
|
|
548
|
+
|
|
549
|
+
## Appendix C — What we ignore when rendering tree → Markdown
|
|
550
|
+
|
|
551
|
+
When converting outline JSON back to Markdown/FlexMD:
|
|
552
|
+
|
|
553
|
+
* ignore: `id`, `key`, `path`, array indexes
|
|
554
|
+
* render:
|
|
555
|
+
* heading level
|
|
556
|
+
* title
|
|
557
|
+
* content
|
|
558
|
+
* ordered list numbering (when kind requires)
|
|
559
|
+
* ordered table `#` column (when required)
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { DetectedObject } from "../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Detect FlexMD and other structured objects in arbitrary text.
|
|
4
|
+
* Returns all detected objects with confidence scores and string-offset ranges (JavaScript string indices, not bytes).
|
|
5
|
+
*/
|
|
6
|
+
export declare function detectObjects(text: string): DetectedObject[];
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
 * Scan arbitrary text for FlexMD and related structured objects.
 *
 * Runs the three detectors in tier order (```flexmd fences, ```json fences
 * with a FlexDocument shape, raw FlexMD markers) and returns all of their
 * hits in one array ordered by ascending `start`. `start`/`end` are the
 * string offsets reported by the individual detectors.
 */
export function detectObjects(text) {
    const hits = [
        // Tier A: ```flexmd fenced blocks (highest confidence)
        ...detectFlexMdFences(text),
        // Tier B: ```json blocks with FlexDocument shape
        ...detectFlexDocJsonFences(text),
        // Tier C: raw FlexMD markers (best effort)
        ...detectRawFlexMd(text),
    ];
    // Order by position in the input text.
    return hits.sort((left, right) => left.start - right.start);
}
|
|
17
|
+
/**
 * Tier A: find every ```flexmd fenced block in `text`.
 *
 * The opening fence line may carry trailing spaces or tabs and may end in LF
 * or CRLF, so input with Windows line endings is detected as well. The block
 * ends at the first following ``` sequence.
 *
 * Returns one entry per fence with kind "flexmd_fence" and confidence 1.0.
 * `start`/`end` are string indices of the whole fence inside `text`, `raw` is
 * the whole fence, and `inner` is everything between the opening fence line
 * and the closing ```.
 */
function detectFlexMdFences(text) {
    const detected = [];
    // `[ \t]*\r?\n` tolerates trailing whitespace and CRLF after the info string.
    const fencePattern = /```flexmd[ \t]*\r?\n([\s\S]*?)```/g;
    let match;
    while ((match = fencePattern.exec(text)) !== null) {
        detected.push({
            kind: "flexmd_fence",
            confidence: 1.0,
            start: match.index,
            end: match.index + match[0].length,
            raw: match[0],
            inner: match[1]
        });
    }
    return detected;
}
|
|
36
|
+
/**
 * Tier B: find ```json fenced blocks whose content is a FlexDocument.
 *
 * A fence qualifies when its content parses as JSON and the parsed value is
 * an object with a `frames` array. Fences that contain invalid JSON or any
 * other JSON shape are skipped silently, since they are ordinary code
 * samples. The opening fence line may carry trailing spaces or tabs and may
 * end in LF or CRLF.
 *
 * Returns one entry per qualifying fence with kind "flexdoc_json_fence" and
 * confidence 0.9. `start`/`end` are string indices of the whole fence inside
 * `text`, `raw` is the whole fence, and `inner` is the unparsed JSON text.
 */
function detectFlexDocJsonFences(text) {
    const detected = [];
    // `[ \t]*\r?\n` tolerates trailing whitespace and CRLF after the info string.
    const fencePattern = /```json[ \t]*\r?\n([\s\S]*?)```/g;
    let match;
    while ((match = fencePattern.exec(text)) !== null) {
        const inner = match[1];
        let parsed;
        try {
            parsed = JSON.parse(inner);
        }
        catch {
            // Not valid JSON, so this fence cannot be a FlexDocument.
            continue;
        }
        // FlexDocument shape check: a non-null object with a "frames" array.
        if (parsed !== null && typeof parsed === "object" && Array.isArray(parsed.frames)) {
            detected.push({
                kind: "flexdoc_json_fence",
                confidence: 0.9,
                start: match.index,
                end: match.index + match[0].length,
                raw: match[0],
                inner
            });
        }
    }
    return detected;
}
|
|
66
|
+
/**
 * Tier C: detect raw (unfenced) FlexMD by its markers (best effort).
 *
 * The whole text is scanned for three kinds of markers:
 *  - [[...]]
 *  - @key:
 *  - @payload:name:
 *
 * When at least two distinct marker positions are found, a single
 * "raw_flexmd" entry (confidence 0.7) is returned that spans from the first
 * marker to the end of the text.
 *
 * Marker positions are de-duplicated because the `@key:` pattern also
 * matches the `@payload:` prefix of a payload line; without that, one
 * payload line would count as two markers.
 *
 * NOTE(review): the scan is not limited to a leading window of the text
 * (no such limit was ever applied here) - confirm whether one is wanted.
 */
function detectRawFlexMd(text) {
    const detected = [];
    const markerPatterns = [
        /\[\[([^\]]+)\]\]/g,
        /@[a-zA-Z_][a-zA-Z0-9_]*:/g,
        /@payload:[a-zA-Z_][a-zA-Z0-9_]*:/g
    ];
    // Collect the start index of every marker occurrence.
    const markers = [];
    for (const pattern of markerPatterns) {
        let match;
        while ((match = pattern.exec(text)) !== null) {
            markers.push(match.index);
        }
    }
    // One position is one marker, no matter how many patterns hit it.
    const positions = new Set(markers);
    if (positions.size >= 2) {
        // Explicit minimum instead of Math.min(...markers): spreading a very
        // large array can exceed the engine's argument limit.
        let start = text.length;
        for (const position of positions) {
            if (position < start) {
                start = position;
            }
        }
        const end = text.length;
        detected.push({
            kind: "raw_flexmd",
            confidence: 0.7,
            start,
            end,
            raw: text.substring(start, end)
        });
    }
    return detected;
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { FlexDocument } from "../types.js";
|
|
2
|
+
/** Result shape returned by `parseAny`. */
export interface ParseAnyResult {
|
|
3
|
+
flexDocs: FlexDocument[]; // FlexMD documents extracted from the input text
|
|
4
|
+
markdownSnippets: string[]; // Markdown snippets extracted from the input text
|
|
5
|
+
remainder: string; // NOTE(review): presumably the text left over after extraction - confirm against the implementation
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* Parse any text and extract all FlexMD documents and Markdown snippets.
|
|
9
|
+
*/
|
|
10
|
+
export declare function parseAny(text: string): ParseAnyResult;
|