flex-md 2.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -309
- package/dist/__tests__/ofs.test.d.ts +1 -0
- package/dist/__tests__/ofs.test.js +51 -0
- package/dist/__tests__/validate.test.d.ts +1 -0
- package/dist/__tests__/validate.test.js +108 -0
- package/dist/detect/json/detectIntent.d.ts +2 -0
- package/dist/detect/json/detectIntent.js +79 -0
- package/dist/detect/json/detectPresence.d.ts +6 -0
- package/dist/detect/json/detectPresence.js +191 -0
- package/dist/detect/json/index.d.ts +7 -0
- package/dist/detect/json/index.js +12 -0
- package/dist/detect/json/types.d.ts +43 -0
- package/dist/detect/json/types.js +1 -0
- package/dist/extract/extract.d.ts +5 -0
- package/dist/extract/extract.js +50 -0
- package/dist/extract/types.d.ts +11 -0
- package/dist/extract/types.js +1 -0
- package/dist/index.d.ts +11 -15
- package/dist/index.js +18 -17
- package/dist/issues/build.d.ts +26 -0
- package/dist/issues/build.js +62 -0
- package/dist/md/lists.d.ts +14 -0
- package/dist/md/lists.js +33 -0
- package/dist/md/match.d.ts +12 -0
- package/dist/md/match.js +44 -0
- package/dist/md/outline.d.ts +6 -0
- package/dist/md/outline.js +67 -0
- package/dist/md/parse.d.ts +29 -0
- package/dist/md/parse.js +105 -0
- package/dist/md/tables.d.ts +25 -0
- package/dist/md/tables.js +72 -0
- package/dist/ofs/enricher.d.ts +14 -4
- package/dist/ofs/enricher.js +76 -20
- package/dist/ofs/issues.d.ts +14 -0
- package/dist/ofs/issues.js +92 -0
- package/dist/ofs/issuesEnvelope.d.ts +15 -0
- package/dist/ofs/issuesEnvelope.js +71 -0
- package/dist/ofs/parser.d.ts +5 -17
- package/dist/ofs/parser.js +114 -45
- package/dist/ofs/stringify.js +33 -21
- package/dist/pipeline/enforce.d.ts +10 -0
- package/dist/pipeline/enforce.js +46 -0
- package/dist/pipeline/kind.d.ts +16 -0
- package/dist/pipeline/kind.js +24 -0
- package/dist/pipeline/repair.d.ts +14 -0
- package/dist/pipeline/repair.js +112 -0
- package/dist/strictness/container.d.ts +14 -0
- package/dist/strictness/container.js +46 -0
- package/dist/strictness/processor.d.ts +5 -0
- package/dist/strictness/processor.js +29 -0
- package/dist/strictness/types.d.ts +77 -0
- package/dist/strictness/types.js +106 -0
- package/dist/test-pipeline.d.ts +1 -0
- package/dist/test-pipeline.js +53 -0
- package/dist/test-runner.js +10 -7
- package/dist/test-strictness.d.ts +1 -0
- package/dist/test-strictness.js +213 -0
- package/dist/types.d.ts +82 -43
- package/dist/validate/policy.d.ts +10 -0
- package/dist/validate/policy.js +17 -0
- package/dist/validate/types.d.ts +11 -0
- package/dist/validate/types.js +1 -0
- package/dist/validate/validate.d.ts +2 -0
- package/dist/validate/validate.js +308 -0
- package/docs/mdflex-compliance.md +216 -0
- package/package.json +7 -3
package/README.md
CHANGED
|
@@ -1,341 +1,89 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Flex-MD (v3.0) — Markdown Output Contract
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Flex-MD is a TypeScript library for building and enforcing **Markdown Output Contracts** with LLMs. It treats Markdown as a semi-structured data format, allowing you to define required sections, list types, and tables while maintaining 100% standard Markdown compatibility.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
- **Layer B**: Output Format Spec (OFS) - LLM-friendly Markdown contracts
|
|
7
|
-
- **Layer C**: Detection & Extraction - find FlexMD in arbitrary text
|
|
5
|
+
Version 3.0 introduces the **Detect-Repair-Enforce** pipeline, ensuring LLM responses are coerced into compliant structures before being parsed.
|
|
8
6
|
|
|
9
|
-
##
|
|
7
|
+
## Key Features
|
|
10
8
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
### Layer A: FlexMD Frames
|
|
18
|
-
|
|
19
|
-
Parse and stringify semi-structured Markdown with frames, metadata, and payloads:
|
|
20
|
-
|
|
21
|
-
```javascript
|
|
22
|
-
import { parseFlexMd, stringifyFlexMd } from "flex-md";
|
|
23
|
-
|
|
24
|
-
const md = `[[message role=user id=m1]]
|
|
25
|
-
@tags: auth, login
|
|
26
|
-
Hello
|
|
27
|
-
|
|
28
|
-
@payload:name: input
|
|
29
|
-
\`\`\`json
|
|
30
|
-
{"a":1}
|
|
31
|
-
\`\`\`
|
|
32
|
-
`;
|
|
33
|
-
|
|
34
|
-
const doc = parseFlexMd(md);
|
|
35
|
-
console.log(doc.frames[0]?.type); // "message"
|
|
36
|
-
console.log(doc.frames[0]?.meta?.tags); // ["auth", "login"]
|
|
37
|
-
|
|
38
|
-
const back = stringifyFlexMd(doc, { skipEmpty: true });
|
|
39
|
-
console.log(back);
|
|
40
|
-
```
|
|
41
|
-
|
|
42
|
-
**Validation:**
|
|
43
|
-
|
|
44
|
-
```javascript
|
|
45
|
-
import { validateFlexMd } from "flex-md";
|
|
46
|
-
|
|
47
|
-
const result = validateFlexMd(md);
|
|
48
|
-
if (!result.valid) {
|
|
49
|
-
console.log("Validation errors:", result.errors);
|
|
50
|
-
}
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
**Datatype Handling:**
|
|
9
|
+
- **Standard Markdown**: No proprietary tags. Pure headings, lists, and tables.
|
|
10
|
+
- **Strictness Levels (L0–L3)**: From loose guidance to rigid structural enforcement.
|
|
11
|
+
- **Deterministic Repair**: Auto-fixes misformatted LLM output (merged fences, missing headings, format conversion).
|
|
12
|
+
- **Issues Envelope**: A structured failure format for when repairs fail, allowing safe fallbacks.
|
|
13
|
+
- **Tax-Aware Prompts**: Generates minimal, relevant instructions to save tokens.
|
|
54
14
|
|
|
55
|
-
|
|
56
|
-
// Infer types automatically
|
|
57
|
-
const doc = parseFlexMd(md, { metaTypeMode: "infer" });
|
|
58
|
-
// @priority: 5 → number
|
|
59
|
-
// @enabled: true → boolean
|
|
60
|
-
// @value: null → null
|
|
15
|
+
## Installation
|
|
61
16
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
metaTypeMode: "schema",
|
|
65
|
-
metaSchema: {
|
|
66
|
-
priority: "number",
|
|
67
|
-
enabled: "boolean"
|
|
68
|
-
}
|
|
69
|
-
});
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
---
|
|
73
|
-
|
|
74
|
-
### Layer B: Output Format Spec (OFS)
|
|
75
|
-
|
|
76
|
-
Create LLM-friendly Markdown contracts and extract structured data:
|
|
77
|
-
|
|
78
|
-
```javascript
|
|
79
|
-
import {
|
|
80
|
-
parseOutputFormatSpec,
|
|
81
|
-
stringifyOutputFormatSpec,
|
|
82
|
-
enrichInstructions,
|
|
83
|
-
validateOutput,
|
|
84
|
-
extractOutput
|
|
85
|
-
} from "flex-md";
|
|
86
|
-
|
|
87
|
-
// Define an output format
|
|
88
|
-
const spec = {
|
|
89
|
-
descriptorType: "output_format_spec",
|
|
90
|
-
format: "markdown",
|
|
91
|
-
sectionOrderMatters: false,
|
|
92
|
-
sections: [
|
|
93
|
-
{ name: "Short answer", kind: "prose" },
|
|
94
|
-
{ name: "Reasoning", kind: "ordered_list" },
|
|
95
|
-
{ name: "Assumptions", kind: "list" }
|
|
96
|
-
],
|
|
97
|
-
tablesOptional: true,
|
|
98
|
-
tables: [],
|
|
99
|
-
emptySectionValue: "None"
|
|
100
|
-
};
|
|
101
|
-
|
|
102
|
-
// Generate minimal LLM instructions
|
|
103
|
-
const instructions = enrichInstructions(spec);
|
|
104
|
-
console.log(instructions);
|
|
105
|
-
// Output:
|
|
106
|
-
// Rules:
|
|
107
|
-
// - If a section is empty, write `None`.
|
|
108
|
-
// - Ordered-list sections must use numbered items (nested allowed).
|
|
109
|
-
// - List sections must use '-' bullets (nested allowed).
|
|
110
|
-
|
|
111
|
-
// Stringify for LLM prompt
|
|
112
|
-
const ofsMarkdown = stringifyOutputFormatSpec(spec);
|
|
113
|
-
|
|
114
|
-
// Later, validate LLM response
|
|
115
|
-
const llmResponse = `
|
|
116
|
-
## Short answer
|
|
117
|
-
The answer is 42.
|
|
118
|
-
|
|
119
|
-
## Reasoning
|
|
120
|
-
1. First, we analyze the question
|
|
121
|
-
2. Then we compute the result
|
|
122
|
-
|
|
123
|
-
## Assumptions
|
|
124
|
-
- The universe is deterministic
|
|
125
|
-
- We have sufficient compute
|
|
126
|
-
`;
|
|
127
|
-
|
|
128
|
-
const validation = validateOutput(llmResponse, spec);
|
|
129
|
-
console.log(validation.ok); // true
|
|
130
|
-
|
|
131
|
-
// Extract structured data
|
|
132
|
-
const extracted = extractOutput(llmResponse, spec);
|
|
133
|
-
console.log(extracted.sectionsByName["Short answer"].md);
|
|
134
|
-
// "The answer is 42."
|
|
135
|
-
|
|
136
|
-
console.log(extracted.sectionsByName["Reasoning"].list);
|
|
137
|
-
// { kind: "list", ordered: true, items: [...] }
|
|
17
|
+
```bash
|
|
18
|
+
npm install flex-md
|
|
138
19
|
```
|
|
139
20
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
### Outline & Tree Building
|
|
143
|
-
|
|
144
|
-
Build nested heading trees from any Markdown:
|
|
145
|
-
|
|
146
|
-
```javascript
|
|
147
|
-
import { buildOutline, renderOutline } from "flex-md";
|
|
148
|
-
|
|
149
|
-
const md = `
|
|
150
|
-
# Introduction
|
|
151
|
-
Some intro text
|
|
152
|
-
|
|
153
|
-
## Background
|
|
154
|
-
More details
|
|
21
|
+
## Quick Start
|
|
155
22
|
|
|
156
|
-
###
|
|
157
|
-
Even more details
|
|
23
|
+
### 1. Define your Output Format Spec (OFS)
|
|
158
24
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
`;
|
|
25
|
+
```typescript
|
|
26
|
+
import { parseOutputFormatSpec } from 'flex-md';
|
|
162
27
|
|
|
163
|
-
const
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
28
|
+
const spec = parseOutputFormatSpec(`
|
|
29
|
+
## Output format
|
|
30
|
+
- Short answer — text (required)
|
|
31
|
+
- Reasoning — ordered list (required)
|
|
32
|
+
- Assumptions — list (optional)
|
|
167
33
|
|
|
168
|
-
|
|
169
|
-
|
|
34
|
+
empty sections:
|
|
35
|
+
- If a section is empty, write \`None\`.
|
|
36
|
+
`);
|
|
170
37
|
```
|
|
171
38
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
### List & Table Parsing
|
|
175
|
-
|
|
176
|
-
Parse nested lists and tables:
|
|
177
|
-
|
|
178
|
-
```javascript
|
|
179
|
-
import { parseList, parsePipeTable, extractAllTables } from "flex-md";
|
|
180
|
-
|
|
181
|
-
// Parse nested lists
|
|
182
|
-
const listMd = `
|
|
183
|
-
- Item 1
|
|
184
|
-
- Nested 1.1
|
|
185
|
-
- Nested 1.2
|
|
186
|
-
- Item 2
|
|
187
|
-
`;
|
|
188
|
-
|
|
189
|
-
const list = parseList(listMd);
|
|
190
|
-
console.log(list.items[0].children.length); // 2
|
|
39
|
+
### 2. Generate Prompt Guidance
|
|
191
40
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
1. First step
|
|
195
|
-
1. Sub-step 1.1
|
|
196
|
-
2. Sub-step 1.2
|
|
197
|
-
2. Second step
|
|
198
|
-
`;
|
|
41
|
+
```typescript
|
|
42
|
+
import { buildMarkdownGuidance } from 'flex-md';
|
|
199
43
|
|
|
200
|
-
const
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
// Parse tables
|
|
204
|
-
const tableMd = `
|
|
205
|
-
| Name | Age |
|
|
206
|
-
|------|-----|
|
|
207
|
-
| Alice | 30 |
|
|
208
|
-
| Bob | 25 |
|
|
209
|
-
`;
|
|
210
|
-
|
|
211
|
-
const table = parsePipeTable(tableMd);
|
|
212
|
-
console.log(table.columns); // ["Name", "Age"]
|
|
213
|
-
console.log(table.rows); // [["Alice", "30"], ["Bob", "25"]]
|
|
214
|
-
|
|
215
|
-
// Extract all tables from a document
|
|
216
|
-
const allTables = extractAllTables(documentMd);
|
|
44
|
+
const guidance = buildMarkdownGuidance(spec, { level: 1 });
|
|
45
|
+
// Output: "Reply in Markdown. Include these headings... If a section is empty, write 'None'."
|
|
217
46
|
```
|
|
218
47
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
### Layer C: Detection & Extraction
|
|
222
|
-
|
|
223
|
-
Find and parse FlexMD from arbitrary text:
|
|
48
|
+
### 3. Enforce the Contract
|
|
224
49
|
|
|
225
|
-
```
|
|
226
|
-
import {
|
|
50
|
+
```typescript
|
|
51
|
+
import { enforceFlexMd } from 'flex-md';
|
|
227
52
|
|
|
228
|
-
const
|
|
229
|
-
|
|
53
|
+
const llmResponse = "I think... ## Short answer \n Yes. ## Reasoning \n 1. Logic";
|
|
54
|
+
const result = enforceFlexMd(llmResponse, spec, { level: 1 });
|
|
230
55
|
|
|
231
|
-
|
|
232
|
-
[
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
More text.
|
|
237
|
-
|
|
238
|
-
\`\`\`json
|
|
239
|
-
{
|
|
240
|
-
"frames": [
|
|
241
|
-
{"type": "message", "body_md": "JSON FlexDocument"}
|
|
242
|
-
]
|
|
56
|
+
if (result.ok) {
|
|
57
|
+
console.log(result.extracted.sectionsByName["Short answer"].md);
|
|
58
|
+
} else {
|
|
59
|
+
// result.outputText will contain the Issues Envelope if strictness level >= 1
|
|
60
|
+
console.log(result.outputText);
|
|
243
61
|
}
|
|
244
|
-
\`\`\`
|
|
245
|
-
`;
|
|
246
|
-
|
|
247
|
-
// Detect all objects
|
|
248
|
-
const detected = detectObjects(mixedText);
|
|
249
|
-
console.log(detected[0].kind); // "flexmd_fence"
|
|
250
|
-
console.log(detected[0].confidence); // 1.0
|
|
251
|
-
console.log(detected[1].kind); // "flexdoc_json_fence"
|
|
252
|
-
console.log(detected[1].confidence); // 0.9
|
|
253
|
-
|
|
254
|
-
// Parse everything
|
|
255
|
-
const result = parseAny(mixedText);
|
|
256
|
-
console.log(result.flexDocs.length); // 2
|
|
257
|
-
console.log(result.remainder); // "Some random text here.\n\nMore text."
|
|
258
62
|
```
|
|
259
63
|
|
|
260
|
-
|
|
64
|
+
## Strictness Levels
|
|
261
65
|
|
|
262
|
-
|
|
66
|
+
| Level | Goal | Guidance | Enforcement |
|
|
67
|
+
| :--- | :--- | :--- | :--- |
|
|
68
|
+
| **L0** | Plain Markdown | "Reply in Markdown." | None. Accept as-is. |
|
|
69
|
+
| **L1** | Sectioned MD | "Include these headings..." | Headings must exist. |
|
|
70
|
+
| **L2** | Fenced Container | "Return inside a single block..." | Exactly one fenced block. |
|
|
71
|
+
| **L3** | Typed Structure | "Reasoning is an ordered list..." | Enforce list/table kinds. |
|
|
263
72
|
|
|
264
|
-
|
|
265
|
-
- ✅ **Layer B**: Output Format Spec for LLM contracts
|
|
266
|
-
- ✅ **Layer C**: Multi-tier detection from arbitrary text
|
|
267
|
-
- ✅ **Nested heading trees**: Build and render outline structures
|
|
268
|
-
- ✅ **List parsing**: Support for nested ordered/unordered lists
|
|
269
|
-
- ✅ **Table parsing**: GFM pipe tables with ordered table support
|
|
270
|
-
- ✅ **Datatype handling**: Infer or schema-based type conversion
|
|
271
|
-
- ✅ **Validation**: Validate FlexMD syntax and OFS compliance
|
|
272
|
-
- ✅ **TypeScript**: Full type definitions included
|
|
73
|
+
## The Repair Pipeline
|
|
273
74
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
### LLM Prompting & Response Parsing
|
|
282
|
-
|
|
283
|
-
```javascript
|
|
284
|
-
// 1. Define what you want from the LLM
|
|
285
|
-
const spec = {
|
|
286
|
-
descriptorType: "output_format_spec",
|
|
287
|
-
format: "markdown",
|
|
288
|
-
sectionOrderMatters: false,
|
|
289
|
-
sections: [
|
|
290
|
-
{ name: "Analysis", kind: "prose" },
|
|
291
|
-
{ name: "Steps", kind: "ordered_list" },
|
|
292
|
-
{ name: "Risks", kind: "list" }
|
|
293
|
-
],
|
|
294
|
-
tablesOptional: true,
|
|
295
|
-
tables: [],
|
|
296
|
-
emptySectionValue: "None"
|
|
297
|
-
};
|
|
298
|
-
|
|
299
|
-
// 2. Generate instructions
|
|
300
|
-
const instructions = enrichInstructions(spec);
|
|
301
|
-
|
|
302
|
-
// 3. Send to LLM with your prompt + instructions
|
|
303
|
-
|
|
304
|
-
// 4. Validate and extract response
|
|
305
|
-
const validation = validateOutput(llmResponse, spec);
|
|
306
|
-
const data = extractOutput(llmResponse, spec);
|
|
307
|
-
```
|
|
75
|
+
Flex-MD doesn't just validate; it **repairs**. Our deterministic 9-step plan handles:
|
|
76
|
+
1. **Container Normalization**: Wrapping or merging multiple fenced blocks.
|
|
77
|
+
2. **Heading Standardization**: Case-insensitive matching and naming cleanup.
|
|
78
|
+
3. **Missing Headings**: Adding required sections as `None`.
|
|
79
|
+
4. **Stray Content**: Moving text outside headings into a default section.
|
|
80
|
+
5. **Format Conversion**: Transforming bullets to numbered lists (and vice-versa) based on spec.
|
|
308
81
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
```javascript
|
|
312
|
-
const conversation = parseFlexMd(`
|
|
313
|
-
[[message role=user id=msg1]]
|
|
314
|
-
What is the capital of France?
|
|
315
|
-
|
|
316
|
-
[[message role=assistant id=msg2]]
|
|
317
|
-
The capital of France is Paris.
|
|
318
|
-
`);
|
|
319
|
-
|
|
320
|
-
// Store as JSON, query, manipulate, then stringify back
|
|
321
|
-
const md = stringifyFlexMd(conversation);
|
|
322
|
-
```
|
|
323
|
-
|
|
324
|
-
### Markdown Documentation Processing
|
|
325
|
-
|
|
326
|
-
```javascript
|
|
327
|
-
// Build outline from docs
|
|
328
|
-
const outline = buildOutline(documentationMd);
|
|
82
|
+
## Documentation
|
|
329
83
|
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
{ name: "Installation", kind: "prose" },
|
|
334
|
-
{ name: "API Reference", kind: "list" }
|
|
335
|
-
],
|
|
336
|
-
// ... rest of spec
|
|
337
|
-
});
|
|
338
|
-
```
|
|
84
|
+
Detailed guides and specs can be found in the [docs](./docs) folder:
|
|
85
|
+
- [MDFlex Compliance Spec](./docs/mdflex-compliance.md)
|
|
86
|
+
- [OFS Syntax Guide](./SPEC.md)
|
|
339
87
|
|
|
340
88
|
## License
|
|
341
89
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { stringifyOutputFormatSpec } from "../ofs/stringify.js";
|
|
3
|
+
import { buildMarkdownGuidance } from "../ofs/enricher.js";
|
|
4
|
+
describe("OFS Object-to-Prompt Flow", () => {
|
|
5
|
+
const spec = {
|
|
6
|
+
description: "Standard report format for technical analysis.",
|
|
7
|
+
sections: [
|
|
8
|
+
{
|
|
9
|
+
name: "Summary",
|
|
10
|
+
kind: "text",
|
|
11
|
+
required: true,
|
|
12
|
+
description: "A brief summary of the topic.",
|
|
13
|
+
instruction: "Keep it under 3 sentences."
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
name: "Key Points",
|
|
17
|
+
kind: "list",
|
|
18
|
+
required: false,
|
|
19
|
+
description: "Important takeaways."
|
|
20
|
+
}
|
|
21
|
+
],
|
|
22
|
+
emptySectionValue: "N/A"
|
|
23
|
+
};
|
|
24
|
+
it("should stringify an OFS object correctly", () => {
|
|
25
|
+
const md = stringifyOutputFormatSpec(spec);
|
|
26
|
+
expect(md).toContain("## Output format (Markdown)");
|
|
27
|
+
expect(md).toContain("Standard report format for technical analysis.");
|
|
28
|
+
expect(md).toContain("- Summary — text (required)");
|
|
29
|
+
expect(md).toContain("Description: A brief summary of the topic.");
|
|
30
|
+
expect(md).toContain("Instruction: Keep it under 3 sentences.");
|
|
31
|
+
expect(md).toContain("- Key Points — list (optional)");
|
|
32
|
+
expect(md).toContain("Description: Important takeaways.");
|
|
33
|
+
expect(md).toContain("If a section is empty, write `N/A`.");
|
|
34
|
+
});
|
|
35
|
+
it("should build markdown guidance (L1) correctly", () => {
|
|
36
|
+
const guidance = buildMarkdownGuidance(spec, { level: 1 });
|
|
37
|
+
expect(guidance).toContain("Include these section headings somewhere");
|
|
38
|
+
expect(guidance).toContain("- Summary");
|
|
39
|
+
expect(guidance).toContain("Description: A brief summary of the topic.");
|
|
40
|
+
expect(guidance).toContain("Instruction: Keep it under 3 sentences.");
|
|
41
|
+
expect(guidance).toContain("- Key Points");
|
|
42
|
+
expect(guidance).toContain("Description: Important takeaways.");
|
|
43
|
+
});
|
|
44
|
+
it("should build markdown guidance (L3) correctly", () => {
|
|
45
|
+
const guidance = buildMarkdownGuidance(spec, { level: 3 });
|
|
46
|
+
expect(guidance).toContain("Return your entire answer inside a single ```markdown fenced block");
|
|
47
|
+
expect(guidance).toContain("- Summary");
|
|
48
|
+
expect(guidance).toContain("- Key Points (list)");
|
|
49
|
+
expect(guidance).toContain("Do not return JSON");
|
|
50
|
+
});
|
|
51
|
+
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { validateMarkdownAgainstOfs } from "../validate/validate.js";
|
|
3
|
+
import { parseIssuesEnvelope } from "../ofs/issuesEnvelope.js";
|
|
4
|
+
const SPEC = {
|
|
5
|
+
emptySectionValue: "None",
|
|
6
|
+
sections: [
|
|
7
|
+
{ name: "Short answer", kind: "text", required: true },
|
|
8
|
+
{ name: "Long answer", kind: "text", required: true },
|
|
9
|
+
{ name: "Reasoning", kind: "ordered_list", required: true },
|
|
10
|
+
{ name: "Assumptions", kind: "list", required: true },
|
|
11
|
+
{ name: "Unknowns", kind: "list", required: true },
|
|
12
|
+
],
|
|
13
|
+
};
|
|
14
|
+
function mdL1Good() {
|
|
15
|
+
return [
|
|
16
|
+
"## Short answer",
|
|
17
|
+
"Yes.",
|
|
18
|
+
"",
|
|
19
|
+
"## Long answer",
|
|
20
|
+
"More details.",
|
|
21
|
+
"",
|
|
22
|
+
"## Reasoning",
|
|
23
|
+
"1. First",
|
|
24
|
+
"2. Second",
|
|
25
|
+
"",
|
|
26
|
+
"## Assumptions",
|
|
27
|
+
"- A",
|
|
28
|
+
"",
|
|
29
|
+
"## Unknowns",
|
|
30
|
+
"- U",
|
|
31
|
+
].join("\n");
|
|
32
|
+
}
|
|
33
|
+
describe("parseIssuesEnvelope()", () => {
|
|
34
|
+
it("detects envelope and extracts bullets", () => {
|
|
35
|
+
const env = [
|
|
36
|
+
"## Status",
|
|
37
|
+
"Non-compliant output (cannot be repaired to the required format).",
|
|
38
|
+
"",
|
|
39
|
+
"## Issues",
|
|
40
|
+
"- Missing required section: \"Short answer\"",
|
|
41
|
+
"",
|
|
42
|
+
"## Expected",
|
|
43
|
+
"- Headings ...",
|
|
44
|
+
"",
|
|
45
|
+
"## Found",
|
|
46
|
+
"- Something ...",
|
|
47
|
+
"",
|
|
48
|
+
"## How to fix",
|
|
49
|
+
"- Do X",
|
|
50
|
+
].join("\n");
|
|
51
|
+
const parsed = parseIssuesEnvelope(env);
|
|
52
|
+
expect(parsed.isIssuesEnvelope).toBe(true);
|
|
53
|
+
expect(parsed.sections["issues"].bullets[0]).toContain("Missing required section");
|
|
54
|
+
});
|
|
55
|
+
});
|
|
56
|
+
describe("validateMarkdownAgainstOfs()", () => {
|
|
57
|
+
it("L0 accepts anything", () => {
|
|
58
|
+
const r = validateMarkdownAgainstOfs("whatever", SPEC, 0);
|
|
59
|
+
expect(r.ok).toBe(true);
|
|
60
|
+
});
|
|
61
|
+
it("L1 fails when required section missing", () => {
|
|
62
|
+
const md = [
|
|
63
|
+
"## Short answer",
|
|
64
|
+
"Yes.",
|
|
65
|
+
"",
|
|
66
|
+
"## Reasoning",
|
|
67
|
+
"1. A",
|
|
68
|
+
].join("\n");
|
|
69
|
+
const r = validateMarkdownAgainstOfs(md, SPEC, 1);
|
|
70
|
+
expect(r.ok).toBe(false);
|
|
71
|
+
expect(r.issues.some(i => i.code === "MISSING_SECTION")).toBe(true);
|
|
72
|
+
});
|
|
73
|
+
it("L2 requires a single fenced markdown block", () => {
|
|
74
|
+
const r = validateMarkdownAgainstOfs(mdL1Good(), SPEC, 2);
|
|
75
|
+
expect(r.ok).toBe(false);
|
|
76
|
+
expect(r.issues.some(i => i.code === "CONTAINER_MISSING")).toBe(true);
|
|
77
|
+
});
|
|
78
|
+
it("L2 passes with one fence containing valid L1", () => {
|
|
79
|
+
const md = ["```markdown", mdL1Good(), "```"].join("\n");
|
|
80
|
+
const r = validateMarkdownAgainstOfs(md, SPEC, 2);
|
|
81
|
+
expect(r.ok).toBe(true);
|
|
82
|
+
});
|
|
83
|
+
it("L3 enforces section kinds (Reasoning must be ordered list)", () => {
|
|
84
|
+
const bad = [
|
|
85
|
+
"```markdown",
|
|
86
|
+
[
|
|
87
|
+
"## Short answer",
|
|
88
|
+
"Yes",
|
|
89
|
+
"",
|
|
90
|
+
"## Long answer",
|
|
91
|
+
"Details",
|
|
92
|
+
"",
|
|
93
|
+
"## Reasoning",
|
|
94
|
+
"- bullet but should be ordered",
|
|
95
|
+
"",
|
|
96
|
+
"## Assumptions",
|
|
97
|
+
"- A",
|
|
98
|
+
"",
|
|
99
|
+
"## Unknowns",
|
|
100
|
+
"- U",
|
|
101
|
+
].join("\n"),
|
|
102
|
+
"```",
|
|
103
|
+
].join("\n");
|
|
104
|
+
const r = validateMarkdownAgainstOfs(bad, SPEC, 3);
|
|
105
|
+
expect(r.ok).toBe(false);
|
|
106
|
+
expect(r.issues.some(i => i.code === "WRONG_SECTION_KIND")).toBe(true);
|
|
107
|
+
});
|
|
108
|
+
});
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
const HARD_PATTERNS = [
|
|
2
|
+
/\breturn\s+only\s+json\b/i,
|
|
3
|
+
/\boutput\s+only\s+json\b/i,
|
|
4
|
+
/\bvalid\s+json\s+(object|array)\b/i,
|
|
5
|
+
/\bno\s+(prose|text|markdown)\b/i,
|
|
6
|
+
/\bdo\s+not\s+output\s+anything\s+else\b/i,
|
|
7
|
+
];
|
|
8
|
+
const SOFT_PATTERNS = [
|
|
9
|
+
/\binclude\s+json\b/i,
|
|
10
|
+
/\bjson\s+preferred\b/i,
|
|
11
|
+
/\badd\s+a\s+json\b/i,
|
|
12
|
+
/\bjson\s+format\b/i,
|
|
13
|
+
];
|
|
14
|
+
const SCHEMA_PATTERNS = [
|
|
15
|
+
/\bjson\s+schema\b/i,
|
|
16
|
+
/\bschema\b.*\bjson\b/i,
|
|
17
|
+
/\bajv\b/i,
|
|
18
|
+
/\bzod\b/i,
|
|
19
|
+
/\bopenapi\b/i,
|
|
20
|
+
];
|
|
21
|
+
const TOOLING_PATTERNS = [
|
|
22
|
+
/\bfunction\s+calling\b/i,
|
|
23
|
+
/\btools?\b/i,
|
|
24
|
+
/\bresponse_format\b/i,
|
|
25
|
+
/\btool\s+call\b/i,
|
|
26
|
+
/\barguments\b.*\bjson\b/i,
|
|
27
|
+
];
|
|
28
|
+
export function detectJsonIntent(text) {
|
|
29
|
+
const signals = [];
|
|
30
|
+
const add = (rx, strength, idx, len) => {
|
|
31
|
+
signals.push({
|
|
32
|
+
type: "pattern",
|
|
33
|
+
value: rx.source,
|
|
34
|
+
strength,
|
|
35
|
+
start: idx,
|
|
36
|
+
end: idx != null && len != null ? idx + len : undefined,
|
|
37
|
+
});
|
|
38
|
+
};
|
|
39
|
+
const scan = (patterns, strength) => {
|
|
40
|
+
for (const base of patterns) {
|
|
41
|
+
const rx = new RegExp(base.source, base.flags.includes("g") ? base.flags : base.flags + "g");
|
|
42
|
+
rx.lastIndex = 0;
|
|
43
|
+
let m;
|
|
44
|
+
while ((m = rx.exec(text)) !== null) {
|
|
45
|
+
add(base, strength, m.index, m[0]?.length ?? 0);
|
|
46
|
+
if (m[0]?.length === 0)
|
|
47
|
+
rx.lastIndex++;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
};
|
|
51
|
+
scan(HARD_PATTERNS, "hard");
|
|
52
|
+
scan(TOOLING_PATTERNS, "soft");
|
|
53
|
+
scan(SCHEMA_PATTERNS, "soft");
|
|
54
|
+
scan(SOFT_PATTERNS, "soft");
|
|
55
|
+
const hasHard = signals.some(s => s.strength === "hard");
|
|
56
|
+
const hasSchema = signals.some(s => SCHEMA_PATTERNS.some(rx => rx.source === s.value));
|
|
57
|
+
const hasTooling = signals.some(s => TOOLING_PATTERNS.some(rx => rx.source === s.value));
|
|
58
|
+
const hasSoft = signals.some(s => s.strength === "soft");
|
|
59
|
+
let intent = "none";
|
|
60
|
+
if (hasHard)
|
|
61
|
+
intent = "hard";
|
|
62
|
+
else if (hasSchema)
|
|
63
|
+
intent = "schema";
|
|
64
|
+
else if (hasTooling)
|
|
65
|
+
intent = "tooling";
|
|
66
|
+
else if (hasSoft)
|
|
67
|
+
intent = "soft";
|
|
68
|
+
const confidence = scoreConfidence(intent, signals);
|
|
69
|
+
return { intent, signals, confidence };
|
|
70
|
+
}
|
|
71
|
+
function scoreConfidence(intent, signals) {
|
|
72
|
+
if (intent === "none")
|
|
73
|
+
return signals.length ? 0.2 : 0.0;
|
|
74
|
+
const hard = signals.filter(s => s.strength === "hard").length;
|
|
75
|
+
const soft = signals.filter(s => s.strength === "soft").length;
|
|
76
|
+
if (intent === "hard")
|
|
77
|
+
return Math.min(1, 0.7 + hard * 0.15);
|
|
78
|
+
return Math.min(1, 0.4 + soft * 0.1);
|
|
79
|
+
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { DetectJsonPresenceResult, DetectJsonContainersResult } from "./types.js";
|
|
2
|
+
export declare function detectJsonContainers(md: string): DetectJsonContainersResult;
|
|
3
|
+
export declare function detectJsonPresence(text: string, opts?: {
|
|
4
|
+
parse?: boolean;
|
|
5
|
+
maxParses?: number;
|
|
6
|
+
}): DetectJsonPresenceResult;
|