yume-dsl-rich-text 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,366 @@
1
+ # yume-dsl-rich-text
2
+
3
+ A zero-dependency, recursive rich-text DSL parser with pluggable tag handlers and configurable syntax.
4
+
5
+ The core package provides only the parsing engine — no built-in tags.
6
+ You define your own tags, or install a companion handler package.
7
+
8
+ ## Features
9
+
10
+ - Zero dependencies
11
+ - Recursive parsing
12
+ - Pluggable tag handlers
13
+ - Inline / Raw / Block tag forms
14
+ - Configurable syntax tokens
15
+ - Graceful degradation for unknown tags
16
+ - Depth-limited parsing
17
+ - Custom error reporting
18
+ - Utility helpers for pipe arguments and token processing
19
+
20
+ ## Install
21
+
22
+ ```bash
23
+ npm install yume-dsl-rich-text
24
+ ```
25
+
26
+ ## Quick Start
27
+
28
+ ```ts
29
+ import { parseRichText } from "yume-dsl-rich-text";
30
+
31
+ const tokens = parseRichText("Hello $$bold(world)$$!", {
32
+ handlers: {
33
+ bold: {
34
+ inline: (tokens) => ({ type: "bold", value: tokens }),
35
+ },
36
+ },
37
+ });
38
+
39
+ // [
40
+ // { type: "text", value: "Hello ", id: "rt-0" },
41
+ // {
42
+ // type: "bold",
43
+ // value: [{ type: "text", value: "world", id: "rt-1" }],
44
+ // id: "rt-2"
45
+ // },
46
+ // { type: "text", value: "!", id: "rt-3" },
47
+ // ]
48
+ ```
49
+
50
+ Unregistered tags degrade to plain text instead of throwing or crashing.
51
+
52
+ ---
53
+
54
+ ## DSL Syntax
55
+
56
+ By default, the DSL uses `$$` as the tag prefix.
57
+
58
+ Three forms are supported:
59
+
60
+ ### Inline
61
+
62
+ ```text
63
+ $$tagName(content)$$
64
+ ```
65
+
66
+ Inline content is parsed recursively, so nesting works naturally.
67
+
68
+ ```text
69
+ $$bold(Hello $$italic(world)$$)$$
70
+ ```
71
+
72
+ ### Raw
73
+
74
+ ```text
75
+ $$tagName(arg)%
76
+ raw content preserved as-is
77
+ %end$$
78
+ ```
79
+
80
+ Raw content is not recursively parsed.
81
+
82
+ The close marker `%end$$` must be on its own line.
83
+
84
+ ### Block
85
+
86
+ ```text
87
+ $$tagName(arg)*
88
+ block content parsed recursively
89
+ *end$$
90
+ ```
91
+
92
+ Block content is parsed recursively.
93
+
94
+ The close marker `*end$$` must be on its own line.
95
+
96
+ ### Pipe Parameters
97
+
98
+ Inside arguments, `|` separates parameters.
99
+
100
+ ```text
101
+ $$link(https://example.com | click here)$$
102
+ $$code(js | Title | label)%
103
+ const x = 1;
104
+ %end$$
105
+ ```
106
+
107
+ Use `\|` to escape a literal pipe.
108
+
109
+ ### Escape Sequences
110
+
111
+ Prefix syntax tokens with `\` to produce them literally.
112
+
113
+ | Escape | Output |
114
+ |--------|--------|
115
+ | `\(` | `(` |
116
+ | `\)` | `)` |
117
+ | <code>&#92;&#124;</code> | <code>&#124;</code> |
118
+ | `\\` | `\` |
119
+ | `\%end$$` | `%end$$` |
120
+ | `\*end$$` | `*end$$` |
121
+
122
+ ---
123
+
124
+ ## API
125
+
126
+ ### `parseRichText(text, options?)`
127
+
128
+ Parses a DSL string into a token tree.
129
+
130
+ ```ts
131
+ function parseRichText(text: string, options?: ParseOptions): TextToken[];
132
+ ```
133
+
134
+ ### `stripRichText(text, options?)`
135
+
136
+ Parses a DSL string and flattens the result into plain text.
137
+
138
+ ```ts
139
+ function stripRichText(text: string, options?: ParseOptions): string;
140
+ ```
141
+
142
+ ---
143
+
144
+ ## ParseOptions
145
+
146
+ ```ts
147
+ interface ParseOptions {
148
+ handlers?: Record<string, TagHandler>;
149
+ blockTags?: string[];
150
+ depthLimit?: number;
151
+ mode?: "render" | "highlight";
152
+ onError?: (error: ParseError) => void;
153
+ syntax?: Partial<SyntaxInput>;
154
+ }
155
+ ```
156
+
157
+ ### Fields
158
+
159
+ - `handlers`: tag name → handler definition
160
+ - `blockTags`: tags treated as block-level for line-break normalization
161
+ - `depthLimit`: maximum nesting depth, default `50`
162
+ - `mode`:
163
+ - `"render"` normalizes block line breaks
164
+ - `"highlight"` preserves them
165
+ - `onError`: callback for parse errors
166
+ - `syntax`: override default syntax tokens
167
+
168
+ ---
169
+
170
+ ## Token Structure
171
+
172
+ ```ts
173
+ interface TextToken {
174
+ type: string;
175
+ value: string | TextToken[];
176
+ id: string;
177
+ }
178
+ ```
179
+
180
+ Handlers may attach additional runtime fields such as `url`, `lang`, or `title`.
181
+
182
+ ---
183
+
184
+ ## Writing Tag Handlers
185
+
186
+ A `TagHandler` can define behavior for any of the three tag forms.
187
+
188
+ ```ts
189
+ interface TagHandler {
190
+ inline?: (tokens: TextToken[]) => TokenDraft;
191
+ raw?: (arg: string | undefined, content: string) => TokenDraft;
192
+ block?: (arg: string | undefined, content: TextToken[]) => TokenDraft;
193
+ }
194
+ ```
195
+
196
+ You only need to implement the forms your tag supports.
197
+ Unsupported forms fall back to plain text.
198
+
199
+ ### Example
200
+
201
+ ```ts
202
+ import {
203
+ parseRichText,
204
+ parsePipeArgs,
205
+ extractText,
206
+ } from "yume-dsl-rich-text";
207
+
208
+ const handlers = {
209
+ bold: {
210
+ inline: (tokens) => ({ type: "bold", value: tokens }),
211
+ },
212
+
213
+ link: {
214
+ inline: (tokens) => {
215
+ const args = parsePipeArgs(tokens);
216
+
217
+ return {
218
+ type: "link",
219
+ url: args.text(0),
220
+ value:
221
+ args.parts.length > 1
222
+ ? args.materializedTailTokens(1)
223
+ : args.materializedTokens(0),
224
+ };
225
+ },
226
+ },
227
+
228
+ code: {
229
+ raw: (arg, content) => ({
230
+ type: "code-block",
231
+ lang: arg ?? "text",
232
+ value: content,
233
+ }),
234
+ },
235
+
236
+ info: {
237
+ block: (arg, content) => ({
238
+ type: "info",
239
+ title: arg || "Info",
240
+ value: content,
241
+ }),
242
+
243
+ inline: (tokens) => {
244
+ const args = parsePipeArgs(tokens);
245
+
246
+ return {
247
+ type: "info",
248
+ title: extractText(args.materializedTokens(0)),
249
+ value: args.materializedTailTokens(1),
250
+ };
251
+ },
252
+ },
253
+ };
254
+
255
+ const tokens = parseRichText(input, { handlers });
256
+ ```
257
+
258
+ ---
259
+
260
+ ## Utility Exports
261
+
262
+ These helpers are useful when writing handlers.
263
+
264
+ | Export | Description |
265
+ |--------|-------------|
266
+ | `parsePipeArgs(tokens)` | Split tokens by `\|` and access parsed parts |
267
+ | `parsePipeTextArgs(text)` | Same as above, but from plain text |
268
+ | `splitTokensByPipe(tokens)` | Low-level token splitter |
269
+ | `extractText(tokens)` | Flatten a token tree into plain text |
270
+ | `materializeTextTokens(tokens)` | Unescape text tokens in a tree |
271
+ | `unescapeInline(str)` | Unescape a single string |
272
+ | `createToken(draft)` | Add an auto-incremented `id` to a token draft |
273
+ | `resetTokenIdSeed()` | Reset the token id counter, useful in tests |
274
+
275
+ ---
276
+
277
+ ## Custom Syntax
278
+
279
+ You can override syntax tokens through `options.syntax`.
280
+
281
+ ```ts
282
+ import { parseRichText } from "yume-dsl-rich-text";
283
+
284
+ const tokens = parseRichText("@@bold(hello)@@", {
285
+ syntax: {
286
+ tagPrefix: "@@",
287
+ endTag: ")@@",
288
+ },
289
+ handlers: {
290
+ bold: {
291
+ inline: (tokens) => ({ type: "bold", value: tokens }),
292
+ },
293
+ },
294
+ });
295
+ ```
296
+
297
+ ### Default Syntax
298
+
299
+ ```ts
300
+ import { DEFAULT_SYNTAX } from "yume-dsl-rich-text";
301
+
302
+ // {
303
+ // tagPrefix: "$$",
304
+ // tagOpen: "(",
305
+ // tagClose: ")",
306
+ // tagDivider: "|",
307
+ // endTag: ")$$",
308
+ // rawOpen: ")%",
309
+ // blockOpen: ")*",
310
+ // blockClose: "*end$$",
311
+ // rawClose: "%end$$",
312
+ // escapeChar: "\\",
313
+ // }
314
+ ```
315
+
316
+ > Warning:
317
+ > Syntax tokens must remain distinguishable from one another.
318
+ > If two tokens are configured to the same string, behavior is undefined.
319
+
320
+ ---
321
+
322
+ ## Error Handling
323
+
324
+ Use `onError` to collect parse errors.
325
+
326
+ ```ts
327
+ const errors: ParseError[] = [];
328
+
329
+ parseRichText("$$bold(unclosed", {
330
+ handlers: {
331
+ bold: {
332
+ inline: (tokens) => ({ type: "bold", value: tokens }),
333
+ },
334
+ },
335
+ onError: (error) => errors.push(error),
336
+ });
337
+
338
+ // errors[0]
339
+ // {
340
+ // code: "INLINE_NOT_CLOSED",
341
+ // message: "(L1:C1) Inline tag not closed: >>>$$bold(<<< unclosed",
342
+ // line: 1,
343
+ // column: 1,
344
+ // snippet: " >>>$$bold(<<< unclosed"
345
+ // }
346
+ ```
347
+
348
+ If `onError` is omitted, malformed markup degrades to plain text and errors are discarded.
349
+
350
+ ### Error Codes
351
+
352
+ | Code | Meaning |
353
+ |------|---------|
354
+ | `DEPTH_LIMIT` | Nesting exceeded `depthLimit` |
355
+ | `UNEXPECTED_CLOSE` | Stray close tag with no matching open |
356
+ | `INLINE_NOT_CLOSED` | Inline tag was never closed |
357
+ | `BLOCK_NOT_CLOSED` | Block close marker is missing |
358
+ | `BLOCK_CLOSE_MALFORMED` | Block close marker exists but is malformed |
359
+ | `RAW_NOT_CLOSED` | Raw close marker is missing |
360
+ | `RAW_CLOSE_MALFORMED` | Raw close marker exists but is malformed |
361
+
362
+ ---
363
+
364
+ ## License
365
+
366
+ MIT
@@ -0,0 +1,80 @@
1
+ interface TextToken { // A node of the token tree returned by parseRichText.
2
+ type: string; // Token kind: "text" for plain text in the README example, otherwise whatever the tag handler returns (e.g. "bold").
3
+ value: string | TextToken[]; // Either literal text or the nested child tokens.
4
+ id: string; // Identifier added by createToken; the README example shows "rt-0", "rt-1", ...
5
+ }
6
+ interface TokenDraft { // A token without an `id`: what tag handlers return and what createToken accepts.
7
+ type: string; // Token kind chosen by the handler.
8
+ value: string | TextToken[]; // Literal text or child tokens.
9
+ [key: string]: unknown; // Allows extra handler-defined fields (the README mentions `url`, `lang`, `title`).
10
+ }
11
+ interface ParseError { // Describes one parse problem; delivered through ParseOptions.onError.
12
+ code: string; // Error identifier, e.g. "INLINE_NOT_CLOSED" (the README lists the documented codes).
13
+ message: string; // Human-readable text; in the README example it starts with a "(L1:C1)" position prefix.
14
+ line: number; // Line of the error; appears to be 1-based per the README example — TODO confirm.
15
+ column: number; // Column of the error; appears to be 1-based per the README example — TODO confirm.
16
+ snippet: string; // Source excerpt; the README example marks the offending tag with >>> and <<<.
17
+ }
18
+ interface TagHandler { // Per-tag callbacks, one per tag form; implement only the forms the tag supports.
19
+ inline?: (tokens: TextToken[]) => TokenDraft; // Inline form: receives the tag's recursively parsed content tokens.
20
+ raw?: (arg: string | undefined, content: string) => TokenDraft; // Raw form: receives the argument (if any) and the unparsed content string.
21
+ block?: (arg: string | undefined, content: TextToken[]) => TokenDraft; // Block form: receives the argument (if any) and the recursively parsed content tokens.
22
+ }
23
+ interface SyntaxInput { // The configurable syntax tokens of the DSL; defaults noted below are the DEFAULT_SYNTAX values listed in the README.
24
+ tagPrefix: string; // Default "$$".
25
+ tagOpen: string; // Default "(".
26
+ tagClose: string; // Default ")".
27
+ tagDivider: string; // Default "|"; separates pipe parameters inside arguments.
28
+ endTag: string; // Default ")$$"; closes an inline tag.
29
+ rawOpen: string; // Default ")%"; ends the argument list and opens raw content.
30
+ blockOpen: string; // Default ")*"; ends the argument list and opens block content.
31
+ blockClose: string; // Default "*end$$"; the README requires it to be on its own line.
32
+ rawClose: string; // Default "%end$$"; the README requires it to be on its own line.
33
+ escapeChar: string; // Default is a single backslash.
34
+ }
35
+ interface SyntaxConfig extends SyntaxInput { // Syntax tokens plus precomputed data; this is the return type of createSyntax.
36
+ /** Precomputed, sorted descending by length. */
37
+ escapableTokens: string[];
38
+ }
39
+ interface ParseOptions { // Optional settings accepted by parseRichText and stripRichText; every field may be omitted.
40
+ /** Tag handler map – keys are tag names, values define how each tag is parsed. */
41
+ handlers?: Record<string, TagHandler>;
42
+ /**
43
+ * Tags that receive block-level line-break normalization.
44
+ * Defaults to every tag whose handler has a `raw` or `block` parser.
45
+ */
46
+ blockTags?: string[];
47
+ /** Maximum nesting depth (default 50). */
48
+ depthLimit?: number;
49
+ /** `"render"` (default) strips leading/trailing line breaks inside blocks; `"highlight"` preserves them. */
50
+ mode?: "render" | "highlight";
51
+ /** Called for every parse error. If omitted, errors are silently discarded. */
52
+ onError?: (error: ParseError) => void;
53
+ /** Override DSL syntax tokens (default: `$$tag(…)$$` family). */
54
+ syntax?: Partial<SyntaxInput>;
55
+ }
56
+
57
+ declare const parseRichText: (text: string, options?: ParseOptions) => TextToken[]; // Parses a DSL string into a token tree.
58
+ declare const stripRichText: (text: string, options?: ParseOptions) => string; // Parses a DSL string and flattens the result into plain text.
59
+
60
+ declare const extractText: (tokens?: TextToken[]) => string; // Flattens a token tree into plain text; `tokens` may be omitted.
61
+ declare const materializeTextTokens: (tokens: TextToken[]) => TextToken[]; // Unescapes the text tokens in a tree (per the README).
62
+ interface PipeArgs { // Accessor object returned by parsePipeArgs and parsePipeTextArgs.
63
+ parts: TextToken[][]; // One token list per pipe-separated part.
64
+ text: (index: number) => string; // Returns a string for the part at `index`; the README uses text(0) as a link URL.
65
+ materializedTokens: (index: number) => TextToken[]; // Tokens for the part at `index`; presumably unescaped like materializeTextTokens — TODO confirm.
66
+ materializedTailTokens: (startIndex: number) => TextToken[]; // Presumably the tokens of all parts from `startIndex` onward — TODO confirm against the implementation.
67
+ }
68
+ declare const splitTokensByPipe: (tokens: TextToken[]) => TextToken[][]; // Low-level token splitter.
69
+ declare const parsePipeArgs: (tokens: TextToken[]) => PipeArgs; // Splits tokens by the pipe divider and exposes the parts through PipeArgs.
70
+ declare const parsePipeTextArgs: (text: string) => PipeArgs; // Same as parsePipeArgs, but starting from plain text.
71
+
72
+ declare const unescapeInline: (str: string) => string; // Unescapes a single string (per the README).
73
+
74
+ declare const createToken: (token: TokenDraft) => TextToken; // Adds an auto-incremented `id` to a token draft.
75
+ declare const resetTokenIdSeed: () => void; // Resets the token id counter; the README suggests this for tests.
76
+
77
+ declare const DEFAULT_SYNTAX: SyntaxInput; // The default syntax tokens (the `$$tag(…)$$` family).
78
+ declare const createSyntax: (overrides?: Partial<SyntaxInput>) => SyntaxConfig; // Builds a SyntaxConfig from optional overrides; presumably unspecified tokens fall back to DEFAULT_SYNTAX — TODO confirm.
79
+
80
+ export { DEFAULT_SYNTAX, type ParseError, type ParseOptions, type PipeArgs, type SyntaxConfig, type SyntaxInput, type TagHandler, type TextToken, type TokenDraft, createSyntax, createToken, extractText, materializeTextTokens, parsePipeArgs, parsePipeTextArgs, parseRichText, resetTokenIdSeed, splitTokensByPipe, stripRichText, unescapeInline };
@@ -0,0 +1,80 @@
1
+ interface TextToken { // A node of the token tree returned by parseRichText.
2
+ type: string; // Token kind: "text" for plain text in the README example, otherwise whatever the tag handler returns (e.g. "bold").
3
+ value: string | TextToken[]; // Either literal text or the nested child tokens.
4
+ id: string; // Identifier added by createToken; the README example shows "rt-0", "rt-1", ...
5
+ }
6
+ interface TokenDraft { // A token without an `id`: what tag handlers return and what createToken accepts.
7
+ type: string; // Token kind chosen by the handler.
8
+ value: string | TextToken[]; // Literal text or child tokens.
9
+ [key: string]: unknown; // Allows extra handler-defined fields (the README mentions `url`, `lang`, `title`).
10
+ }
11
+ interface ParseError { // Describes one parse problem; delivered through ParseOptions.onError.
12
+ code: string; // Error identifier, e.g. "INLINE_NOT_CLOSED" (the README lists the documented codes).
13
+ message: string; // Human-readable text; in the README example it starts with a "(L1:C1)" position prefix.
14
+ line: number; // Line of the error; appears to be 1-based per the README example — TODO confirm.
15
+ column: number; // Column of the error; appears to be 1-based per the README example — TODO confirm.
16
+ snippet: string; // Source excerpt; the README example marks the offending tag with >>> and <<<.
17
+ }
18
+ interface TagHandler { // Per-tag callbacks, one per tag form; implement only the forms the tag supports.
19
+ inline?: (tokens: TextToken[]) => TokenDraft; // Inline form: receives the tag's recursively parsed content tokens.
20
+ raw?: (arg: string | undefined, content: string) => TokenDraft; // Raw form: receives the argument (if any) and the unparsed content string.
21
+ block?: (arg: string | undefined, content: TextToken[]) => TokenDraft; // Block form: receives the argument (if any) and the recursively parsed content tokens.
22
+ }
23
+ interface SyntaxInput { // The configurable syntax tokens of the DSL; defaults noted below are the DEFAULT_SYNTAX values listed in the README.
24
+ tagPrefix: string; // Default "$$".
25
+ tagOpen: string; // Default "(".
26
+ tagClose: string; // Default ")".
27
+ tagDivider: string; // Default "|"; separates pipe parameters inside arguments.
28
+ endTag: string; // Default ")$$"; closes an inline tag.
29
+ rawOpen: string; // Default ")%"; ends the argument list and opens raw content.
30
+ blockOpen: string; // Default ")*"; ends the argument list and opens block content.
31
+ blockClose: string; // Default "*end$$"; the README requires it to be on its own line.
32
+ rawClose: string; // Default "%end$$"; the README requires it to be on its own line.
33
+ escapeChar: string; // Default is a single backslash.
34
+ }
35
+ interface SyntaxConfig extends SyntaxInput { // Syntax tokens plus precomputed data; this is the return type of createSyntax.
36
+ /** Precomputed, sorted descending by length. */
37
+ escapableTokens: string[];
38
+ }
39
+ interface ParseOptions { // Optional settings accepted by parseRichText and stripRichText; every field may be omitted.
40
+ /** Tag handler map – keys are tag names, values define how each tag is parsed. */
41
+ handlers?: Record<string, TagHandler>;
42
+ /**
43
+ * Tags that receive block-level line-break normalization.
44
+ * Defaults to every tag whose handler has a `raw` or `block` parser.
45
+ */
46
+ blockTags?: string[];
47
+ /** Maximum nesting depth (default 50). */
48
+ depthLimit?: number;
49
+ /** `"render"` (default) strips leading/trailing line breaks inside blocks; `"highlight"` preserves them. */
50
+ mode?: "render" | "highlight";
51
+ /** Called for every parse error. If omitted, errors are silently discarded. */
52
+ onError?: (error: ParseError) => void;
53
+ /** Override DSL syntax tokens (default: `$$tag(…)$$` family). */
54
+ syntax?: Partial<SyntaxInput>;
55
+ }
56
+
57
+ declare const parseRichText: (text: string, options?: ParseOptions) => TextToken[]; // Parses a DSL string into a token tree.
58
+ declare const stripRichText: (text: string, options?: ParseOptions) => string; // Parses a DSL string and flattens the result into plain text.
59
+
60
+ declare const extractText: (tokens?: TextToken[]) => string; // Flattens a token tree into plain text; `tokens` may be omitted.
61
+ declare const materializeTextTokens: (tokens: TextToken[]) => TextToken[]; // Unescapes the text tokens in a tree (per the README).
62
+ interface PipeArgs { // Accessor object returned by parsePipeArgs and parsePipeTextArgs.
63
+ parts: TextToken[][]; // One token list per pipe-separated part.
64
+ text: (index: number) => string; // Returns a string for the part at `index`; the README uses text(0) as a link URL.
65
+ materializedTokens: (index: number) => TextToken[]; // Tokens for the part at `index`; presumably unescaped like materializeTextTokens — TODO confirm.
66
+ materializedTailTokens: (startIndex: number) => TextToken[]; // Presumably the tokens of all parts from `startIndex` onward — TODO confirm against the implementation.
67
+ }
68
+ declare const splitTokensByPipe: (tokens: TextToken[]) => TextToken[][]; // Low-level token splitter.
69
+ declare const parsePipeArgs: (tokens: TextToken[]) => PipeArgs; // Splits tokens by the pipe divider and exposes the parts through PipeArgs.
70
+ declare const parsePipeTextArgs: (text: string) => PipeArgs; // Same as parsePipeArgs, but starting from plain text.
71
+
72
+ declare const unescapeInline: (str: string) => string; // Unescapes a single string (per the README).
73
+
74
+ declare const createToken: (token: TokenDraft) => TextToken; // Adds an auto-incremented `id` to a token draft.
75
+ declare const resetTokenIdSeed: () => void; // Resets the token id counter; the README suggests this for tests.
76
+
77
+ declare const DEFAULT_SYNTAX: SyntaxInput; // The default syntax tokens (the `$$tag(…)$$` family).
78
+ declare const createSyntax: (overrides?: Partial<SyntaxInput>) => SyntaxConfig; // Builds a SyntaxConfig from optional overrides; presumably unspecified tokens fall back to DEFAULT_SYNTAX — TODO confirm.
79
+
80
+ export { DEFAULT_SYNTAX, type ParseError, type ParseOptions, type PipeArgs, type SyntaxConfig, type SyntaxInput, type TagHandler, type TextToken, type TokenDraft, createSyntax, createToken, extractText, materializeTextTokens, parsePipeArgs, parsePipeTextArgs, parseRichText, resetTokenIdSeed, splitTokensByPipe, stripRichText, unescapeInline };