@uniweb/semantic-parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -0
- package/.eslintrc.json +28 -0
- package/LICENSE +674 -0
- package/README.md +395 -0
- package/docs/api.md +352 -0
- package/docs/file-structure.md +50 -0
- package/docs/guide.md +206 -0
- package/docs/mapping-patterns.md +928 -0
- package/docs/text-component-reference.md +515 -0
- package/package.json +41 -0
- package/reference/README.md +195 -0
- package/reference/Text.js +188 -0
- package/src/index.js +35 -0
- package/src/mappers/accessor.js +312 -0
- package/src/mappers/extractors.js +397 -0
- package/src/mappers/helpers.js +234 -0
- package/src/mappers/index.js +28 -0
- package/src/mappers/types.js +495 -0
- package/src/processors/byType.js +129 -0
- package/src/processors/groups.js +330 -0
- package/src/processors/groups_backup.js +379 -0
- package/src/processors/groups_doc.md +179 -0
- package/src/processors/sequence.js +573 -0
- package/src/processors/sequence_backup.js +402 -0
- package/src/utils/role.js +53 -0
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# Reference Implementations
|
|
2
|
+
|
|
3
|
+
This folder contains production-ready reference implementations for common patterns when working with the semantic parser. These are **not** exported from the package entry point (you cannot import them from the parser); they are provided for you to copy and adapt to your project.
|
|
4
|
+
|
|
5
|
+
## Available Components
|
|
6
|
+
|
|
7
|
+
### Text.js
|
|
8
|
+
|
|
9
|
+
A complete, production-ready React component for rendering content extracted by the semantic parser.
|
|
10
|
+
|
|
11
|
+
**Features:**
|
|
12
|
+
- Handles single strings or arrays of paragraphs
|
|
13
|
+
- Smart semantic defaults (headings, paragraphs, divs)
|
|
14
|
+
- Automatic empty content filtering
|
|
15
|
+
- Semantic wrapper components (H1-H6, P, PlainText, Div)
|
|
16
|
+
- Support for color marks and rich formatting
|
|
17
|
+
- **Trusts engine-sanitized data** - No component-level sanitization
|
|
18
|
+
- Simple and lightweight - no performance overhead
|
|
19
|
+
|
|
20
|
+
**Security Model:**
|
|
21
|
+
This component assumes content is **already sanitized by your engine**. It does NOT sanitize HTML itself. See the [Sanitization](#sanitization) section below.
|
|
22
|
+
|
|
23
|
+
**Installation:**
|
|
24
|
+
|
|
25
|
+
1. **Copy the file to your project:**
|
|
26
|
+
```bash
|
|
27
|
+
cp reference/Text.js src/components/Text.js
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
2. **No additional dependencies needed** - Just React
|
|
31
|
+
|
|
32
|
+
3. **Sanitize at engine level** (see [Sanitization](#sanitization))
|
|
33
|
+
|
|
34
|
+
4. **Use in your components:**
|
|
35
|
+
```jsx
|
|
36
|
+
import Text, { H1, P } from './components/Text';
|
|
37
|
+
import { parseContent, mappers } from '@uniweb/semantic-parser';
|
|
38
|
+
|
|
39
|
+
function MyComponent({ document }) {
|
|
40
|
+
const parsed = parseContent(document);
|
|
41
|
+
const hero = mappers.extractors.hero(parsed);
|
|
42
|
+
|
|
43
|
+
return (
|
|
44
|
+
<>
|
|
45
|
+
<H1 text={hero.title} />
|
|
46
|
+
<P text={hero.description} />
|
|
47
|
+
</>
|
|
48
|
+
);
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
**TypeScript Support:**
|
|
53
|
+
|
|
54
|
+
If using TypeScript, add this type definition file:
|
|
55
|
+
|
|
56
|
+
```typescript
|
|
57
|
+
// Text.d.ts
|
|
58
|
+
import * as React from 'react';
|
|
59
|
+
|
|
60
|
+
interface TextProps {
|
|
61
|
+
text: string | string[];
|
|
62
|
+
as?: 'h1' | 'h2' | 'h3' | 'h4' | 'h5' | 'h6' | 'p' | 'div' | 'span';
|
|
63
|
+
html?: boolean;
|
|
64
|
+
className?: string;
|
|
65
|
+
lineAs?: string;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
declare const Text: React.FC<TextProps>;
|
|
69
|
+
export default Text;
|
|
70
|
+
|
|
71
|
+
export const H1: React.FC<Omit<TextProps, 'as'>>;
|
|
72
|
+
export const H2: React.FC<Omit<TextProps, 'as'>>;
|
|
73
|
+
export const H3: React.FC<Omit<TextProps, 'as'>>;
|
|
74
|
+
export const H4: React.FC<Omit<TextProps, 'as'>>;
|
|
75
|
+
export const H5: React.FC<Omit<TextProps, 'as'>>;
|
|
76
|
+
export const H6: React.FC<Omit<TextProps, 'as'>>;
|
|
77
|
+
export const P: React.FC<Omit<TextProps, 'as'>>;
|
|
78
|
+
export const PlainText: React.FC<Omit<TextProps, 'html'>>;
|
|
79
|
+
export const Div: React.FC<Omit<TextProps, 'as'>>;
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Sanitization
|
|
83
|
+
|
|
84
|
+
**IMPORTANT:** This component does NOT sanitize HTML. Sanitization happens at the **engine level**.
|
|
85
|
+
|
|
86
|
+
### Why Engine-Level Sanitization?
|
|
87
|
+
|
|
88
|
+
1. **Performance** - Sanitize once during data preparation, not on every render
|
|
89
|
+
2. **Context-aware** - Engine knows if content is from trusted TipTap or external sources
|
|
90
|
+
3. **Cacheable** - Sanitized content can be memoized
|
|
91
|
+
4. **Clear responsibility** - Engine owns the data pipeline
|
|
92
|
+
|
|
93
|
+
### How to Sanitize
|
|
94
|
+
|
|
95
|
+
Use the parser's built-in utilities in your engine:
|
|
96
|
+
|
|
97
|
+
```javascript
|
|
98
|
+
import { sanitizeHtml } from '@uniweb/semantic-parser/mappers/types';
|
|
99
|
+
import { parseContent, mappers } from '@uniweb/semantic-parser';
|
|
100
|
+
|
|
101
|
+
// In your engine (NOT in the component)
|
|
102
|
+
function prepareHeroData(document) {
|
|
103
|
+
const parsed = parseContent(document);
|
|
104
|
+
const hero = mappers.extractors.hero(parsed);
|
|
105
|
+
|
|
106
|
+
// Sanitize here, before passing to component
|
|
107
|
+
return {
|
|
108
|
+
...hero,
|
|
109
|
+
title: sanitizeHtml(hero.title, {
|
|
110
|
+
allowedTags: ['strong', 'em', 'mark', 'span'],
|
|
111
|
+
allowedAttr: ['class', 'data-variant']
|
|
112
|
+
}),
|
|
113
|
+
description: hero.description.map(p => sanitizeHtml(p))
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// Component receives clean data
|
|
118
|
+
const heroData = prepareHeroData(doc);
|
|
119
|
+
<H1 text={heroData.title} /> {/* Already sanitized */}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### When to Sanitize
|
|
123
|
+
|
|
124
|
+
- **Always**: External content, user-generated content
|
|
125
|
+
- **Optional**: Trusted TipTap editor with locked schema
|
|
126
|
+
- **Never needed**: Hard-coded content in your app
|
|
127
|
+
|
|
128
|
+
See [docs/text-component-reference.md](../docs/text-component-reference.md#sanitization-tools) for detailed sanitization guidance.
|
|
129
|
+
|
|
130
|
+
## Customization
|
|
131
|
+
|
|
132
|
+
These reference implementations are designed to be copied and customized for your needs:
|
|
133
|
+
|
|
134
|
+
### Add Custom Styling Props
|
|
135
|
+
|
|
136
|
+
```jsx
|
|
137
|
+
// Add a spacing prop
|
|
138
|
+
const Text = React.memo(({ text, as = 'p', className, spacing = 'normal', ... }) => {
|
|
139
|
+
const spacingClass = spacing !== 'normal' ? `spacing-${spacing}` : '';
|
|
140
|
+
const combinedClass = [className, spacingClass].filter(Boolean).join(' ');
|
|
141
|
+
|
|
142
|
+
// Use combinedClass in rendering
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
// Usage
|
|
146
|
+
<P text={paragraphs} spacing="comfortable" />
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Remove Features You Don't Need
|
|
150
|
+
|
|
151
|
+
If you don't need certain features, simplify the component:
|
|
152
|
+
|
|
153
|
+
- Skip component-level sanitization if you already sanitize at engine level (the reference component ships without any)
|
|
154
|
+
- Remove wrapper components if you don't use them
|
|
155
|
+
- Remove HTML support if you only render plain text
|
|
156
|
+
- Remove array support if you always use strings
|
|
157
|
+
|
|
158
|
+
## Why Reference Implementations?
|
|
159
|
+
|
|
160
|
+
The semantic parser is a **data transformation library**, not a UI component library. It focuses on parsing and structuring content.
|
|
161
|
+
|
|
162
|
+
However, rendering that content requires common patterns that most projects need. Rather than forcing specific implementations, we provide battle-tested reference code that you can:
|
|
163
|
+
|
|
164
|
+
1. **Copy as-is** - Use immediately without modification
|
|
165
|
+
2. **Customize** - Adapt to your specific needs
|
|
166
|
+
3. **Learn from** - Understand best practices
|
|
167
|
+
4. **Replace** - Use your own implementations
|
|
168
|
+
|
|
169
|
+
This approach:
|
|
170
|
+
- ✅ Keeps the parser lightweight and focused
|
|
171
|
+
- ✅ Gives you full control over rendering
|
|
172
|
+
- ✅ Avoids forcing UI framework choices
|
|
173
|
+
- ✅ Provides working code, not just documentation
|
|
174
|
+
|
|
175
|
+
## Documentation
|
|
176
|
+
|
|
177
|
+
For detailed usage guides, see:
|
|
178
|
+
- [Text Component Reference](../docs/text-component-reference.md) - Complete documentation
|
|
179
|
+
- [Mapping Patterns Guide](../docs/mapping-patterns.md) - Integration examples
|
|
180
|
+
- [API Reference](../docs/api.md) - Parser API documentation
|
|
181
|
+
|
|
182
|
+
## Contributing
|
|
183
|
+
|
|
184
|
+
If you develop improved versions or new reference implementations, consider contributing them back to help other users.
|
|
185
|
+
|
|
186
|
+
Common additions that would be valuable:
|
|
187
|
+
- Vue.js version of Text component
|
|
188
|
+
- Svelte version of Text component
|
|
189
|
+
- Image component for handling image data
|
|
190
|
+
- Link component for handling link objects
|
|
191
|
+
- Video component for media handling
|
|
192
|
+
|
|
193
|
+
## License
|
|
194
|
+
|
|
195
|
+
These reference implementations are provided under the same license as the semantic parser (GPL-3.0-or-later) and can be freely used in your projects.
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import React from 'react';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Text - A smart typography component for rendering content from semantic-parser
|
|
5
|
+
*
|
|
6
|
+
* Features:
|
|
7
|
+
* - Handles single strings or arrays of paragraphs
|
|
8
|
+
* - Smart semantic defaults for different content types
|
|
9
|
+
* - Automatic filtering of empty content
|
|
10
|
+
*
|
|
11
|
+
* Security Model:
|
|
12
|
+
* - Assumes content is ALREADY SANITIZED at the engine level
|
|
13
|
+
* - Does NOT sanitize HTML (that's the engine's responsibility)
|
|
14
|
+
* - Trusts the data it receives and renders it as-is
|
|
15
|
+
*
|
|
16
|
+
* @param {Object} props
|
|
17
|
+
* @param {string|string[]} props.text - The content to render. Can be a string or an array of strings.
|
|
18
|
+
* @param {string} [props.as='p'] - The tag to use for the wrapper or primary semantic element (e.g. 'h1', 'p', 'div').
|
|
19
|
+
* @param {boolean} [props.html=true] - If true, renders content as HTML. If false, renders as plain text.
|
|
20
|
+
* @param {string} [props.className] - Optional className to apply to the outer wrapper or individual elements.
|
|
21
|
+
* @param {string} [props.lineAs] - For array inputs: tag to wrap each line. Defaults to 'div' for headings, 'p' for others.
|
|
22
|
+
*
|
|
23
|
+
* @example
|
|
24
|
+
* // Simple paragraph (semantic default)
|
|
25
|
+
* <Text text="Hello World" />
|
|
26
|
+
*
|
|
27
|
+
* // Explicit heading
|
|
28
|
+
* <Text text="Hello World" as="h1" />
|
|
29
|
+
*
|
|
30
|
+
* // Multi-line heading
|
|
31
|
+
* <Text text={["Welcome to", "Our Platform"]} as="h1" />
|
|
32
|
+
*
|
|
33
|
+
* // Multiple paragraphs (clean semantic output)
|
|
34
|
+
* <Text text={["First paragraph", "Second paragraph"]} />
|
|
35
|
+
*
|
|
36
|
+
* // Rich HTML content (assumes already sanitized by engine)
|
|
37
|
+
* <Text text={["Safe <strong>bold</strong> text", "With <em>emphasis</em>"]} />
|
|
38
|
+
*
|
|
39
|
+
* // Plain text when HTML is disabled
|
|
40
|
+
* <Text text="No <strong>formatting</strong> here" html={false} />
|
|
41
|
+
*
|
|
42
|
+
* // Explicit div wrapper when needed
|
|
43
|
+
* <Text text={["Item 1", "Item 2"]} as="div" lineAs="span" />
|
|
44
|
+
*/
|
|
45
|
+
function Text({ text, as = 'p', html = true, className, lineAs }) {
|
|
46
|
+
const isArray = Array.isArray(text);
|
|
47
|
+
const Tag = as;
|
|
48
|
+
const isHeading = as === 'h1' || as === 'h2' || as === 'h3' || as === 'h4' || as === 'h5' || as === 'h6';
|
|
49
|
+
|
|
50
|
+
// Single string input
|
|
51
|
+
if (!isArray) {
|
|
52
|
+
if (!text || text.trim() === '') return null;
|
|
53
|
+
|
|
54
|
+
if (html) {
|
|
55
|
+
return (
|
|
56
|
+
<Tag
|
|
57
|
+
className={className}
|
|
58
|
+
dangerouslySetInnerHTML={{ __html: text }}
|
|
59
|
+
/>
|
|
60
|
+
);
|
|
61
|
+
}
|
|
62
|
+
return <Tag className={className}>{text}</Tag>;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// Array input - filter empty content first
|
|
66
|
+
const filteredText = text.filter(
|
|
67
|
+
(item) => typeof item === 'string' && item.trim() !== ''
|
|
68
|
+
);
|
|
69
|
+
|
|
70
|
+
if (filteredText.length === 0) {
|
|
71
|
+
return null; // Don't render anything for empty arrays
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
// Determine the line wrapper tag with smart defaults
|
|
75
|
+
const LineTag = lineAs || (isHeading ? 'div' : 'p');
|
|
76
|
+
|
|
77
|
+
// Multi-line heading: wrap all lines in a single heading tag
|
|
78
|
+
if (isHeading) {
|
|
79
|
+
return (
|
|
80
|
+
<Tag className={className}>
|
|
81
|
+
{filteredText.map((line, i) => {
|
|
82
|
+
if (html) {
|
|
83
|
+
return (
|
|
84
|
+
<LineTag
|
|
85
|
+
key={i}
|
|
86
|
+
dangerouslySetInnerHTML={{ __html: line }}
|
|
87
|
+
/>
|
|
88
|
+
);
|
|
89
|
+
}
|
|
90
|
+
return <LineTag key={i}>{line}</LineTag>;
|
|
91
|
+
})}
|
|
92
|
+
</Tag>
|
|
93
|
+
);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Non-heading arrays: render each line as separate element
|
|
97
|
+
return (
|
|
98
|
+
<>
|
|
99
|
+
{filteredText.map((line, i) => {
|
|
100
|
+
if (html) {
|
|
101
|
+
return (
|
|
102
|
+
<LineTag
|
|
103
|
+
key={i}
|
|
104
|
+
className={className}
|
|
105
|
+
dangerouslySetInnerHTML={{ __html: line }}
|
|
106
|
+
/>
|
|
107
|
+
);
|
|
108
|
+
}
|
|
109
|
+
return (
|
|
110
|
+
<LineTag key={i} className={className}>
|
|
111
|
+
{line}
|
|
112
|
+
</LineTag>
|
|
113
|
+
);
|
|
114
|
+
})}
|
|
115
|
+
</>
|
|
116
|
+
);
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// ============================================================================
|
|
120
|
+
// Semantic Wrapper Components - Thin wrappers around Text for common use cases
|
|
121
|
+
// ============================================================================
|
|
122
|
+
|
|
123
|
+
/**
|
|
124
|
+
* H1 - Heading level 1 component
|
|
125
|
+
* @param {Object} props - All Text props except 'as' (automatically set to 'h1')
|
|
126
|
+
* @example
|
|
127
|
+
* <H1 text="Main Title" />
|
|
128
|
+
* <H1 text={["Multi-line", "Main Title"]} />
|
|
129
|
+
*/
|
|
130
|
+
export const H1 = (props) => <Text {...props} as="h1" />;
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* H2 - Heading level 2 component
|
|
134
|
+
* @param {Object} props - All Text props except 'as' (automatically set to 'h2')
|
|
135
|
+
*/
|
|
136
|
+
export const H2 = (props) => <Text {...props} as="h2" />;
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* H3 - Heading level 3 component
|
|
140
|
+
* @param {Object} props - All Text props except 'as' (automatically set to 'h3')
|
|
141
|
+
*/
|
|
142
|
+
export const H3 = (props) => <Text {...props} as="h3" />;
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* H4 - Heading level 4 component
|
|
146
|
+
* @param {Object} props - All Text props except 'as' (automatically set to 'h4')
|
|
147
|
+
*/
|
|
148
|
+
export const H4 = (props) => <Text {...props} as="h4" />;
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* H5 - Heading level 5 component
|
|
152
|
+
* @param {Object} props - All Text props except 'as' (automatically set to 'h5')
|
|
153
|
+
*/
|
|
154
|
+
export const H5 = (props) => <Text {...props} as="h5" />;
|
|
155
|
+
|
|
156
|
+
/**
|
|
157
|
+
* H6 - Heading level 6 component
|
|
158
|
+
* @param {Object} props - All Text props except 'as' (automatically set to 'h6')
|
|
159
|
+
*/
|
|
160
|
+
export const H6 = (props) => <Text {...props} as="h6" />;
|
|
161
|
+
|
|
162
|
+
/**
|
|
163
|
+
* P - Paragraph component (explicitly semantic)
|
|
164
|
+
* @param {Object} props - All Text props except 'as' (automatically set to 'p')
|
|
165
|
+
* @example
|
|
166
|
+
* <P text="A paragraph of content" />
|
|
167
|
+
* <P text={["First paragraph", "Second paragraph"]} />
|
|
168
|
+
*/
|
|
169
|
+
export const P = (props) => <Text {...props} as="p" />;
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* PlainText - Text component with HTML processing disabled
|
|
173
|
+
* @param {Object} props - All Text props except 'html' (automatically set to false)
|
|
174
|
+
* @example
|
|
175
|
+
* <PlainText text="Display <strong>tags</strong> as literal text" />
|
|
176
|
+
*/
|
|
177
|
+
export const PlainText = (props) => <Text {...props} html={false} />;
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* Div - Explicit div wrapper component
|
|
181
|
+
* @param {Object} props - All Text props except 'as' (automatically set to 'div')
|
|
182
|
+
* @example
|
|
183
|
+
* <Div text={["Item 1", "Item 2"]} lineAs="span" />
|
|
184
|
+
*/
|
|
185
|
+
export const Div = (props) => <Text {...props} as="div" />;
|
|
186
|
+
|
|
187
|
+
// Export all components
|
|
188
|
+
export default Text;
|
package/src/index.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { processSequence } from "./processors/sequence.js";
|
|
2
|
+
import { processGroups } from "./processors/groups.js";
|
|
3
|
+
import { processByType } from "./processors/byType.js";
|
|
4
|
+
import * as mappers from "./mappers/index.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Parse ProseMirror/TipTap content into a semantic structure.
 *
 * The document is first turned into a sequence; the grouped and by-type
 * views are then both derived from that sequence.
 *
 * @param {Object} doc - ProseMirror document
 * @param {Object} [options] - Parsing options, merged over the defaults
 * @param {boolean} [options.parseCodeAsJson=false] - Parse code blocks as JSON
 * @returns {{raw: Object, sequence: *, groups: *, byType: *}} The original
 *   document (`raw`) alongside the three processed views
 */
function parseContent(doc, options = {}) {
  // Caller-supplied values take precedence over the defaults.
  const settings = { parseCodeAsJson: false, ...options };

  const sequence = processSequence(doc, settings);

  // Property initializers run top to bottom, so groups are still built
  // before the by-type index.
  return {
    raw: doc,
    sequence,
    groups: processGroups(sequence, settings),
    byType: processByType(sequence),
  };
}
|
|
34
|
+
|
|
35
|
+
export { parseContent, mappers };
|