@uniweb/semantic-parser 1.0.7 → 1.0.9

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -27,17 +27,17 @@ Gracefully handles content issues with silent, automatic cleanup:
27
27
  ```js
28
28
  const schema = {
29
29
  title: {
30
- path: "groups.main.header.title",
30
+ path: "groups.main.title",
31
31
  type: "plaintext", // Auto-strips HTML markup
32
32
  maxLength: 60 // Auto-truncates with smart boundaries
33
33
  },
34
34
  description: {
35
- path: "groups.main.body.paragraphs",
35
+ path: "groups.main.paragraphs",
36
36
  type: "excerpt", // Auto-creates excerpt from paragraphs
37
37
  maxLength: 150
38
38
  },
39
39
  image: {
40
- path: "groups.main.body.imgs[0].url",
40
+ path: "groups.main.imgs[0].url",
41
41
  type: "image", // Normalizes image data
42
42
  defaultValue: "/placeholder.jpg",
43
43
  treatEmptyAsDefault: true
@@ -74,7 +74,7 @@ Strips all HTML markup, returning clean text. Perfect for titles, labels, and an
74
74
  ```js
75
75
  {
76
76
  title: {
77
- path: "groups.main.header.title",
77
+ path: "groups.main.title",
78
78
  type: "plaintext",
79
79
  maxLength: 60, // Auto-truncate
80
80
  boundary: "word", // or "sentence", "character"
@@ -94,7 +94,7 @@ Preserves safe HTML while removing dangerous tags (script, iframe, etc.).
94
94
  ```js
95
95
  {
96
96
  description: {
97
- path: "groups.main.body.paragraphs[0]",
97
+ path: "groups.main.paragraphs[0]",
98
98
  type: "richtext",
99
99
  allowedTags: ["strong", "em", "a", "br"], // Customize allowed tags
100
100
  stripTags: ["script", "style"] // Additional tags to remove
@@ -112,7 +112,7 @@ Auto-generates excerpt from content, stripping markup and truncating intelligent
112
112
  ```js
113
113
  {
114
114
  excerpt: {
115
- path: "groups.main.body.paragraphs",
115
+ path: "groups.main.paragraphs",
116
116
  type: "excerpt",
117
117
  maxLength: 150,
118
118
  boundary: "word", // or "sentence"
@@ -131,7 +131,7 @@ Parses and optionally formats numbers.
131
131
  ```js
132
132
  {
133
133
  price: {
134
- path: "groups.main.header.title",
134
+ path: "groups.main.title",
135
135
  type: "number",
136
136
  format: {
137
137
  decimals: 2,
@@ -152,7 +152,7 @@ Normalizes image data structure.
152
152
  ```js
153
153
  {
154
154
  image: {
155
- path: "groups.main.body.imgs[0]",
155
+ path: "groups.main.imgs[0]",
156
156
  type: "image",
157
157
  defaultValue: "/placeholder.jpg",
158
158
  defaultAlt: "Image"
@@ -170,7 +170,7 @@ Normalizes link data structure.
170
170
  ```js
171
171
  {
172
172
  cta: {
173
- path: "groups.main.body.links[0]",
173
+ path: "groups.main.links[0]",
174
174
  type: "link"
175
175
  }
176
176
  }
@@ -212,34 +212,34 @@ const hints = mappers.validateSchema(parsed, schema, { mode: 'visual-editor' });
212
212
  // Component declares its content requirements
213
213
  const componentSchema = {
214
214
  brand: {
215
- path: "groups.main.header.pretitle",
215
+ path: "groups.main.pretitle",
216
216
  type: "plaintext",
217
217
  maxLength: 20,
218
218
  transform: (text) => text.toUpperCase()
219
219
  },
220
220
  title: {
221
- path: "groups.main.header.title",
221
+ path: "groups.main.title",
222
222
  type: "plaintext",
223
223
  maxLength: 60,
224
224
  required: true
225
225
  },
226
226
  subtitle: {
227
- path: "groups.main.header.subtitle",
227
+ path: "groups.main.subtitle",
228
228
  type: "plaintext",
229
229
  maxLength: 100
230
230
  },
231
231
  description: {
232
- path: "groups.main.body.paragraphs",
232
+ path: "groups.main.paragraphs",
233
233
  type: "excerpt",
234
234
  maxLength: 200
235
235
  },
236
236
  image: {
237
- path: "groups.main.body.imgs[0].url",
237
+ path: "groups.main.imgs[0].url",
238
238
  type: "image",
239
239
  defaultValue: "/placeholder.jpg"
240
240
  },
241
241
  cta: {
242
- path: "groups.main.body.links[0]",
242
+ path: "groups.main.links[0]",
243
243
  type: "link"
244
244
  }
245
245
  };
@@ -272,8 +272,8 @@ const heroData = mappers.extractors.hero(parsed);
272
272
 
273
273
  // Or use schema-based extraction
274
274
  const customData = mappers.extractBySchema(parsed, {
275
- title: "groups.main.header.title",
276
- image: { path: "groups.main.body.imgs[0].url", defaultValue: "/placeholder.jpg" }
275
+ title: "groups.main.title",
276
+ image: { path: "groups.main.imgs[0].url", defaultValue: "/placeholder.jpg" }
277
277
  });
278
278
  ```
279
279
 
@@ -291,7 +291,7 @@ const image = helpers.first(images, "/default.jpg");
291
291
  const lastParagraph = helpers.last(paragraphs);
292
292
 
293
293
  // Transform array
294
- const titles = helpers.transformArray(items, item => item.header.title);
294
+ const titles = helpers.transformArray(items, item => item.title);
295
295
 
296
296
  // Filter and transform
297
297
  const h2s = helpers.filterArray(headings, h => h.level === 2, h => h.content);
@@ -308,7 +308,7 @@ const cleanArray = helpers.compact([null, "text", "", undefined, "more"]);
308
308
 
309
309
  ```js
310
310
  // Get nested value safely
311
- const title = helpers.get(parsed, "groups.main.header.title", "Untitled");
311
+ const title = helpers.get(parsed, "groups.main.title", "Untitled");
312
312
 
313
313
  // Pick specific properties
314
314
  const metadata = helpers.pick(parsed.groups.main, ["header", "banner"]);
@@ -337,7 +337,7 @@ if (!validation.valid) {
337
337
  ```js
338
338
  // Wrap extraction in try-catch
339
339
  const safeExtractor = helpers.safe((parsed) => {
340
- return parsed.groups.main.header.title.toUpperCase();
340
+ return parsed.groups.main.title.toUpperCase();
341
341
  }, "DEFAULT");
342
342
 
343
343
  const title = safeExtractor(parsed); // Won't throw if path is invalid
@@ -350,24 +350,24 @@ const title = safeExtractor(parsed); // Won't throw if path is invalid
350
350
  ```js
351
351
  const { accessor } = mappers;
352
352
 
353
- // Simple path
354
- const title = accessor.getByPath(parsed, "groups.main.header.title");
353
+ // Simple path (flat structure)
354
+ const title = accessor.getByPath(parsed, "groups.main.title");
355
355
 
356
356
  // Array index notation
357
- const firstImage = accessor.getByPath(parsed, "groups.main.body.imgs[0].url");
357
+ const firstImage = accessor.getByPath(parsed, "groups.main.imgs[0].url");
358
358
 
359
359
  // With default value
360
- const image = accessor.getByPath(parsed, "groups.main.body.imgs[0].url", {
360
+ const image = accessor.getByPath(parsed, "groups.main.imgs[0].url", {
361
361
  defaultValue: "/placeholder.jpg"
362
362
  });
363
363
 
364
364
  // With transformation
365
- const description = accessor.getByPath(parsed, "groups.main.body.paragraphs", {
365
+ const description = accessor.getByPath(parsed, "groups.main.paragraphs", {
366
366
  transform: (paragraphs) => paragraphs.join(" ")
367
367
  });
368
368
 
369
369
  // Required field (throws if missing)
370
- const title = accessor.getByPath(parsed, "groups.main.header.title", {
370
+ const title = accessor.getByPath(parsed, "groups.main.title", {
371
371
  required: true
372
372
  });
373
373
  ```
@@ -378,22 +378,22 @@ Extract multiple fields at once using a schema:
378
378
 
379
379
  ```js
380
380
  const schema = {
381
- // Shorthand: just the path
382
- title: "groups.main.header.title",
381
+ // Shorthand: just the path (flat structure)
382
+ title: "groups.main.title",
383
383
 
384
384
  // Full config with options
385
385
  image: {
386
- path: "groups.main.body.imgs[0].url",
386
+ path: "groups.main.imgs[0].url",
387
387
  defaultValue: "/placeholder.jpg"
388
388
  },
389
389
 
390
390
  description: {
391
- path: "groups.main.body.paragraphs",
391
+ path: "groups.main.paragraphs",
392
392
  transform: (p) => p.join(" ")
393
393
  },
394
394
 
395
395
  cta: {
396
- path: "groups.main.body.links[0]",
396
+ path: "groups.main.links[0]",
397
397
  required: false
398
398
  }
399
399
  };
@@ -412,15 +412,15 @@ const data = accessor.extractBySchema(parsed, schema);
412
412
  Extract data from array of items:
413
413
 
414
414
  ```js
415
- // Simple: extract single field from each item
416
- const titles = accessor.mapArray(parsed, "groups.items", "header.title");
415
+ // Simple: extract single field from each item (flat structure)
416
+ const titles = accessor.mapArray(parsed, "groups.items", "title");
417
417
  // ["Item 1", "Item 2", "Item 3"]
418
418
 
419
419
  // Complex: extract multiple fields from each item
420
420
  const cards = accessor.mapArray(parsed, "groups.items", {
421
- title: "header.title",
422
- text: { path: "body.paragraphs", transform: p => p.join(" ") },
423
- image: { path: "body.imgs[0].url", defaultValue: "/default.jpg" }
421
+ title: "title",
422
+ text: { path: "paragraphs", transform: p => p.join(" ") },
423
+ image: { path: "imgs[0].url", defaultValue: "/default.jpg" }
424
424
  });
425
425
  // [
426
426
  // { title: "...", text: "...", image: "..." },
@@ -436,11 +436,11 @@ if (accessor.hasPath(parsed, "groups.main.banner.url")) {
436
436
  // Banner exists
437
437
  }
438
438
 
439
- // Get first existing path
439
+ // Get first existing path (flat structure)
440
440
  const image = accessor.getFirstExisting(parsed, [
441
441
  "groups.main.banner.url",
442
- "groups.main.body.imgs[0].url",
443
- "groups.items[0].body.imgs[0].url"
442
+ "groups.main.imgs[0].url",
443
+ "groups.items[0].imgs[0].url"
444
444
  ], "/fallback.jpg");
445
445
  ```
446
446
 
@@ -640,8 +640,8 @@ const enhancedData = {
640
640
  relatedPosts: helpers.transformArray(
641
641
  accessor.getByPath(parsed, "groups.items", { defaultValue: [] }),
642
642
  item => ({
643
- title: item.header.title,
644
- link: helpers.first(item.body.links)
643
+ title: item.title,
644
+ link: helpers.first(item.links)
645
645
  })
646
646
  ),
647
647
 
@@ -662,13 +662,13 @@ const componentSchema = {
662
662
  content: {
663
663
  type: "hero", // Use pre-built extractor
664
664
  // OR
665
- mapping: { // Use custom mapping
666
- brand: "groups.main.header.pretitle",
667
- title: "groups.main.header.title",
668
- subtitle: "groups.main.header.subtitle",
669
- image: { path: "groups.main.body.imgs[0].url", defaultValue: "/default.jpg" },
665
+ mapping: { // Use custom mapping (flat paths)
666
+ brand: "groups.main.pretitle",
667
+ title: "groups.main.title",
668
+ subtitle: "groups.main.subtitle",
669
+ image: { path: "groups.main.imgs[0].url", defaultValue: "/default.jpg" },
670
670
  actions: {
671
- path: "groups.main.body.links",
671
+ path: "groups.main.links",
672
672
  transform: links => links.map(l => ({ label: l.label, type: "primary" }))
673
673
  }
674
674
  }
@@ -274,9 +274,9 @@ function Card({ data }) {
274
274
  import { getByPath, extractBySchema } from '@uniweb/semantic-parser/mappers/accessor';
275
275
 
276
276
  const schema = {
277
- title: { path: 'groups.main.header.title' },
278
- subtitle: { path: 'groups.main.header.subtitle' },
279
- content: { path: 'groups.main.body.paragraphs' }
277
+ title: { path: 'groups.main.title' },
278
+ subtitle: { path: 'groups.main.subtitle' },
279
+ content: { path: 'groups.main.paragraphs' }
280
280
  };
281
281
 
282
282
  const data = extractBySchema(parsed, schema);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@uniweb/semantic-parser",
3
- "version": "1.0.7",
3
+ "version": "1.0.9",
4
4
  "description": "Semantic parser for ProseMirror/TipTap content structures",
5
5
  "type": "module",
6
6
  "main": "./src/index.js",
package/src/index.js CHANGED
@@ -1,6 +1,5 @@
1
1
  import { processSequence } from "./processors/sequence.js";
2
2
  import { processGroups } from "./processors/groups.js";
3
- import { processByType } from "./processors/byType.js";
4
3
  import * as mappers from "./mappers/index.js";
5
4
 
6
5
  /**
@@ -8,7 +7,7 @@ import * as mappers from "./mappers/index.js";
8
7
  * @param {Object} doc - ProseMirror document
9
8
  * @param {Object} options - Parsing options
10
9
  * @param {boolean} options.parseCodeAsJson - Parse code blocks as JSON. Default: false
11
- * @returns {Object} Parsed content structure
10
+ * @returns {Object} Flat content structure with sequence for ordered access
12
11
  */
13
12
  function parseContent(doc, options = {}) {
14
13
  // Default options
@@ -17,18 +16,17 @@ function parseContent(doc, options = {}) {
17
16
  ...options,
18
17
  };
19
18
 
20
- // Process content in different ways
19
+ // Process sequence (ordered elements)
21
20
  const sequence = processSequence(doc, opts);
22
21
 
22
+ // Process groups (semantic structure) - returns flat object
23
23
  const groups = processGroups(sequence, opts);
24
24
 
25
- const byType = processByType(sequence);
26
-
25
+ // Return flat structure with sequence at top level
27
26
  return {
28
27
  raw: doc,
29
28
  sequence,
30
- groups,
31
- byType,
29
+ ...groups, // Spread flat content: title, paragraphs, items, etc.
32
30
  };
33
31
  }
34
32