@uniweb/semantic-parser 1.1.3 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -64,12 +64,13 @@ The parser returns a flat content structure:
64
64
  subtitle2: '', // Third heading level
65
65
  paragraphs: [],
66
66
  links: [], // All link-like entities (including buttons, documents)
67
- imgs: [],
67
+ images: [],
68
68
  icons: [],
69
69
  videos: [],
70
70
  lists: [],
71
71
  quotes: [],
72
- data: {}, // Structured data (tagged code blocks, forms, cards)
72
+ snippets: [], // Fenced code [{ language, code }]
73
+ data: {}, // Structured data (tagged data blocks, forms, cards)
73
74
  headings: [], // Overflow headings after title/subtitle/subtitle2
74
75
  items: [], // Child content groups (same structure recursively)
75
76
  }
@@ -127,9 +128,9 @@ Editor-specific nodes are mapped to standard entities:
127
128
 
128
129
  See `docs/entity-consolidation.md` for complete mapping documentation.
129
130
 
130
- ### Tagged Code Blocks
131
+ ### Tagged Data Blocks
131
132
 
132
- Code blocks with tags route parsed data to the `data` object:
133
+ Data blocks with tags route parsed data to the `data` object:
133
134
 
134
135
  ```markdown
135
136
  ```yaml:nav-links
package/README.md CHANGED
@@ -73,7 +73,7 @@ result = {
73
73
  // Body fields
74
74
  paragraphs: ["Get started today."],
75
75
  links: [], // All links (including buttons, documents)
76
- imgs: [],
76
+ images: [],
77
77
  videos: [],
78
78
  icons: [],
79
79
  lists: [],
@@ -164,7 +164,7 @@ const schema = {
164
164
  maxLength: 150
165
165
  },
166
166
  image: {
167
- path: "imgs[0].url",
167
+ path: "images[0].url",
168
168
  type: "image",
169
169
  defaultValue: "/placeholder.jpg"
170
170
  }
@@ -214,7 +214,7 @@ const schema = {
214
214
  title: "title",
215
215
  subtitle: "subtitle",
216
216
  image: {
217
- path: "imgs[0].url",
217
+ path: "images[0].url",
218
218
  defaultValue: "/placeholder.jpg"
219
219
  },
220
220
  actions: {
package/docs/api.md CHANGED
@@ -130,7 +130,7 @@ Content organized into semantic groups with identified main content and items. T
130
130
 
131
131
  // Body fields (flat)
132
132
  paragraphs: ["paragraph text", ...],
133
- imgs: [
133
+ images: [
134
134
  { url: "...", caption: "...", alt: "..." }
135
135
  ],
136
136
  icons: ["<svg>...</svg>", ...],
@@ -160,7 +160,7 @@ Content organized into semantic groups with identified main content and items. T
160
160
  },
161
161
  items: [
162
162
  // Array of groups with same flat structure as main
163
- // { title, pretitle, subtitle, paragraphs, imgs, ... }
163
+ // { title, pretitle, subtitle, paragraphs, images, ... }
164
164
  ],
165
165
  metadata: {
166
166
  dividerMode: false, // Whether dividers were used for grouping
@@ -29,7 +29,7 @@ After consolidation, the parser outputs this flat structure:
29
29
  // Body fields
30
30
  paragraphs: [], // Text blocks with inline HTML formatting
31
31
  links: [], // All link-like entities (buttons, documents, nav links)
32
- imgs: [], // All images (with role distinguishing purpose)
32
+ images: [], // All images (with role distinguishing purpose)
33
33
  videos: [], // Video embeds
34
34
  icons: [], // Standalone icons
35
35
  lists: [], // Bullet/ordered lists (recursive structure)
@@ -94,7 +94,7 @@ All link-like content merges into the `links` array. The `role` attribute distin
94
94
 
95
95
  ### Images
96
96
 
97
- All image content uses the `imgs` array. The `role` attribute distinguishes purpose.
97
+ All image content uses the `images` array. The `role` attribute distinguishes purpose.
98
98
 
99
99
  ```js
100
100
  {
@@ -37,7 +37,7 @@ const schema = {
37
37
  maxLength: 150
38
38
  },
39
39
  image: {
40
- path: "groups.main.imgs[0].url",
40
+ path: "groups.main.images[0].url",
41
41
  type: "image", // Normalizes image data
42
42
  defaultValue: "/placeholder.jpg",
43
43
  treatEmptyAsDefault: true
@@ -152,7 +152,7 @@ Normalizes image data structure.
152
152
  ```js
153
153
  {
154
154
  image: {
155
- path: "groups.main.imgs[0]",
155
+ path: "groups.main.images[0]",
156
156
  type: "image",
157
157
  defaultValue: "/placeholder.jpg",
158
158
  defaultAlt: "Image"
@@ -234,7 +234,7 @@ const componentSchema = {
234
234
  maxLength: 200
235
235
  },
236
236
  image: {
237
- path: "groups.main.imgs[0].url",
237
+ path: "groups.main.images[0].url",
238
238
  type: "image",
239
239
  defaultValue: "/placeholder.jpg"
240
240
  },
@@ -273,7 +273,7 @@ const heroData = mappers.extractors.hero(parsed);
273
273
  // Or use schema-based extraction
274
274
  const customData = mappers.extractBySchema(parsed, {
275
275
  title: "groups.main.title",
276
- image: { path: "groups.main.imgs[0].url", defaultValue: "/placeholder.jpg" }
276
+ image: { path: "groups.main.images[0].url", defaultValue: "/placeholder.jpg" }
277
277
  });
278
278
  ```
279
279
 
@@ -354,10 +354,10 @@ const { accessor } = mappers;
354
354
  const title = accessor.getByPath(parsed, "groups.main.title");
355
355
 
356
356
  // Array index notation
357
- const firstImage = accessor.getByPath(parsed, "groups.main.imgs[0].url");
357
+ const firstImage = accessor.getByPath(parsed, "groups.main.images[0].url");
358
358
 
359
359
  // With default value
360
- const image = accessor.getByPath(parsed, "groups.main.imgs[0].url", {
360
+ const image = accessor.getByPath(parsed, "groups.main.images[0].url", {
361
361
  defaultValue: "/placeholder.jpg"
362
362
  });
363
363
 
@@ -383,7 +383,7 @@ const schema = {
383
383
 
384
384
  // Full config with options
385
385
  image: {
386
- path: "groups.main.imgs[0].url",
386
+ path: "groups.main.images[0].url",
387
387
  defaultValue: "/placeholder.jpg"
388
388
  },
389
389
 
@@ -420,7 +420,7 @@ const titles = accessor.mapArray(parsed, "groups.items", "title");
420
420
  const cards = accessor.mapArray(parsed, "groups.items", {
421
421
  title: "title",
422
422
  text: { path: "paragraphs", transform: p => p.join(" ") },
423
- image: { path: "imgs[0].url", defaultValue: "/default.jpg" }
423
+ image: { path: "images[0].url", defaultValue: "/default.jpg" }
424
424
  });
425
425
  // [
426
426
  // { title: "...", text: "...", image: "..." },
@@ -439,8 +439,8 @@ if (accessor.hasPath(parsed, "groups.main.banner.url")) {
439
439
  // Get first existing path (flat structure)
440
440
  const image = accessor.getFirstExisting(parsed, [
441
441
  "groups.main.banner.url",
442
- "groups.main.imgs[0].url",
443
- "groups.items[0].imgs[0].url"
442
+ "groups.main.images[0].url",
443
+ "groups.items[0].images[0].url"
444
444
  ], "/fallback.jpg");
445
445
  ```
446
446
 
@@ -666,7 +666,7 @@ const componentSchema = {
666
666
  brand: "groups.main.pretitle",
667
667
  title: "groups.main.title",
668
668
  subtitle: "groups.main.subtitle",
669
- image: { path: "groups.main.imgs[0].url", defaultValue: "/default.jpg" },
669
+ image: { path: "groups.main.images[0].url", defaultValue: "/default.jpg" },
670
670
  actions: {
671
671
  path: "groups.main.links",
672
672
  transform: links => links.map(l => ({ label: l.label, type: "primary" }))
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@uniweb/semantic-parser",
3
- "version": "1.1.3",
3
+ "version": "1.1.5",
4
4
  "description": "Semantic parser for ProseMirror/TipTap content structures",
5
5
  "type": "module",
6
6
  "main": "./src/index.js",
@@ -6,7 +6,7 @@ import { applyType, validateType } from './types.js';
6
6
 
7
7
  /**
8
8
  * Parse a path string into segments, handling array indices
9
- * @param {string} path - Path string (e.g., 'groups.main.body.imgs[0].url')
9
+ * @param {string} path - Path string (e.g., 'groups.main.body.images[0].url')
10
10
  * @returns {Array} Array of path segments
11
11
  */
12
12
  function parsePath(path) {
@@ -118,7 +118,7 @@ function getByPath(parsed, path, options = {}) {
118
118
  * maxLength: 60
119
119
  * },
120
120
  * image: {
121
- * path: 'groups.main.body.imgs[0].url',
121
+ * path: 'groups.main.body.images[0].url',
122
122
  * type: 'image',
123
123
  * defaultValue: '/placeholder.jpg'
124
124
  * },
@@ -2,7 +2,7 @@
2
2
  * Pre-built extractors for common component patterns
3
3
  *
4
4
  * All extractors work with the flat content structure:
5
- * - Root level: title, pretitle, subtitle, paragraphs, links, imgs, items, etc.
5
+ * - Root level: title, pretitle, subtitle, paragraphs, links, images, items, etc.
6
6
  * - Items array: each item has flat structure (title, paragraphs, etc.)
7
7
  */
8
8
 
@@ -25,8 +25,8 @@ function hero(parsed) {
25
25
  subtitle: parsed?.subtitle || null,
26
26
  kicker: parsed?.pretitle || null,
27
27
  description: parsed?.paragraphs || [],
28
- image: first(parsed?.imgs)?.url || null,
29
- imageAlt: first(parsed?.imgs)?.alt || null,
28
+ image: first(parsed?.images)?.url || null,
29
+ imageAlt: first(parsed?.images)?.alt || null,
30
30
  banner: null, // Banner detection would need to be added separately
31
31
  cta: buttonLink || plainLink || null,
32
32
  };
@@ -56,8 +56,8 @@ function card(parsed, options = {}) {
56
56
  title: content.title || null,
57
57
  subtitle: content.subtitle || null,
58
58
  description: content.paragraphs || [],
59
- image: first(content.imgs)?.url || null,
60
- imageAlt: first(content.imgs)?.alt || null,
59
+ image: first(content.images)?.url || null,
60
+ imageAlt: first(content.images)?.alt || null,
61
61
  icon: first(content.icons) || null,
62
62
  link: plainLink || null,
63
63
  cta: buttonLink || plainLink || null,
@@ -91,7 +91,7 @@ function article(parsed) {
91
91
  date: null, // Would need metadata support
92
92
  banner: null, // Banner detection would need to be added separately
93
93
  content: parsed?.paragraphs || [],
94
- images: parsed?.imgs || [],
94
+ images: parsed?.images || [],
95
95
  videos: parsed?.videos || [],
96
96
  links: parsed?.links || [],
97
97
  };
@@ -166,7 +166,7 @@ function features(parsed) {
166
166
  subtitle: item.subtitle || null,
167
167
  description: item.paragraphs || [],
168
168
  icon: first(item.icons) || null,
169
- image: first(item.imgs)?.url || null,
169
+ image: first(item.images)?.url || null,
170
170
  link: first(item.links) || null,
171
171
  }))
172
172
  .filter((feature) => feature.title);
@@ -192,8 +192,8 @@ function testimonial(parsed, options = {}) {
192
192
  author: content.title || null,
193
193
  role: content.subtitle || null,
194
194
  company: content.pretitle || null,
195
- image: first(content.imgs)?.url || null,
196
- imageAlt: first(content.imgs)?.alt || null,
195
+ image: first(content.images)?.url || null,
196
+ imageAlt: first(content.images)?.alt || null,
197
197
  };
198
198
  };
199
199
 
@@ -275,8 +275,8 @@ function team(parsed) {
275
275
  role: item.subtitle || null,
276
276
  department: item.pretitle || null,
277
277
  bio: item.paragraphs || [],
278
- image: first(item.imgs)?.url || null,
279
- imageAlt: first(item.imgs)?.alt || null,
278
+ image: first(item.images)?.url || null,
279
+ imageAlt: first(item.images)?.alt || null,
280
280
  links: item.links || [],
281
281
  }))
282
282
  .filter((member) => member.name);
@@ -296,14 +296,14 @@ function gallery(parsed, options = {}) {
296
296
  const images = [];
297
297
 
298
298
  if (source === "main" || source === "all") {
299
- const mainImages = parsed?.imgs || [];
299
+ const mainImages = parsed?.images || [];
300
300
  images.push(...mainImages);
301
301
  }
302
302
 
303
303
  if (source === "items" || source === "all") {
304
304
  const items = parsed?.items || [];
305
305
  items.forEach((item) => {
306
- const itemImages = item.imgs || [];
306
+ const itemImages = item.images || [];
307
307
  images.push(...itemImages);
308
308
  });
309
309
  }
@@ -339,12 +339,12 @@ function legacy(parsed) {
339
339
  const transformToNested = (content) => {
340
340
  if (!content) return null;
341
341
 
342
- let imgs = content.imgs || [];
343
- let banner = imgs.filter((item) => {
342
+ let images = content.images || [];
343
+ let banner = images.filter((item) => {
344
344
  return (item.role = "banner");
345
345
  })?.[0];
346
346
 
347
- if (!banner) banner = imgs[0];
347
+ if (!banner) banner = images[0];
348
348
 
349
349
  // Reconstruct deprecated fields from new structure
350
350
  const links = content.links || [];
@@ -377,7 +377,7 @@ function legacy(parsed) {
377
377
  body: {
378
378
  paragraphs: content.paragraphs || [],
379
379
  headings: content.headings || [],
380
- imgs,
380
+ images,
381
381
  videos: content.videos || [],
382
382
  lists: content.lists || [],
383
383
  links: plainLinks,
@@ -12,11 +12,12 @@ function flattenGroup(group) {
12
12
  subtitle2: group.header.subtitle2 || '',
13
13
  paragraphs: group.body.paragraphs || [],
14
14
  links: group.body.links || [],
15
- imgs: group.body.imgs || [],
15
+ images: group.body.images || [],
16
16
  icons: group.body.icons || [],
17
17
  lists: group.body.lists || [],
18
18
  videos: group.body.videos || [],
19
19
  insets: group.body.insets || [],
20
+ snippets: group.body.snippets || [],
20
21
  data: group.body.data || {},
21
22
  quotes: group.body.quotes || [],
22
23
  headings: group.body.headings || [],
@@ -39,11 +40,12 @@ function processGroups(sequence, options = {}) {
39
40
  subtitle2: '',
40
41
  paragraphs: [],
41
42
  links: [],
42
- imgs: [],
43
+ images: [],
43
44
  icons: [],
44
45
  lists: [],
45
46
  videos: [],
46
47
  insets: [],
48
+ snippets: [],
47
49
  data: {},
48
50
  quotes: [],
49
51
  headings: [],
@@ -76,7 +78,7 @@ function processGroups(sequence, options = {}) {
76
78
  subtitle2: '',
77
79
  paragraphs: [],
78
80
  links: [],
79
- imgs: [],
81
+ images: [],
80
82
  icons: [],
81
83
  lists: [],
82
84
  videos: [],
@@ -173,6 +175,7 @@ function isBannerImage(sequence, i) {
173
175
 
174
176
  function readHeadingGroup(sequence, startIdx) {
175
177
  const elements = [sequence[startIdx]];
178
+ let hasGoneDeeper = false;
176
179
 
177
180
  // Iterate starting from the next element
178
181
  for (let i = startIdx + 1; i < sequence.length; i++) {
@@ -186,6 +189,7 @@ function readHeadingGroup(sequence, startIdx) {
186
189
  // Case 1: Strictly Deeper (Standard Subtitle/Deep Header)
187
190
  // e.g. H1 -> H2
188
191
  if (element.level > previousElement.level) {
192
+ hasGoneDeeper = true;
189
193
  elements.push(element);
190
194
  continue;
191
195
  }
@@ -198,7 +202,18 @@ function readHeadingGroup(sequence, startIdx) {
198
202
  continue;
199
203
  }
200
204
 
201
- // Otherwise (Sibling or New Section), stop.
205
+ // Case 3: Same Level Continuation (multi-line heading)
206
+ // Only before going deeper — once a subtitle level is reached,
207
+ // same-level headings are new sections, not continuations.
208
+ // e.g. H1 -> H1 → merged into title array
209
+ // but H1 -> H2 -> H2 → second H2 starts a new group (items)
210
+ if (element.level === previousElement.level && !hasGoneDeeper) {
211
+ elements.push(element);
212
+ continue;
213
+ }
214
+
215
+ // Otherwise (New Section — went deeper then back up, or
216
+ // same-level after going deeper), stop.
202
217
  break;
203
218
  }
204
219
  return elements;
@@ -216,10 +231,11 @@ function processGroupContent(elements) {
216
231
  };
217
232
 
218
233
  const body = {
219
- imgs: [],
234
+ images: [],
220
235
  icons: [],
221
236
  videos: [],
222
237
  insets: [],
238
+ snippets: [],
223
239
  paragraphs: [],
224
240
  links: [],
225
241
  lists: [],
@@ -240,6 +256,10 @@ function processGroupContent(elements) {
240
256
  metadata,
241
257
  };
242
258
 
259
+ // Track last assigned heading slot and level for same-level merging
260
+ let lastSlot = null;
261
+ let lastLevel = null;
262
+
243
263
  for (let i = 0; i < elements.length; i++) {
244
264
  // We should only set pretitle once
245
265
  if (isPreTitle(elements, i) && !header.pretitle) {
@@ -256,19 +276,29 @@ function processGroupContent(elements) {
256
276
  // We should set the group level to the highest one instead of the first one.
257
277
  metadata.level ??= element.level;
258
278
 
259
- // h3 h2 h1 h1
260
- // Assign to header fields
261
- // h3 h2 h3 h4
262
- if (!header.title) {
279
+ // Same level as last assigned → merge into same slot as array
280
+ if (lastLevel !== null && element.level === lastLevel && lastSlot) {
281
+ const current = header[lastSlot];
282
+ if (Array.isArray(current)) {
283
+ current.push(element.text);
284
+ } else {
285
+ header[lastSlot] = [current, element.text];
286
+ }
287
+ } else if (!header.title) {
263
288
  header.title = element.text;
289
+ lastSlot = 'title';
264
290
  } else if (!header.subtitle) {
265
291
  header.subtitle = element.text;
292
+ lastSlot = 'subtitle';
266
293
  } else if (!header.subtitle2) {
267
294
  header.subtitle2 = element.text;
295
+ lastSlot = 'subtitle2';
268
296
  } else {
269
297
  // After subtitle2, we're in body - collect heading
270
298
  body.headings.push(element.text);
299
+ lastSlot = null;
271
300
  }
301
+ lastLevel = element.level;
272
302
  } else if (element.type === "list") {
273
303
  const listItems = element.children;
274
304
 
@@ -293,7 +323,7 @@ function processGroupContent(elements) {
293
323
  if (element.attrs?.role === "icon") {
294
324
  body.icons.push(element.attrs);
295
325
  } else {
296
- body.imgs.push(preserveProps);
326
+ body.images.push(preserveProps);
297
327
  }
298
328
  break;
299
329
 
@@ -339,11 +369,16 @@ function processGroupContent(elements) {
339
369
  break;
340
370
 
341
371
  case "codeBlock":
342
- // Fallback: tagged code blocks where parsing failed at build time
343
- // Untagged blocks stay in sequence for display
344
372
  const tag = element.attrs?.tag;
345
373
  if (tag) {
374
+ // Tagged block where parsing failed at build time — store as data
346
375
  body.data[tag] = element.text;
376
+ } else {
377
+ // Untagged code block — collect as a snippet
378
+ body.snippets.push({
379
+ language: element.attrs?.language || '',
380
+ code: typeof element.text === 'string' ? element.text : '',
381
+ });
347
382
  }
348
383
  break;
349
384
 
@@ -187,7 +187,7 @@ function processGroupContent(elements) {
187
187
  };
188
188
  let banner = null;
189
189
  const body = {
190
- imgs: [],
190
+ images: [],
191
191
  icons: [],
192
192
  videos: [],
193
193
  paragraphs: [],
@@ -259,7 +259,7 @@ function processGroupContent(elements) {
259
259
  break;
260
260
 
261
261
  case "image":
262
- body.imgs.push({
262
+ body.images.push({
263
263
  url: element.src,
264
264
  caption: element.caption,
265
265
  alt: element.alt,