@uniweb/semantic-parser 1.0.8 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +42 -25
- package/README.md +52 -104
- package/docs/api.md +38 -40
- package/docs/mapping-patterns.md +47 -47
- package/docs/text-component-reference.md +3 -3
- package/package.json +4 -1
- package/src/index.js +5 -7
- package/src/mappers/extractors.js +113 -120
- package/src/processors/groups.js +105 -30
- package/src/processors/sequence.js +59 -11
- package/src/processors/byType.js +0 -130
package/src/processors/byType.js
DELETED
|
@@ -1,130 +0,0 @@
|
|
|
1
|
-
/**
 * Organize content elements by their type while preserving context.
 *
 * Each element is shallow-copied with an added `context` property (built by
 * `getElementContext`) and filed into the collection matching its `type`.
 * Elements whose type is not heading/paragraph/image/list/divider are still
 * counted toward `metadata.dominantType`, but are not stored in a collection.
 *
 * @param {Array} sequence Flat sequence of elements
 * @returns {Object} Content organized by type: `headings`, `paragraphs`,
 *   `images` (arrays keyed by role), `lists`, `dividers` and `metadata`,
 *   plus the helper methods attached by `addCollectionHelpers`.
 */
function processByType(sequence) {
  const collections = {
    headings: [],
    paragraphs: [],
    images: {
      background: [],
      content: [],
      gallery: [],
      icon: [],
    },
    lists: [],
    dividers: [],
    metadata: {
      totalElements: sequence.length,
      dominantType: null,
      hasMedia: false,
    },
  };

  // Track type frequencies for metadata
  const typeFrequency = new Map();

  sequence.forEach((element, index) => {
    // Track element type frequency
    typeFrequency.set(element.type, (typeFrequency.get(element.type) || 0) + 1);

    // Add context information
    const context = getElementContext(sequence, index);
    const enrichedElement = { ...element, context };

    // Process element based on type
    switch (element.type) {
      case "heading":
        collections.headings.push(enrichedElement);
        break;

      case "paragraph":
        collections.paragraphs.push(enrichedElement);
        break;

      case "image": {
        // Support both attrs.role and top-level role for backwards compatibility
        const role = element.attrs?.role || element.role || "content";

        // Roles come from content, so a name such as "constructor" or
        // "__proto__" must not resolve to an inherited Object.prototype
        // member: a truthiness check would find it and `.push` would throw.
        // Only own properties count as existing buckets.
        const hasBucket = Object.prototype.hasOwnProperty.call(
          collections.images,
          role
        );
        if (!hasBucket) {
          // defineProperty (rather than assignment) so that "__proto__"
          // becomes an ordinary own key instead of replacing the prototype.
          Object.defineProperty(collections.images, role, {
            value: [],
            writable: true,
            enumerable: true,
            configurable: true,
          });
        }
        collections.images[role].push(enrichedElement);
        collections.metadata.hasMedia = true;
        break;
      }

      case "list":
        collections.lists.push(enrichedElement);
        break;

      case "divider":
        collections.dividers.push(enrichedElement);
        break;
    }
  });

  // Calculate dominant type. The strict `>` means that on a tie the type
  // that appeared first in the sequence (Map insertion order) wins.
  let maxFrequency = 0;
  typeFrequency.forEach((frequency, type) => {
    if (frequency > maxFrequency) {
      maxFrequency = frequency;
      collections.metadata.dominantType = type;
    }
  });

  // Add helper methods
  addCollectionHelpers(collections);

  return collections;
}
|
|
81
|
-
|
|
82
|
-
/**
 * Get context information for an element.
 *
 * @param {Array} sequence Flat sequence of elements
 * @param {number} position Index of the element within `sequence`
 * @returns {Object} `{ position, previousElement, nextElement, nearestHeading }`.
 *   `previousElement` is null at index 0 and `nextElement` is null at the
 *   last index. `nearestHeading` is the closest element before `position`
 *   whose `type` is "heading", or null when there is none.
 */
function getElementContext(sequence, position) {
  const context = {
    position,
    previousElement: position > 0 ? sequence[position - 1] : null,
    nextElement: position < sequence.length - 1 ? sequence[position + 1] : null,
    nearestHeading: null,
  };

  // Find nearest preceding heading. Optional chaining lets the scan step
  // over null/undefined entries instead of throwing on `.type`.
  for (let i = position - 1; i >= 0; i--) {
    if (sequence[i]?.type === "heading") {
      context.nearestHeading = sequence[i];
      break;
    }
  }

  return context;
}
|
|
103
|
-
|
|
104
|
-
/**
 * Attach query helpers to a collections object (mutates it in place).
 *
 * Both helpers read their data through `this`, so they must be called as
 * methods of the collections object.
 *
 * @param {Object} collections Object with `headings`, `paragraphs`, `images`
 *   (arrays keyed by role) and `lists`.
 */
function addCollectionHelpers(collections) {
  // Headings whose `level` strictly equals the requested level.
  collections.getHeadingsByLevel = function (level) {
    const isRequestedLevel = (heading) => heading.level === level;
    return this.headings.filter(isRequestedLevel);
  };

  // Paragraphs, images (every role) and lists whose nearest preceding heading
  // satisfies `headingFilter`. Elements without a nearest heading are skipped.
  collections.getElementsByHeadingContext = function (headingFilter) {
    const { paragraphs, images, lists } = this;
    const candidates = paragraphs.concat(...Object.values(images), lists);

    const matches = [];
    for (const candidate of candidates) {
      const heading = candidate.context?.nearestHeading;
      if (heading && headingFilter(heading)) {
        matches.push(candidate);
      }
    }
    return matches;
  };
}
|
|
127
|
-
|
|
128
|
-
export {
|
|
129
|
-
processByType
|
|
130
|
-
};
|