@gsriram24/structured-data-validator 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslint-header.txt +9 -0
- package/.releaserc.json +22 -0
- package/CHANGELOG.md +71 -0
- package/CODE_OF_CONDUCT.md +79 -0
- package/LICENSE +201 -0
- package/README.md +109 -0
- package/package.json +49 -0
- package/renovate.json +4 -0
- package/src/index.js +14 -0
- package/src/types/3DModel.js +21 -0
- package/src/types/AggregateOffer.js +23 -0
- package/src/types/AggregateRating.js +35 -0
- package/src/types/Answer.js +18 -0
- package/src/types/Article.js +26 -0
- package/src/types/Brand.js +18 -0
- package/src/types/BreadcrumbList.js +148 -0
- package/src/types/BroadcastEvent.js +23 -0
- package/src/types/Certification.js +26 -0
- package/src/types/Clip.js +25 -0
- package/src/types/DefinedRegion.js +38 -0
- package/src/types/Event.js +51 -0
- package/src/types/FAQPage.js +18 -0
- package/src/types/HowTo.js +27 -0
- package/src/types/HowToDirection.js +19 -0
- package/src/types/HowToSection.js +22 -0
- package/src/types/HowToStep.js +43 -0
- package/src/types/HowToTip.js +19 -0
- package/src/types/ImageObject.js +40 -0
- package/src/types/JobPosting.js +63 -0
- package/src/types/ListItem.js +28 -0
- package/src/types/LocalBusiness.js +30 -0
- package/src/types/MerchantReturnPolicy.js +96 -0
- package/src/types/Offer.js +39 -0
- package/src/types/OfferShippingDetails.js +27 -0
- package/src/types/Organization.js +18 -0
- package/src/types/PeopleAudience.js +37 -0
- package/src/types/Person.js +18 -0
- package/src/types/PriceSpecification.js +21 -0
- package/src/types/Product.js +90 -0
- package/src/types/ProductMerchant.js +88 -0
- package/src/types/QuantitativeValue.js +36 -0
- package/src/types/Question.js +21 -0
- package/src/types/Rating.js +56 -0
- package/src/types/Recipe.js +75 -0
- package/src/types/Review.js +35 -0
- package/src/types/SeekToAction.js +22 -0
- package/src/types/ShippingDeliveryTime.js +21 -0
- package/src/types/SizeSpecification.js +22 -0
- package/src/types/VideoObject.js +41 -0
- package/src/types/WebSite.js +23 -0
- package/src/types/base.js +201 -0
- package/src/types/schemaOrg.js +227 -0
- package/src/utils.js +15 -0
- package/src/validator.js +323 -0
package/src/validator.js
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Validates structured data (JSON-LD, microdata, RDFa) by dispatching every
 * typed object to the handler modules registered for its `@type`.
 *
 * Handler modules are loaded lazily through dynamic `import()`; each module's
 * default export is instantiated with `{ dataFormat, path, type, schemaOrgJson }`
 * and its `validate(data)` result is collected as a list of issues.
 */
export class Validator {
  /**
   * @param {object} [schemaOrgJson] Optional schema.org vocabulary as JSON-LD.
   *   Its `@graph` entries (`@id`, `@type`, `rdfs:subClassOf`) are used to fall
   *   back to a parent type's handlers. When provided, the schema.org handler
   *   is additionally run for every type that has at least one handler.
   */
  constructor(schemaOrgJson) {
    this.schemaOrgJson = schemaOrgJson;
    if (this.schemaOrgJson) {
      // Only add schema.org validation handler if schema is provided
      this.globalHandlers = [() => import('./types/schemaOrg.js')];
    }

    // When true, progress and issues are written to the console.
    this.debug = false;

    // Maps a type name to the lazy loaders of its handler modules.
    this.registeredHandlers = {
      '3DModel': [() => import('./types/3DModel.js')],
      AggregateOffer: [() => import('./types/AggregateOffer.js')],
      AggregateRating: [() => import('./types/AggregateRating.js')],
      Answer: [() => import('./types/Answer.js')],
      Article: [() => import('./types/Article.js')],
      BlogPosting: [() => import('./types/Article.js')],
      Brand: [() => import('./types/Brand.js')],
      BreadcrumbList: [() => import('./types/BreadcrumbList.js')],
      Certification: [() => import('./types/Certification.js')],
      DefinedRegion: [() => import('./types/DefinedRegion.js')],
      Event: [() => import('./types/Event.js')],
      FAQPage: [() => import('./types/FAQPage.js')],
      HowTo: [() => import('./types/HowTo.js')],
      ImageObject: [() => import('./types/ImageObject.js')],
      VideoObject: [() => import('./types/VideoObject.js')],
      Clip: [() => import('./types/Clip.js')],
      BroadcastEvent: [() => import('./types/BroadcastEvent.js')],
      SeekToAction: [() => import('./types/SeekToAction.js')],
      ListItem: [() => import('./types/ListItem.js')],
      LocalBusiness: [() => import('./types/LocalBusiness.js')],
      MerchantReturnPolicy: [() => import('./types/MerchantReturnPolicy.js')],
      NewsArticle: [() => import('./types/Article.js')],
      Offer: [() => import('./types/Offer.js')],
      OfferShippingDetails: [() => import('./types/OfferShippingDetails.js')],
      Organization: [() => import('./types/Organization.js')],
      PeopleAudience: [() => import('./types/PeopleAudience.js')],
      Person: [() => import('./types/Person.js')],
      PriceSpecification: [() => import('./types/PriceSpecification.js')],
      Product: [
        () => import('./types/Product.js'),
        () => import('./types/ProductMerchant.js'),
      ],
      QuantitativeValue: [() => import('./types/QuantitativeValue.js')],
      Question: [() => import('./types/Question.js')],
      Rating: [() => import('./types/Rating.js')],
      Review: [() => import('./types/Review.js')],
      ShippingDeliveryTime: [() => import('./types/ShippingDeliveryTime.js')],
      SizeSpecification: [() => import('./types/SizeSpecification.js')],
      UnitPriceSpecification: [() => import('./types/PriceSpecification.js')],
      JobPosting: [() => import('./types/JobPosting.js')],
      Recipe: [() => import('./types/Recipe.js')],
      HowToStep: [() => import('./types/HowToStep.js')],
      HowToSection: [() => import('./types/HowToSection.js')],
      HowToDirection: [() => import('./types/HowToDirection.js')],
      HowToTip: [() => import('./types/HowToTip.js')],
      WebSite: [() => import('./types/WebSite.js')],
    };
  }

  /**
   * Returns the handlers registered directly under `type`, or `undefined`.
   *
   * Only own properties count: type names come from the validated document,
   * so names such as "constructor" or "__proto__" must not resolve to members
   * inherited from Object.prototype.
   */
  #ownHandlers(type) {
    if (!Object.hasOwn(this.registeredHandlers, type)) return undefined;
    return this.registeredHandlers[type] || undefined;
  }

  /**
   * Get parent types from schema.org JSON-LD.
   *
   * @param {string} type Type name to look up in the `@graph`.
   * @returns {string[]} Parent type names with the `schema:`,
   *   `https://schema.org/` and `http://schema.org/` prefixes removed; empty
   *   when no schema is available or the type has no `rdfs:subClassOf`.
   */
  #getParentTypes(type) {
    const graph = this.schemaOrgJson?.['@graph'];
    if (!Array.isArray(graph)) return [];

    // Accept the same id spellings that are stripped from parent ids below.
    const candidateIds = new Set([
      type,
      `schema:${type}`,
      `https://schema.org/${type}`,
      `http://schema.org/${type}`,
    ]);

    const typeEntry = graph.find((entry) => {
      if (entry === null || typeof entry !== 'object') return false;
      // JSON-LD allows `@type` to be a single value or an array of values.
      const entryTypes = Array.isArray(entry['@type'])
        ? entry['@type']
        : [entry['@type']];
      return entryTypes.includes('rdfs:Class') && candidateIds.has(entry['@id']);
    });

    const subClassOf = typeEntry?.['rdfs:subClassOf'];
    if (!subClassOf) return [];

    const parents = Array.isArray(subClassOf) ? subClassOf : [subClassOf];

    return parents
      .map((parent) => (typeof parent === 'string' ? parent : parent?.['@id']))
      // Skip malformed references instead of throwing on them.
      .filter((id) => typeof id === 'string' && id.length > 0)
      .map((id) =>
        id
          .replace('schema:', '')
          .replace('https://schema.org/', '')
          .replace('http://schema.org/', ''),
      );
  }

  /**
   * Get handlers for type, falling back to parent types if needed.
   *
   * @param {string} type Type name taken from the data's `@type`.
   * @returns {Promise<Function[]>} A fresh array of handler loaders (safe for
   *   the caller to extend); empty when neither the type nor any ancestor has
   *   registered handlers.
   */
  async #getHandlersForType(type) {
    // 1. Check direct mapping first (priority)
    const direct = this.#ownHandlers(type);
    if (direct) {
      return [...direct];
    }

    // 2. If schemaOrgJson available, check parent types
    if (!this.schemaOrgJson) {
      return [];
    }

    // Breadth-first walk up the class hierarchy; `visited` guards against
    // cycles and against types reachable through several parents.
    const visited = new Set();
    const queue = [type];

    while (queue.length > 0) {
      const current = queue.shift();
      if (visited.has(current)) continue;
      visited.add(current);

      // Check if parent has handlers
      const inherited = current !== type ? this.#ownHandlers(current) : undefined;
      if (inherited) {
        if (this.debug) {
          console.debug(` Using ${current} handler for subtype ${type}`);
        }
        return [...inherited];
      }

      // Add parent types to queue
      queue.push(...this.#getParentTypes(current));
    }

    return [];
  }

  /**
   * Tells whether a property value should be descended into: a non-array
   * object, or a non-empty array whose first element has `typeof` "object".
   */
  static #isNestedStructure(value) {
    if (value === null || value === undefined) return false;
    if (Array.isArray(value)) {
      return value.length > 0 && typeof value[0] === 'object';
    }
    return typeof value === 'object';
  }

  /**
   * Recursively validates `data` and everything nested below it.
   *
   * @param {*} data Current node: an array, a typed object, or anything else
   *   (which yields no issues).
   * @param {object} rootData Root item the traversal started from; passed
   *   through unchanged to recursive calls.
   * @param {string} dataFormat Format being validated, as passed by `validate`.
   * @param {object[]} [path] Path elements (`type`, `property`, `index`,
   *   `length`) leading from the root item to `data`.
   * @returns {Promise<object[]>} Issues reported by all handlers, flattened.
   */
  async #validateSubtree(data, rootData, dataFormat, path = []) {
    const spacing = ' ' + ' '.repeat(path.length);

    if (Array.isArray(data)) {
      const parentElem = path[path.length - 1];
      const parentPath = path.slice(0, -1);

      const results = await Promise.all(
        data.map((item, index) => {
          // Array items refine the last path element instead of adding one.
          const elem = { ...parentElem, index, length: data.length };
          // Arrays may contain null or primitive entries; those have no type.
          const itemType = item?.['@type'];
          if (itemType) {
            elem.type = itemType;
          }
          return this.#validateSubtree(item, rootData, dataFormat, [
            ...parentPath,
            elem,
          ]);
        }),
      );
      return results.flat();
    }

    if (typeof data !== 'object' || data === null) {
      return [];
    }

    if (!data['@type']) {
      if (this.debug) {
        console.warn(`${spacing} WARN: No type found for item`);
      }
      // TODO: Should return a validation error as type is missing,
      // WAE is already returning an error
      return [];
    }

    const types = Array.isArray(data['@type']) ? data['@type'] : [data['@type']];

    const typeIssues = await Promise.all(
      types.map(async (type) => {
        if (this.debug) {
          console.debug(`${spacing}VALIDATE TYPE:`, type, JSON.stringify(path));
        }

        // Find supported handlers (check direct mapping first, then parent types)
        const handlers = await this.#getHandlersForType(type);
        if (!handlers || handlers.length === 0) {
          if (this.debug) {
            console.warn(
              `${spacing} WARN: No handlers registered for type: ${type}`,
            );
          }
          return [];
        }
        // Global handlers only run for types that have a specific handler.
        handlers.push(...(this.globalHandlers || []));

        const handlerPromises = handlers.map(async (handler) => {
          const handlerClass = (await handler()).default;
          const handlerInstance = new handlerClass({
            dataFormat,
            path,
            // If an object has multiple types, we need to pass the current type for any global handlers
            type,
            schemaOrgJson: this.schemaOrgJson,
          });
          return handlerInstance.validate(data);
        });

        // Wait for all handlers to complete
        const handlerResults = (await Promise.all(handlerPromises)).flat();

        if (this.debug) {
          for (const issue of handlerResults) {
            console.debug(`${spacing} ISSUE:`, issue);
          }
        }

        return handlerResults;
      }),
    );

    // Check properties for subtypes, ignoring LD-JSON (`@`-prefixed) properties
    const properties = Object.keys(data).filter(
      (key) => !key.startsWith('@') && Validator.#isNestedStructure(data[key]),
    );
    if (this.debug && properties.length > 0) {
      console.debug(`${spacing}PROPERTIES:`, properties);
    }

    const propertyIssues = await Promise.all(
      properties.map((property) => {
        const newPathElem = { property };
        if (data[property]?.['@type']) {
          newPathElem.type = data[property]['@type'];
        }
        return this.#validateSubtree(data[property], rootData, dataFormat, [
          ...path,
          newPathElem,
        ]);
      }),
    );

    return [...typeIssues.flat(), ...propertyIssues.flat()];
  }

  /**
   * Separates the `@location` annotation from a root item without touching
   * the caller's object, so the same input can be validated repeatedly.
   *
   * @returns {{location: *, subject: *}} The location (if any) and a shallow
   *   copy of the item without `@location`; non-object and array items are
   *   returned as-is.
   */
  static #splitLocation(item) {
    if (item === null || typeof item !== 'object' || Array.isArray(item)) {
      return { location: undefined, subject: item };
    }
    const { '@location': location, ...subject } = item;
    return { location, subject };
  }

  /**
   * Validates structured data
   *
   * @param {object} waeData Data as parsed from Web Auto Extractor. The
   *   `jsonld`, `microdata` and `rdfa` keys each map root type names to arrays
   *   of items; an optional `errors` array carries extraction errors.
   *   The input is not modified.
   * @returns {Promise<object[]>} Array of validation issues. Handler issues are
   *   enriched with `rootType`, `dataFormat`, `location` and `source`;
   *   extraction errors for the three formats are appended with severity
   *   `ERROR`.
   */
  async validate(waeData) {
    const dataFormats = ['jsonld', 'microdata', 'rdfa'];

    const results = [];

    for (const dataFormat of dataFormats) {
      const formatData = waeData[dataFormat];
      if (!formatData || Object.keys(formatData).length === 0) {
        continue;
      }
      if (this.debug) {
        console.debug('DATA FORMAT:', dataFormat);
      }

      // Validate root type items
      for (const rootType of Object.keys(formatData)) {
        if (this.debug) {
          console.debug(' ROOT TYPE:', rootType);
        }
        const rootTypeItems = formatData[rootType];

        // Validate each root type item
        for (const [index, item] of rootTypeItems.entries()) {
          // `@location` is metadata, not data: keep it out of validation and
          // out of the serialized source.
          const { location, subject } = Validator.#splitLocation(item);

          const issues = await this.#validateSubtree(
            subject,
            subject,
            dataFormat,
            [{ type: rootType, index }],
          );
          if (issues.length === 0) {
            continue;
          }

          // The source is identical for every issue of an item; build it once.
          let source = subject['@source'];
          if (!source && dataFormat === 'jsonld') {
            source = JSON.stringify(subject);
          }

          for (const issue of issues) {
            results.push({
              rootType,
              dataFormat,
              location,
              source,
              ...issue,
            });
          }
        }
      }
    }

    // Expose WAE errors, filter out metadata errors
    const errors = waeData.errors?.filter((e) => dataFormats.includes(e.format));
    for (const error of errors || []) {
      const result = {
        dataFormat: error.format,
        issueMessage: error.message,
        rootType: error.format,
        severity: 'ERROR',
      };
      if (error.sourceCodeLocation) {
        result.location = `${error.sourceCodeLocation.startOffset},${error.sourceCodeLocation.endOffset}`;
      }
      if (error.source) {
        result.source = error.source;
      }
      results.push(result);
    }

    return results;
  }
}
|