@gsriram24/structured-data-validator 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/.eslint-header.txt +9 -0
  2. package/.releaserc.json +22 -0
  3. package/CHANGELOG.md +71 -0
  4. package/CODE_OF_CONDUCT.md +79 -0
  5. package/LICENSE +201 -0
  6. package/README.md +109 -0
  7. package/package.json +49 -0
  8. package/renovate.json +4 -0
  9. package/src/index.js +14 -0
  10. package/src/types/3DModel.js +21 -0
  11. package/src/types/AggregateOffer.js +23 -0
  12. package/src/types/AggregateRating.js +35 -0
  13. package/src/types/Answer.js +18 -0
  14. package/src/types/Article.js +26 -0
  15. package/src/types/Brand.js +18 -0
  16. package/src/types/BreadcrumbList.js +148 -0
  17. package/src/types/BroadcastEvent.js +23 -0
  18. package/src/types/Certification.js +26 -0
  19. package/src/types/Clip.js +25 -0
  20. package/src/types/DefinedRegion.js +38 -0
  21. package/src/types/Event.js +51 -0
  22. package/src/types/FAQPage.js +18 -0
  23. package/src/types/HowTo.js +27 -0
  24. package/src/types/HowToDirection.js +19 -0
  25. package/src/types/HowToSection.js +22 -0
  26. package/src/types/HowToStep.js +43 -0
  27. package/src/types/HowToTip.js +19 -0
  28. package/src/types/ImageObject.js +40 -0
  29. package/src/types/JobPosting.js +63 -0
  30. package/src/types/ListItem.js +28 -0
  31. package/src/types/LocalBusiness.js +30 -0
  32. package/src/types/MerchantReturnPolicy.js +96 -0
  33. package/src/types/Offer.js +39 -0
  34. package/src/types/OfferShippingDetails.js +27 -0
  35. package/src/types/Organization.js +18 -0
  36. package/src/types/PeopleAudience.js +37 -0
  37. package/src/types/Person.js +18 -0
  38. package/src/types/PriceSpecification.js +21 -0
  39. package/src/types/Product.js +90 -0
  40. package/src/types/ProductMerchant.js +88 -0
  41. package/src/types/QuantitativeValue.js +36 -0
  42. package/src/types/Question.js +21 -0
  43. package/src/types/Rating.js +56 -0
  44. package/src/types/Recipe.js +75 -0
  45. package/src/types/Review.js +35 -0
  46. package/src/types/SeekToAction.js +22 -0
  47. package/src/types/ShippingDeliveryTime.js +21 -0
  48. package/src/types/SizeSpecification.js +22 -0
  49. package/src/types/VideoObject.js +41 -0
  50. package/src/types/WebSite.js +23 -0
  51. package/src/types/base.js +201 -0
  52. package/src/types/schemaOrg.js +227 -0
  53. package/src/utils.js +15 -0
  54. package/src/validator.js +323 -0
@@ -0,0 +1,323 @@
1
+ /**
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
/**
 * Validates structured data items (JSON-LD, Microdata, RDFa) by dispatching
 * every typed node of the data tree to the handler modules registered for its
 * `@type`.
 *
 * Handlers are registered as lazy loaders: functions returning a promise of a
 * module whose `default` export is a handler class. A handler is constructed
 * with `{ dataFormat, path, type, schemaOrgJson }` and its `validate(data)`
 * result (an array of issues) is collected.
 */
export class Validator {
  /**
   * @param {object} [schemaOrgJson] schema.org vocabulary as JSON-LD (an
   *   object with an `@graph` array). When provided, the global schema.org
   *   handler is enabled and types without a direct handler fall back to the
   *   handlers of their `rdfs:subClassOf` ancestors.
   */
  constructor(schemaOrgJson) {
    this.schemaOrgJson = schemaOrgJson;
    if (this.schemaOrgJson) {
      // Only add schema.org validation handler if schema is provided
      this.globalHandlers = [() => import('./types/schemaOrg.js')];
    }

    this.debug = false;

    this.registeredHandlers = {
      '3DModel': [() => import('./types/3DModel.js')],
      AggregateOffer: [() => import('./types/AggregateOffer.js')],
      AggregateRating: [() => import('./types/AggregateRating.js')],
      Answer: [() => import('./types/Answer.js')],
      Article: [() => import('./types/Article.js')],
      BlogPosting: [() => import('./types/Article.js')],
      Brand: [() => import('./types/Brand.js')],
      BreadcrumbList: [() => import('./types/BreadcrumbList.js')],
      Certification: [() => import('./types/Certification.js')],
      DefinedRegion: [() => import('./types/DefinedRegion.js')],
      Event: [() => import('./types/Event.js')],
      FAQPage: [() => import('./types/FAQPage.js')],
      HowTo: [() => import('./types/HowTo.js')],
      ImageObject: [() => import('./types/ImageObject.js')],
      VideoObject: [() => import('./types/VideoObject.js')],
      Clip: [() => import('./types/Clip.js')],
      BroadcastEvent: [() => import('./types/BroadcastEvent.js')],
      SeekToAction: [() => import('./types/SeekToAction.js')],
      ListItem: [() => import('./types/ListItem.js')],
      LocalBusiness: [() => import('./types/LocalBusiness.js')],
      MerchantReturnPolicy: [() => import('./types/MerchantReturnPolicy.js')],
      NewsArticle: [() => import('./types/Article.js')],
      Offer: [() => import('./types/Offer.js')],
      OfferShippingDetails: [() => import('./types/OfferShippingDetails.js')],
      Organization: [() => import('./types/Organization.js')],
      PeopleAudience: [() => import('./types/PeopleAudience.js')],
      Person: [() => import('./types/Person.js')],
      PriceSpecification: [() => import('./types/PriceSpecification.js')],
      Product: [
        () => import('./types/Product.js'),
        () => import('./types/ProductMerchant.js'),
      ],
      QuantitativeValue: [() => import('./types/QuantitativeValue.js')],
      Question: [() => import('./types/Question.js')],
      Rating: [() => import('./types/Rating.js')],
      Review: [() => import('./types/Review.js')],
      ShippingDeliveryTime: [() => import('./types/ShippingDeliveryTime.js')],
      SizeSpecification: [() => import('./types/SizeSpecification.js')],
      UnitPriceSpecification: [() => import('./types/PriceSpecification.js')],
      JobPosting: [() => import('./types/JobPosting.js')],
      Recipe: [() => import('./types/Recipe.js')],
      HowToStep: [() => import('./types/HowToStep.js')],
      HowToSection: [() => import('./types/HowToSection.js')],
      HowToDirection: [() => import('./types/HowToDirection.js')],
      HowToTip: [() => import('./types/HowToTip.js')],
      WebSite: [() => import('./types/WebSite.js')],
    };
  }

  /**
   * Returns the handler loaders registered directly for `type`.
   *
   * Only own properties are considered: `@type` values come from untrusted
   * markup, and names such as "constructor" or "toString" would otherwise
   * resolve to members inherited from Object.prototype.
   *
   * @param {string} type Type name to look up
   * @returns {Function[]|undefined} Registered loaders, or undefined if none
   */
  #getRegisteredHandlers(type) {
    if (!Object.hasOwn(this.registeredHandlers, type)) {
      return undefined;
    }
    return this.registeredHandlers[type] || undefined;
  }

  /**
   * Gets the direct parent types of `type` from the schema.org JSON-LD graph.
   *
   * @param {string} type Type name, without namespace prefix
   * @returns {string[]} Parent type names with the `schema:`,
   *   `https://schema.org/` and `http://schema.org/` prefixes removed; empty
   *   if the schema is missing or the type has no `rdfs:subClassOf`
   */
  #getParentTypes(type) {
    if (!this.schemaOrgJson) return [];

    const graph = this.schemaOrgJson['@graph'];
    if (!Array.isArray(graph)) return [];

    const typeEntry = graph.find(
      (e) =>
        e?.['@type'] === 'rdfs:Class' &&
        (e['@id'] === type ||
          e['@id'] === `schema:${type}` ||
          e['@id'] === `https://schema.org/${type}`),
    );

    if (!typeEntry || !typeEntry['rdfs:subClassOf']) return [];

    const parents = Array.isArray(typeEntry['rdfs:subClassOf'])
      ? typeEntry['rdfs:subClassOf']
      : [typeEntry['rdfs:subClassOf']];

    return (
      parents
        // A parent is either a reference object ({ '@id': ... }) or a plain id
        .map((p) => p?.['@id'] || p)
        // Skip malformed references that do not resolve to a string id
        .filter((id) => typeof id === 'string')
        .map((id) =>
          id
            .replace('schema:', '')
            .replace('https://schema.org/', '')
            .replace('http://schema.org/', ''),
        )
    );
  }

  /**
   * Gets handlers for a type, falling back to parent types if needed.
   *
   * The direct mapping has priority. Otherwise, when a schema is available,
   * the ancestors of the type are searched breadth-first and the handlers of
   * the first ancestor that has any are used.
   *
   * @param {string} type Type name
   * @returns {Promise<Function[]>} A fresh array of handler loaders (safe for
   *   the caller to mutate); empty if nothing matches
   */
  async #getHandlersForType(type) {
    // 1. Check direct mapping first (priority)
    const direct = this.#getRegisteredHandlers(type);
    if (direct) {
      return [...direct];
    }

    // 2. If schemaOrgJson available, check parent types
    if (!this.schemaOrgJson) {
      return [];
    }

    // The visited set protects against cycles in the subclass graph
    const visited = new Set();
    const queue = [type];

    while (queue.length > 0) {
      const current = queue.shift();
      if (visited.has(current)) continue;
      visited.add(current);

      // Check if parent has handlers
      const inherited =
        current !== type ? this.#getRegisteredHandlers(current) : undefined;
      if (inherited) {
        if (this.debug) {
          console.debug(` Using ${current} handler for subtype ${type}`);
        }
        return [...inherited];
      }

      // Add parent types to queue
      queue.push(...this.#getParentTypes(current));
    }

    return [];
  }

  /**
   * Runs every handler applicable to one type of a node (type-specific
   * handlers plus the global handlers) and collects their issues.
   *
   * @param {string} type Type being validated
   * @param {object} data Node being validated
   * @param {string} dataFormat Data format the node was extracted from
   * @param {object[]} path Path from the root item to the node
   * @param {string} spacing Indentation prefix for debug output
   * @returns {Promise<object[]>} Issues reported by the handlers
   */
  async #runHandlersForType(type, data, dataFormat, path, spacing) {
    if (this.debug) {
      console.debug(`${spacing}VALIDATE TYPE:`, type, JSON.stringify(path));
    }

    // Find supported handlers (check direct mapping first, then parent types)
    const handlers = await this.#getHandlersForType(type);
    if (handlers.length === 0) {
      if (this.debug) {
        console.warn(
          `${spacing} WARN: No handlers registered for type: ${type}`,
        );
      }
      return [];
    }
    // Global handlers only run for types that have at least one handler
    handlers.push(...(this.globalHandlers || []));

    const handlerPromises = handlers.map(async (handler) => {
      const HandlerClass = (await handler()).default;
      const handlerInstance = new HandlerClass({
        dataFormat,
        path,
        // If an object has multiple types, we need to pass the current type for any global handlers
        type,
        schemaOrgJson: this.schemaOrgJson,
      });
      return handlerInstance.validate(data);
    });

    // Wait for all handlers to complete
    const handlerResults = (await Promise.all(handlerPromises)).flat();

    if (this.debug) {
      for (const issue of handlerResults) {
        console.debug(`${spacing} ISSUE:`, issue);
      }
    }

    return handlerResults;
  }

  /**
   * Recursively validates a node and all nested objects below it.
   *
   * For an array, each element is validated with the last path element
   * extended by `index`, `length` and (if present) the element's `type`.
   * For an object, its types are validated first, then every property that
   * holds an object or an array of objects. Objects without `@type` and
   * non-object values produce no issues.
   *
   * @param {*} data Node to validate
   * @param {object} rootData Root item the node belongs to
   * @param {string} dataFormat Data format the node was extracted from
   * @param {object[]} [path] Path from the root item to the node
   * @returns {Promise<object[]>} Issues found in the subtree
   */
  async #validateSubtree(data, rootData, dataFormat, path = []) {
    const spacing = ' ' + ' '.repeat(path.length);

    if (Array.isArray(data)) {
      const results = await Promise.all(
        data.map((item, index) => {
          const last = {
            ...path[path.length - 1],
            index,
            length: data.length,
          };
          // Arrays may contain null or primitive entries, so guard the lookup
          if (item?.['@type']) {
            last.type = item['@type'];
          }
          return this.#validateSubtree(item, rootData, dataFormat, [
            ...path.slice(0, -1),
            last,
          ]);
        }),
      );
      return results.flat();
    }

    if (typeof data !== 'object' || data === null) {
      return [];
    }

    if (!data['@type']) {
      if (this.debug) {
        console.warn(`${spacing} WARN: No type found for item`);
      }
      // TODO: Should return a validation error as type is missing,
      // WAE is already returning an error
      return [];
    }

    const types = Array.isArray(data['@type'])
      ? data['@type']
      : [data['@type']];

    const typeIssues = await Promise.all(
      types.map((type) =>
        this.#runHandlersForType(type, data, dataFormat, path, spacing),
      ),
    );

    // Check properties for subtypes
    const properties = Object.keys(data).filter((key) => {
      // Ignore LD-JSON properties
      if (key.startsWith('@')) return false;
      const value = data[key];
      if (value === null || value === undefined) return false;
      if (Array.isArray(value)) {
        // Array of objects: only the first element is inspected
        return value.length > 0 && typeof value[0] === 'object';
      }
      // Object
      return typeof value === 'object';
    });
    if (this.debug && properties.length > 0) {
      console.debug(`${spacing}PROPERTIES:`, properties);
    }

    const propertyIssues = await Promise.all(
      properties.map((property) => {
        const newPathElem = { property };
        if (data[property]?.['@type']) {
          newPathElem.type = data[property]['@type'];
        }
        return this.#validateSubtree(data[property], rootData, dataFormat, [
          ...path,
          newPathElem,
        ]);
      }),
    );

    return [...typeIssues.flat(), ...propertyIssues.flat()];
  }

  /**
   * Validates structured data.
   *
   * The input is not modified: each root item is validated on a shallow copy
   * from which the `@location` marker has been removed.
   *
   * @param {object} waeData Data as parsed from Web Auto Extractor: per data
   *   format (`jsonld`, `microdata`, `rdfa`) a map of root type to an array
   *   of items, plus an optional `errors` array
   * @returns {Promise<object[]>} Array of validation issues; each handler
   *   issue is extended with `rootType`, `dataFormat`, `location` and
   *   `source`
   */
  async validate(waeData) {
    const dataFormats = ['jsonld', 'microdata', 'rdfa'];

    const results = [];

    for (const dataFormat of dataFormats) {
      const formatData = waeData[dataFormat];
      if (!formatData || Object.keys(formatData).length === 0) {
        continue;
      }
      if (this.debug) {
        console.debug('DATA FORMAT:', dataFormat);
      }

      // Validate root type items
      for (const rootType of Object.keys(formatData)) {
        if (this.debug) {
          console.debug(' ROOT TYPE:', rootType);
        }
        const rootTypeItems = formatData[rootType];

        // Validate each root type item
        for (const [index, rawItem] of rootTypeItems.entries()) {
          let location;
          let item = rawItem;
          if (
            rawItem !== null &&
            typeof rawItem === 'object' &&
            !Array.isArray(rawItem)
          ) {
            // Strip the location marker on a copy so the caller's data stays
            // intact and can be validated again
            ({ '@location': location, ...item } = rawItem);
          }

          const issues = await this.#validateSubtree(item, item, dataFormat, [
            { type: rootType, index },
          ]);
          if (issues.length === 0) {
            continue;
          }

          // The source is shared by all issues of the item: compute it once
          let source = item['@source'];
          if (!source && dataFormat === 'jsonld') {
            source = JSON.stringify(item);
          }
          for (const issue of issues) {
            results.push({
              rootType,
              dataFormat,
              location,
              source,
              ...issue,
            });
          }
        }
      }
    }

    // Expose WAE errors, filter out metadata errors
    const errors = waeData.errors?.filter((e) =>
      dataFormats.includes(e.format),
    );
    for (const error of errors || []) {
      const result = {
        dataFormat: error.format,
        issueMessage: error.message,
        rootType: error.format,
        severity: 'ERROR',
      };
      if (error.sourceCodeLocation) {
        result.location = `${error.sourceCodeLocation.startOffset},${error.sourceCodeLocation.endOffset}`;
      }
      if (error.source) {
        result.source = error.source;
      }
      results.push(result);
    }

    return results;
  }
}