geo-semantic-layer 3.0.0-alpha.1 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auditor.js +2067 -0
- package/dist/auditor.js.map +1 -0
- package/dist/cli.js +5152 -0
- package/dist/cli.js.map +1 -0
- package/dist/extractor.js +596 -0
- package/dist/extractor.js.map +1 -0
- package/dist/index.js +3491 -0
- package/dist/index.js.map +1 -0
- package/dist/optimizer.js +523 -0
- package/dist/optimizer.js.map +1 -0
- package/package.json +9 -4
package/dist/auditor.js
ADDED
|
@@ -0,0 +1,2067 @@
|
|
|
1
|
+
import * as fs from 'fs/promises';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
import { z } from 'zod';
|
|
4
|
+
import { encode } from 'gpt-tokenizer';
|
|
5
|
+
|
|
6
|
+
// src/auditor/project-scanner.ts
|
|
7
|
+
// Glob patterns used by scanProject to select files when the caller does not
// supply `options.include`.
var DEFAULT_INCLUDE_PATTERNS = [
  "**/*.tsx",
  "**/*.jsx",
  "**/*.vue",
  "**/*.astro",
  "**/*.js",
  "**/*.ts"
];
// Glob patterns used by scanProject to skip paths when the caller does not
// supply `options.exclude` (dependency folders, build output, test files).
var DEFAULT_EXCLUDE_PATTERNS = [
  "node_modules/**",
  "dist/**",
  "build/**",
  ".next/**",
  ".nuxt/**",
  ".astro/**",
  "coverage/**",
  "**/*.test.{ts,tsx,js,jsx}",
  "**/*.spec.{ts,tsx,js,jsx}"
];
// Regular expressions that recognise schema generator usage in source text.
// Capture group 1 is the schema type name (e.g. "Article", "FAQ").
// Both regexes carry the `g` flag, so their `lastIndex` is stateful;
// detectSchemasInFile resets it to 0 before every scan.
var GENERATOR_PATTERNS = [
  // Function-style generators, e.g. generateArticleSchema(...)
  /generate(Article|Product|Organization|LocalBusiness|Person|Event|WebPage|FAQ|Breadcrumb|OpenGraph)Schema/g,
  // Constructor-style usage, e.g. new ArticleSchema(...)
  /new\s+(Article|Product|Organization|LocalBusiness|Person|Event|WebPage|FAQ|Breadcrumb)Schema/g
];
|
|
30
|
+
/**
 * Scans a project tree for schema generator usage and for route files that
 * have no schema yet.
 *
 * @param {object} [options]
 * @param {string} [options.rootDir] Directory to scan; defaults to process.cwd().
 * @param {string[]} [options.include] Include globs; defaults to DEFAULT_INCLUDE_PATTERNS.
 * @param {string[]} [options.exclude] Exclude globs; defaults to DEFAULT_EXCLUDE_PATTERNS.
 * @param {number} [options.maxDepth] Forwarded to getAllFiles only when truthy.
 * @returns {Promise<object>} Scan summary: projectRoot, framework, filesScanned,
 *   filesWithSchemas, routesWithoutSchemas and an ISO `scannedAt` timestamp.
 */
async function scanProject(options = {}) {
  const rootDir = options.rootDir || process.cwd();
  const include = options.include || DEFAULT_INCLUDE_PATTERNS;
  const exclude = options.exclude || DEFAULT_EXCLUDE_PATTERNS;
  const framework = await detectFramework(rootDir);
  const allFiles = await getAllFiles(rootDir, {
    include,
    exclude,
    ...(options.maxDepth && { maxDepth: options.maxDepth })
  });
  const filesWithSchemas = [];
  const routesWithoutSchemas = [];
  for (const filePath of allFiles) {
    let content;
    try {
      content = await fs.readFile(filePath, "utf-8");
    } catch {
      // A single unreadable file (permissions, removed mid-scan) must not
      // abort the whole audit; warn and move on, as getAllFiles does for
      // unreadable directories.
      console.warn(`Warning: Could not read file ${filePath}`);
      continue;
    }
    const relativePath = path.relative(rootDir, filePath);
    const schemas = await detectSchemasInFile(content);
    if (schemas.length > 0) {
      filesWithSchemas.push({
        filePath: relativePath,
        fileType: path.extname(filePath).slice(1),
        schemas,
        linesOfCode: content.split("\n").length
      });
    } else if (isRouteFile(filePath, framework)) {
      // Only route files WITHOUT any detected schema are candidates for a
      // schema suggestion.
      const routeInfo = analyzeRouteFile(content, filePath, framework);
      if (routeInfo) {
        routesWithoutSchemas.push({
          filePath: relativePath,
          ...routeInfo
        });
      }
    }
  }
  return {
    projectRoot: rootDir,
    framework,
    filesScanned: allFiles.length,
    filesWithSchemas,
    routesWithoutSchemas,
    scannedAt: (/* @__PURE__ */ new Date()).toISOString()
  };
}
|
|
72
|
+
/**
 * Detects the web framework of a project by probing for well-known config
 * files in `rootDir`.
 *
 * Frameworks are probed in declaration order (next, nuxt, astro, vite) and the
 * first existing config file wins, so a project that has both an Astro and a
 * Vite config is reported as "astro".
 *
 * @param {string} rootDir Directory expected to contain the config file.
 * @returns {Promise<string>} "next", "nuxt", "astro", "vite" or "unknown".
 */
async function detectFramework(rootDir) {
  const configFiles = {
    next: ["next.config.js", "next.config.mjs", "next.config.ts"],
    nuxt: ["nuxt.config.js", "nuxt.config.ts", "nuxt.config.mjs"],
    astro: ["astro.config.mjs", "astro.config.ts", "astro.config.js", "astro.config.cjs"],
    vite: [
      "vite.config.js",
      "vite.config.ts",
      "vite.config.mjs",
      "vite.config.mts",
      "vite.config.cjs",
      "vite.config.cts"
    ]
  };
  for (const [framework, files] of Object.entries(configFiles)) {
    for (const file of files) {
      try {
        await fs.access(path.join(rootDir, file));
        return framework;
      } catch {
        // Config file is absent or inaccessible: try the next candidate.
      }
    }
  }
  return "unknown";
}
|
|
90
|
+
/**
 * Recursively collects the files below `dir` that match the include globs and
 * are not hit by the exclude globs.
 *
 * Globs are tested against the path relative to the scan root, using forward
 * slashes on every platform, so root-anchored patterns such as
 * "node_modules/**" work regardless of the process working directory.
 *
 * @param {string} dir Directory to read.
 * @param {{include: string[], exclude: string[], maxDepth?: number}} options
 *   Glob lists plus an optional depth limit. A falsy `maxDepth` (including 0)
 *   means "no limit".
 * @param {number} [currentDepth=0] Depth of `dir` below the scan root.
 * @param {string} [rootDir=dir] Scan root used to build relative paths;
 *   defaults to the directory of the outermost call.
 * @returns {Promise<string[]>} Full paths (built by joining onto `dir`) of the matching files.
 */
async function getAllFiles(dir, options, currentDepth = 0, rootDir = dir) {
  const files = [];
  if (options.maxDepth && currentDepth > options.maxDepth) {
    return files;
  }
  try {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);
      // Glob patterns are written with "/" separators; normalise Windows paths.
      const relativePath = path.relative(rootDir, fullPath).split(path.sep).join("/");
      if (isExcluded(relativePath, options.exclude)) {
        continue;
      }
      if (entry.isDirectory()) {
        const subFiles = await getAllFiles(fullPath, options, currentDepth + 1, rootDir);
        files.push(...subFiles);
      } else if (entry.isFile()) {
        if (matchesPatterns(relativePath, options.include)) {
          files.push(fullPath);
        }
      }
    }
  } catch {
    // Best effort: an unreadable directory is reported and skipped.
    console.warn(`Warning: Could not read directory ${dir}`);
  }
  return files;
}
|
|
117
|
+
/**
 * Tells whether a path is hit by at least one exclude glob.
 *
 * @param {string} filePath Path to test.
 * @param {string[]} excludePatterns Glob patterns, converted with globToRegex.
 * @returns {boolean} true as soon as one pattern matches.
 */
function isExcluded(filePath, excludePatterns) {
  // "some pattern matches" expressed as "not every pattern fails to match".
  const noneMatch = excludePatterns.every((glob) => !globToRegex(glob).test(filePath));
  return !noneMatch;
}
|
|
123
|
+
/**
 * Tells whether a path is selected by at least one include glob.
 *
 * @param {string} filePath Path to test.
 * @param {string[]} includePatterns Glob patterns, converted with globToRegex.
 * @returns {boolean} true when any pattern matches the whole path.
 */
function matchesPatterns(filePath, includePatterns) {
  const matchesGlob = (glob) => globToRegex(glob).test(filePath);
  return includePatterns.some(matchesGlob);
}
|
|
129
|
+
/**
 * Converts a simple glob pattern into an anchored regular expression.
 *
 * Supported syntax:
 *   - `*`        any run of characters except "/"
 *   - `**`       any run of characters including "/"
 *   - `**` followed by "/" at the start or after a "/": zero or more directories,
 *                so a pattern like "**" + "/*.ts" also matches "index.ts"
 *   - `{a,b}`    alternation
 *   - `.`        a literal dot
 *
 * @param {string} pattern Glob pattern using "/" as separator.
 * @returns {RegExp} Expression that must match the entire path.
 */
function globToRegex(pattern) {
  // Placeholders keep the globstar forms apart from the single-star pass.
  const GLOBSTAR_DIR = "\u0000";
  const GLOBSTAR = "\u0001";
  const regexStr = pattern
    // Escape literal dots FIRST; doing it after the wildcard expansion would
    // also escape the dot of the generated ".*".
    .replace(/\./g, "\\.")
    .replace(/(^|\/)\*\*\//g, `$1${GLOBSTAR_DIR}`)
    .replace(/\*\*/g, GLOBSTAR)
    .replace(/\*/g, "[^/]*")
    .replace(/\u0000/g, "(?:.*/)?")
    .replace(/\u0001/g, ".*")
    .replace(/\{([^}]+)\}/g, (_, group) => `(${group.split(",").join("|")})`);
  return new RegExp(`^${regexStr}$`);
}
|
|
133
|
+
/**
 * Finds schema generator usages in a source text.
 *
 * @param {string} content Full text of the file.
 * @param {string} [_filePath] Unused; kept for signature compatibility.
 * @param {RegExp[]} [patterns=GENERATOR_PATTERNS] Regexes whose capture
 *   group 1 is the schema type name.
 * @returns {Promise<Array<{type: string, generatorFunction: string, line: number,
 *   column: number, issues: Array}>>} One entry per match. `line` is 1-based,
 *   `column` is the 0-based offset within that line, `issues` starts empty.
 */
async function detectSchemasInFile(content, _filePath, patterns = GENERATOR_PATTERNS) {
  const schemas = [];

  // Offsets at which each line starts, so that a match index maps to
  // line/column correctly even when the file contains blank lines.
  const lineStarts = [0];
  for (let nl = content.indexOf("\n"); nl !== -1; nl = content.indexOf("\n", nl + 1)) {
    lineStarts.push(nl + 1);
  }
  // Index of the last line start that is <= position (binary search).
  const lineIndexOf = (position) => {
    let low = 0;
    let high = lineStarts.length - 1;
    while (low < high) {
      const mid = (low + high + 1) >> 1;
      if (lineStarts[mid] <= position) {
        low = mid;
      } else {
        high = mid - 1;
      }
    }
    return low;
  };

  for (const pattern of patterns) {
    // Work on a private global copy: no shared `lastIndex` state, and no
    // endless loop if a caller passes a regex without the `g` flag.
    const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
    const scanner = new RegExp(pattern.source, flags);
    let match;
    while ((match = scanner.exec(content)) !== null) {
      if (match[0] === "") {
        // Guard against zero-length matches never advancing.
        scanner.lastIndex += 1;
      }
      const schemaType = match[1];
      if (!schemaType) continue;
      const lineIndex = lineIndexOf(match.index);
      schemas.push({
        type: schemaType,
        generatorFunction: match[0],
        line: lineIndex + 1,
        column: match.index - lineStarts[lineIndex],
        // Filled in later, during validation.
        issues: []
      });
    }
  }
  return schemas;
}
|
|
169
|
+
/**
 * Tells whether a file is a page/route entry point for the given framework.
 *
 * @param {string} filePath File path; backslashes are treated as separators.
 * @param {string} framework "next", "nuxt", "astro" or anything else.
 * @returns {boolean} false for frameworks without a known routing convention.
 */
function isRouteFile(filePath, framework) {
  const normalized = filePath.replace(/\\/g, "/");
  switch (framework) {
    case "next":
      // App router: app/page.* (the root route) or app/<segments>/page.*;
      // pages router: any script below pages/.
      return /\/app\/(?:.*\/)?page\.(tsx|jsx|ts|js)$/.test(normalized) || /\/pages\/.*\.(tsx|jsx|ts|js)$/.test(normalized);
    case "nuxt":
      return /\/pages\/.*\.vue$/.test(normalized);
    case "astro":
      return /\/src\/pages\/.*\.astro$/.test(normalized);
    default:
      return false;
  }
}
|
|
182
|
+
/**
 * Suggests a schema type for a route file based on keywords in its path.
 *
 * @param {string} _content Unused; kept for signature compatibility.
 * @param {string} filePath Path of the route file (either separator style).
 * @param {string} framework "next", "nuxt", "astro" or anything else.
 * @returns {{routePath?: string, suggestedSchemaType: string, confidence: number,
 *   reason: string} | null} Suggestion, or null when no keyword matches.
 *   `routePath` keeps the original letter case and is only present when the
 *   path follows the framework's routing convention.
 */
function analyzeRouteFile(_content, filePath, framework) {
  const unixPath = filePath.replace(/\\/g, "/");
  // Keyword heuristics are case-insensitive, so they run on a lower-cased copy.
  // NOTE(review): the whole path is searched, so a keyword in a parent folder
  // name (e.g. a project directory called "shop") also triggers a suggestion.
  const normalized = unixPath.toLowerCase();

  // Route path is extracted from the un-lowered path so that case-sensitive
  // segments such as "[postId]" are reported unchanged.
  let routePath;
  if (framework === "next") {
    const appMatch = unixPath.match(/\/app\/(?:(.*)\/)?page\.(?:tsx|jsx|ts|js)$/i);
    if (appMatch) {
      // No captured segment means app/page.* itself, i.e. the root route.
      routePath = `/${appMatch[1] || ""}`;
    }
    // A pages-router match takes precedence over an app-router match.
    const pagesMatch = unixPath.match(/\/pages\/(.*)\.(?:tsx|jsx|ts|js)$/i);
    if (pagesMatch) {
      routePath = `/${pagesMatch[1]}`;
    }
  } else if (framework === "nuxt") {
    const nuxtMatch = unixPath.match(/\/pages\/(.*)\.vue$/i);
    if (nuxtMatch) {
      routePath = `/${nuxtMatch[1]}`;
    }
  } else if (framework === "astro") {
    const astroMatch = unixPath.match(/\/src\/pages\/(.*)\.astro$/i);
    if (astroMatch) {
      routePath = `/${astroMatch[1]}`;
    }
  }

  // Ordered heuristics: the first keyword group found in the path wins.
  const heuristics = [
    {
      keywords: /blog|article|post|news/,
      suggestedSchemaType: "Article",
      confidence: 0.8,
      reason: "Path contains blog/article/post keywords"
    },
    {
      keywords: /product|item|shop/,
      suggestedSchemaType: "Product",
      confidence: 0.75,
      reason: "Path contains product/shop keywords"
    },
    {
      keywords: /about|company|contact/,
      suggestedSchemaType: "Organization",
      confidence: 0.7,
      reason: "Path suggests organization/company page"
    },
    {
      keywords: /event|calendar/,
      suggestedSchemaType: "Event",
      confidence: 0.7,
      reason: "Path suggests event page"
    }
  ];
  const hit = heuristics.find((rule) => rule.keywords.test(normalized));
  if (!hit || hit.confidence < 0.6) {
    return null;
  }
  return {
    ...(routePath && { routePath }),
    suggestedSchemaType: hit.suggestedSchemaType,
    confidence: hit.confidence,
    reason: hit.reason
  };
}
|
|
225
|
+
/**
 * Produces a quick overview of a project directory.
 *
 * @param {string} [rootDir=process.cwd()] Directory to inspect.
 * @returns {Promise<{framework: string, estimatedFiles: number, hasPackageJson: boolean}>}
 *   `estimatedFiles` counts only the regular files directly inside `rootDir`
 *   (0 when the directory cannot be read); `hasPackageJson` tells whether
 *   `package.json` is accessible there.
 */
async function getProjectStats(rootDir = process.cwd()) {
  // Number of plain files at the top level; any failure counts as zero.
  const countRootFiles = async () => {
    try {
      let total = 0;
      for (const entry of await fs.readdir(rootDir, { withFileTypes: true })) {
        if (entry.isFile()) {
          total += 1;
        }
      }
      return total;
    } catch {
      return 0;
    }
  };
  // Accessibility probe for package.json; any failure means "absent".
  const probePackageJson = async () => {
    try {
      await fs.access(path.join(rootDir, "package.json"));
      return true;
    } catch {
      return false;
    }
  };

  const framework = await detectFramework(rootDir);
  const estimatedFiles = await countRootFiles();
  const hasPackageJson = await probePackageJson();
  return { framework, estimatedFiles, hasPackageJson };
}
|
|
247
|
+
// zod validator for a schema.org PostalAddress node; every address part is optional.
var PostalAddressSchema = z.object({
  "@type": z.literal("PostalAddress").default("PostalAddress"),
  streetAddress: z.string().optional(),
  addressLocality: z.string().optional(),
  addressRegion: z.string().optional(),
  postalCode: z.string().optional(),
  // Exactly two characters (ISO 3166-1 alpha-2 country code).
  addressCountry: z.string().length(2).optional()
});
// zod validator for a schema.org ContactPoint node; `contactType` is limited
// to the four values listed below.
var ContactPointSchema = z.object({
  "@type": z.literal("ContactPoint").default("ContactPoint"),
  telephone: z.string().optional(),
  email: z.string().email().optional(),
  contactType: z.enum(["customer service", "technical support", "sales", "billing"]).optional(),
  areaServed: z.string().optional(),
  availableLanguage: z.array(z.string()).optional()
});
// zod validator for a schema.org Organization. `name`, `url` and a non-empty
// `sameAs` array are mandatory; everything else is optional.
var OrganizationSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.literal("Organization").default("Organization"),
  // Required fields
  name: z.string().min(1, "Organization name is required"),
  url: z.string().url("Must be a valid URL"),
  // Recommended fields
  logo: z.string().url("Logo must be a valid URL").optional(),
  image: z.string().url("Image must be a valid URL").optional(),
  description: z.string().min(10, "Description should be at least 10 characters").optional(),
  /**
   * CRITICAL for GEO: Entity disambiguation
   * Should include at least one social profile or authority ID (Wikidata, Wikipedia, etc.)
   */
  sameAs: z.array(z.string().url()).min(1, "At least one sameAs URL is required for entity disambiguation").describe("Social profiles, Wikidata ID, Wikipedia URL for entity disambiguation"),
  // Contact information
  email: z.string().email().optional(),
  telephone: z.string().optional(),
  address: PostalAddressSchema.optional(),
  // Accepts either a single contact point or a list of them.
  contactPoint: z.union([ContactPointSchema, z.array(ContactPointSchema)]).optional(),
  // Additional information
  alternateName: z.string().optional(),
  foundingDate: z.string().regex(/^\d{4}-\d{2}-\d{2}$/, "Must be in YYYY-MM-DD format").optional(),
  founder: z.array(z.string()).optional(),
  // Relationships
  parentOrganization: z.string().optional(),
  subOrganization: z.array(z.string()).optional()
});
// zod validator for a schema.org Person. `name` and a non-empty `sameAs`
// array are mandatory; unlike OrganizationSchema, `url` is optional here.
var PersonSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.literal("Person").default("Person"),
  // Required fields
  name: z.string().min(1, "Person name is required"),
  url: z.string().url("Must be a valid URL").optional(),
  // Recommended fields
  image: z.string().url("Image must be a valid URL").optional(),
  description: z.string().min(10, "Description should be at least 10 characters").optional(),
  /**
   * CRITICAL for GEO: Entity disambiguation
   * Should include social profiles, Wikidata ID, Wikipedia URL, etc.
   */
  sameAs: z.array(z.string().url()).min(1, "At least one sameAs URL is required for entity disambiguation").describe("Social profiles, Wikidata ID, Wikipedia URL for entity disambiguation"),
  // Contact information
  email: z.string().email().optional(),
  telephone: z.string().optional(),
  address: PostalAddressSchema.optional(),
  // Professional information
  jobTitle: z.string().optional(),
  // Inline Organization reference; "@type" has no default and must be supplied.
  worksFor: z.object({
    "@type": z.literal("Organization"),
    name: z.string(),
    url: z.string().url().optional()
  }).optional(),
  // Additional information
  alternateName: z.string().optional(),
  birthDate: z.string().regex(/^\d{4}-\d{2}-\d{2}$/, "Must be in YYYY-MM-DD format").optional(),
  nationality: z.string().optional(),
  // Relationships
  colleague: z.array(z.string()).optional(),
  alumniOf: z.string().optional()
});
|
|
325
|
+
// zod validator for a schema.org Offer; `price` and a three-character
// `priceCurrency` are mandatory.
var OfferSchema = z.object({
  "@type": z.literal("Offer").default("Offer"),
  price: z.union([z.string(), z.number()]),
  // Exactly three characters (ISO 4217 currency code).
  priceCurrency: z.string().length(3),
  availability: z.string().url().optional().describe("e.g., https://schema.org/InStock"),
  url: z.string().url().optional(),
  // Date only, YYYY-MM-DD.
  priceValidUntil: z.string().regex(/^\d{4}-\d{2}-\d{2}$/).optional(),
  seller: z.object({
    "@type": z.literal("Organization"),
    name: z.string()
  }).optional()
});
// zod validator for a schema.org AggregateRating; the rating scale defaults
// to 1..5 when bestRating/worstRating are omitted.
var AggregateRatingSchema = z.object({
  "@type": z.literal("AggregateRating").default("AggregateRating"),
  ratingValue: z.union([z.string(), z.number()]),
  reviewCount: z.number().int().positive(),
  bestRating: z.union([z.string(), z.number()]).optional().default(5),
  worstRating: z.union([z.string(), z.number()]).optional().default(1)
});
// zod validator for a schema.org Review; author, date, body and rating are
// all mandatory.
var ReviewSchema = z.object({
  "@type": z.literal("Review").default("Review"),
  author: z.object({
    "@type": z.literal("Person"),
    name: z.string()
  }),
  // Date only, YYYY-MM-DD.
  datePublished: z.string().regex(/^\d{4}-\d{2}-\d{2}$/),
  reviewBody: z.string().min(10),
  reviewRating: z.object({
    "@type": z.literal("Rating"),
    ratingValue: z.union([z.string(), z.number()]),
    bestRating: z.union([z.string(), z.number()]).optional().default(5)
  })
});
// zod validator for a schema.org Product; only `name` is mandatory.
var ProductSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.literal("Product").default("Product"),
  // Required fields
  name: z.string().min(1, "Product name is required"),
  // Recommended fields
  image: z.union([z.string().url(), z.array(z.string().url())]).optional(),
  description: z.string().min(10, "Description should be at least 10 characters").optional(),
  // SKU or other identifiers
  sku: z.string().optional(),
  gtin: z.string().optional(),
  gtin8: z.string().optional(),
  gtin12: z.string().optional(),
  gtin13: z.string().optional(),
  gtin14: z.string().optional(),
  mpn: z.string().optional(),
  // Brand
  brand: z.object({
    "@type": z.literal("Brand"),
    name: z.string()
  }).optional(),
  // Offers (pricing): a single offer or a list of offers.
  offers: z.union([OfferSchema, z.array(OfferSchema)]).optional(),
  // Ratings and Reviews
  aggregateRating: AggregateRatingSchema.optional(),
  review: z.union([ReviewSchema, z.array(ReviewSchema)]).optional(),
  // Additional information
  category: z.string().optional(),
  color: z.union([z.string(), z.array(z.string())]).optional(),
  material: z.string().optional(),
  manufacturer: z.object({
    "@type": z.literal("Organization"),
    name: z.string()
  }).optional()
});
|
|
394
|
+
// zod validator for schema.org Article and its listed subtypes. `headline`,
// `image`, `author` and `datePublished` are mandatory.
var ArticleSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.enum(["Article", "NewsArticle", "BlogPosting", "TechArticle"]).default("Article"),
  // Required fields
  headline: z.string().min(1).max(110, "Headline should be less than 110 characters for optimal SEO"),
  image: z.union([z.string().url(), z.array(z.string().url())]),
  // Recommended fields
  // NOTE(review): `author` is not marked optional, so it is effectively required.
  author: z.union([
    z.object({
      "@type": z.enum(["Person", "Organization"]),
      name: z.string(),
      url: z.string().url().optional()
    }),
    z.array(
      z.object({
        "@type": z.enum(["Person", "Organization"]),
        name: z.string(),
        url: z.string().url().optional()
      })
    )
  ]).describe("Author(s) of the article"),
  // Accepts YYYY-MM-DD or YYYY-MM-DDTHH:MM:SSZ only.
  // NOTE(review): fractional seconds and numeric UTC offsets are rejected,
  // which includes the output of Date.prototype.toISOString() — confirm intent.
  datePublished: z.string().regex(/^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}Z)?$/, "Must be ISO 8601 format"),
  dateModified: z.string().regex(/^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}Z)?$/, "Must be ISO 8601 format").optional(),
  // Publisher (recommended for Google News)
  publisher: z.object({
    "@type": z.literal("Organization"),
    name: z.string(),
    logo: z.object({
      "@type": z.literal("ImageObject"),
      url: z.string().url(),
      width: z.number().optional(),
      height: z.number().optional()
    }).optional()
  }).optional(),
  // Content
  description: z.string().min(10).optional(),
  articleBody: z.string().min(50, "Article body should be at least 50 characters").optional(),
  wordCount: z.number().int().positive().optional(),
  // Additional metadata
  url: z.string().url().optional(),
  mainEntityOfPage: z.string().url().optional(),
  keywords: z.union([z.string(), z.array(z.string())]).optional(),
  articleSection: z.string().optional(),
  inLanguage: z.string().optional()
});
// BlogPosting variant of ArticleSchema. The result is not assigned here;
// presumably the bundler dropped an unused binding — TODO confirm.
ArticleSchema.extend({
  "@type": z.literal("BlogPosting").default("BlogPosting")
});
// NewsArticle variant of ArticleSchema with an extra optional `dateline`.
// The result is likewise not assigned in this file.
ArticleSchema.extend({
  "@type": z.literal("NewsArticle").default("NewsArticle"),
  dateline: z.string().optional()
});
|
|
446
|
+
// zod validator for one FAQ entry: a Question with its accepted Answer.
var QuestionSchema = z.object({
  "@type": z.literal("Question").default("Question"),
  name: z.string().min(1, "Question text is required"),
  acceptedAnswer: z.object({
    "@type": z.literal("Answer").default("Answer"),
    text: z.string().min(1, "Answer text is required")
  })
});
// zod validator for a schema.org FAQPage; needs at least one question.
var FAQPageSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.literal("FAQPage").default("FAQPage"),
  mainEntity: z.array(QuestionSchema).min(1, "At least one question is required for FAQ").describe("Array of questions and answers")
});
// zod validator for one breadcrumb entry; `position` must be a positive integer.
var ListItemSchema = z.object({
  "@type": z.literal("ListItem").default("ListItem"),
  position: z.number().int().positive(),
  name: z.string().min(1),
  item: z.string().url().optional()
});
// zod validator for a schema.org BreadcrumbList; needs at least two items.
var BreadcrumbListSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.literal("BreadcrumbList").default("BreadcrumbList"),
  itemListElement: z.array(ListItemSchema).min(2, "Breadcrumb list must have at least 2 items").describe("Ordered list of breadcrumb items")
});
|
|
470
|
+
// zod validator for a schema.org WebPage; `name` and `url` are mandatory.
var WebPageSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.literal("WebPage").default("WebPage"),
  name: z.string().min(1),
  description: z.string().min(10).optional(),
  url: z.string().url(),
  // Optional fields
  inLanguage: z.string().optional(),
  isPartOf: z.object({
    "@type": z.literal("WebSite"),
    name: z.string(),
    url: z.string().url()
  }).optional(),
  // Only the "@type" marker is validated for the breadcrumb reference.
  breadcrumb: z.object({
    "@type": z.literal("BreadcrumbList")
  }).optional(),
  // Accepts YYYY-MM-DD or YYYY-MM-DDTHH:MM:SSZ only.
  datePublished: z.string().regex(/^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}Z)?$/).optional(),
  dateModified: z.string().regex(/^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}Z)?$/).optional(),
  author: z.object({
    "@type": z.enum(["Person", "Organization"]),
    name: z.string()
  }).optional()
});
// Open Graph style metadata validator (title, description, url, image, ...).
// The result is not assigned here; presumably the bundler dropped an unused
// binding — TODO confirm.
z.object({
  title: z.string().min(1).max(60, "Title should be less than 60 characters for optimal display"),
  description: z.string().min(1).max(160, "Description should be less than 160 characters"),
  url: z.string().url(),
  type: z.enum(["website", "article", "product", "book", "profile", "music", "video"]).default("website"),
  image: z.string().url(),
  imageAlt: z.string().optional(),
  siteName: z.string().optional(),
  locale: z.string().default("en_US"),
  // Article-specific
  publishedTime: z.string().optional(),
  modifiedTime: z.string().optional(),
  author: z.string().optional(),
  section: z.string().optional(),
  tags: z.array(z.string()).optional(),
  // Product-specific
  price: z.union([z.string(), z.number()]).optional(),
  currency: z.string().length(3).optional()
});
// Twitter card metadata validator. The result is likewise not assigned in
// this file.
z.object({
  card: z.enum(["summary", "summary_large_image", "app", "player"]).default("summary_large_image"),
  // @username
  site: z.string().optional(),
  // @username
  creator: z.string().optional(),
  title: z.string().min(1).max(70, "Twitter title should be less than 70 characters"),
  description: z.string().min(1).max(200, "Twitter description should be less than 200 characters"),
  image: z.string().url(),
  imageAlt: z.string().optional()
});
|
|
523
|
+
// zod validator for schema.org GeoCoordinates with range-checked numbers.
var GeoCoordinatesSchema = z.object({
  "@type": z.literal("GeoCoordinates").default("GeoCoordinates"),
  latitude: z.number().min(-90).max(90),
  longitude: z.number().min(-180).max(180)
});
// zod validator for a schema.org OpeningHoursSpecification; `opens` and
// `closes` are 24-hour HH:MM strings.
var OpeningHoursSchema = z.object({
  "@type": z.literal("OpeningHoursSpecification").default("OpeningHoursSpecification"),
  // A single weekday name or a list of weekday names.
  dayOfWeek: z.union([
    z.enum(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]),
    z.array(z.enum(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]))
  ]),
  opens: z.string().regex(/^([01]\d|2[0-3]):([0-5]\d)$/, "Must be in HH:MM format"),
  closes: z.string().regex(/^([01]\d|2[0-3]):([0-5]\d)$/, "Must be in HH:MM format")
});
// zod validator for a schema.org LocalBusiness; `name`, `image` and `address`
// are mandatory.
var LocalBusinessSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  // Any string is accepted so that subtypes (Restaurant, Store, etc.) pass.
  "@type": z.string().default("LocalBusiness"),
  // Required
  name: z.string().min(1, "Business name is required"),
  image: z.union([z.string().url(), z.array(z.string().url())]),
  address: PostalAddressSchema,
  // Recommended
  "@id": z.string().url().optional(),
  url: z.string().url().optional(),
  telephone: z.string().optional(),
  // e.g., "$$"
  priceRange: z.string().optional(),
  // Location
  geo: GeoCoordinatesSchema.optional(),
  // Opening hours: a single specification or a list of them.
  openingHoursSpecification: z.union([
    OpeningHoursSchema,
    z.array(OpeningHoursSchema)
  ]).optional(),
  // Ratings (declared inline; "@type" has no default here).
  aggregateRating: z.object({
    "@type": z.literal("AggregateRating"),
    ratingValue: z.union([z.string(), z.number()]),
    reviewCount: z.number().int().positive()
  }).optional(),
  // Additional info
  description: z.string().min(10).optional(),
  // For restaurants
  servesCuisine: z.union([z.string(), z.array(z.string())]).optional(),
  // For restaurants
  menu: z.string().url().optional(),
  acceptsReservations: z.union([z.boolean(), z.string()]).optional(),
  paymentAccepted: z.string().optional()
});
// zod validator for a schema.org Place; the address may be free text or an
// inline PostalAddress object (without the two-character country check of
// PostalAddressSchema).
var PlaceSchema = z.object({
  "@type": z.literal("Place").default("Place"),
  name: z.string(),
  address: z.union([
    z.string(),
    z.object({
      "@type": z.literal("PostalAddress"),
      streetAddress: z.string().optional(),
      addressLocality: z.string().optional(),
      addressRegion: z.string().optional(),
      postalCode: z.string().optional(),
      addressCountry: z.string().optional()
    })
  ]).optional()
});
|
|
588
|
+
// zod validator for a ticket Offer attached to an event; `price` and a
// three-character `priceCurrency` are mandatory.
var EventOfferSchema = z.object({
  "@type": z.literal("Offer").default("Offer"),
  url: z.string().url().optional(),
  price: z.union([z.string(), z.number()]),
  priceCurrency: z.string().length(3),
  availability: z.string().url().optional(),
  validFrom: z.string().optional()
});
// zod validator for schema.org Event and the listed subtypes; `name` and
// `startDate` are mandatory.
var EventSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.enum(["Event", "MusicEvent", "BusinessEvent", "SportsEvent", "TheaterEvent", "EducationEvent"]).default("Event"),
  // Required
  name: z.string().min(1, "Event name is required"),
  // Accepts YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS followed by a numeric offset
  // (+HH:MM / -HH:MM).
  // NOTE(review): a trailing "Z" is rejected here, whereas ArticleSchema
  // accepts only "Z" — confirm whether the difference is intended.
  startDate: z.string().regex(/^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}[+-]\d{2}:\d{2})?$/, "Must be ISO 8601 format"),
  // Recommended
  endDate: z.string().regex(/^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}[+-]\d{2}:\d{2})?$/, "Must be ISO 8601 format").optional(),
  eventStatus: z.enum([
    "https://schema.org/EventScheduled",
    "https://schema.org/EventCancelled",
    "https://schema.org/EventMovedOnline",
    "https://schema.org/EventPostponed",
    "https://schema.org/EventRescheduled"
  ]).optional(),
  eventAttendanceMode: z.enum([
    "https://schema.org/OfflineEventAttendanceMode",
    "https://schema.org/OnlineEventAttendanceMode",
    "https://schema.org/MixedEventAttendanceMode"
  ]).optional(),
  // Can be Place or VirtualLocation URL
  location: z.union([PlaceSchema, z.string().url()]).optional(),
  image: z.union([z.string().url(), z.array(z.string().url())]).optional(),
  description: z.string().min(10).optional(),
  // Organizer
  organizer: z.object({
    "@type": z.enum(["Organization", "Person"]),
    name: z.string(),
    url: z.string().url().optional()
  }).optional(),
  // Performer: a single performer or a list of performers.
  performer: z.union([
    z.object({
      "@type": z.enum(["Person", "PerformingGroup"]),
      name: z.string()
    }),
    z.array(z.object({
      "@type": z.enum(["Person", "PerformingGroup"]),
      name: z.string()
    }))
  ]).optional(),
  // Offers (tickets)
  offers: z.union([EventOfferSchema, z.array(EventOfferSchema)]).optional(),
  // Additional
  url: z.string().url().optional()
});
|
|
642
|
+
|
|
643
|
+
// src/auditor/schema-validator.ts
|
|
644
|
+
// Maps a schema type name to its zod validator.
// NOTE(review): the keys here are "FAQPage" and "BreadcrumbList", while
// REQUIRED_FIELDS / RECOMMENDED_FIELDS use "FAQ" and "Breadcrumb", which are
// also the names captured by GENERATOR_PATTERNS. A lookup by detected type
// may therefore miss these two validators — confirm against the consumers.
var SCHEMA_VALIDATORS = {
  Article: ArticleSchema,
  Product: ProductSchema,
  Organization: OrganizationSchema,
  Person: PersonSchema,
  LocalBusiness: LocalBusinessSchema,
  Event: EventSchema,
  WebPage: WebPageSchema,
  FAQPage: FAQPageSchema,
  BreadcrumbList: BreadcrumbListSchema
};
// Field names treated as required, per schema type.
var REQUIRED_FIELDS = {
  Article: ["@type", "@context", "headline", "author", "datePublished"],
  Product: ["@type", "@context", "name"],
  Organization: ["@type", "@context", "name"],
  Person: ["@type", "@context", "name"],
  LocalBusiness: ["@type", "@context", "name", "address"],
  Event: ["@type", "@context", "name", "startDate", "location"],
  WebPage: ["@type", "@context", "name"],
  FAQ: ["@type", "@context", "mainEntity"],
  Breadcrumb: ["@type", "@context", "itemListElement"]
};
// Field names treated as recommended (not required), per schema type.
// NOTE(review): LocalBusiness lists "openingHours" although
// LocalBusinessSchema declares "openingHoursSpecification"; Product lists
// "price" and "availability", which ProductSchema only has nested under
// `offers` — confirm how these names are checked.
var RECOMMENDED_FIELDS = {
  Article: ["image", "dateModified", "publisher", "description"],
  Product: ["image", "price", "availability", "brand", "description"],
  Organization: ["url", "logo", "sameAs", "contactPoint"],
  Person: ["image", "url", "sameAs", "jobTitle"],
  LocalBusiness: ["telephone", "geo", "openingHours", "image"],
  Event: ["description", "image", "organizer", "eventStatus"],
  WebPage: ["description", "url"],
  // FAQ has no additional specially recommended fields.
  FAQ: [],
  // Breadcrumb is simple.
  Breadcrumb: []
};
// Field names flagged as deprecated or discouraged, per schema type.
var DEPRECATED_FIELDS = {
  // 'version' is no longer used on Article.
  Article: ["version"],
  // Low semantic value for GEO.
  Organization: ["founder"],
  // Confusing; prefer 'name' or 'mpn'.
  Product: ["model"]
};
|
|
687
|
+
/**
 * Validates every schema found by a project scan and aggregates the results.
 *
 * Side effect: each validated schema object gets its `issues` property set.
 *
 * @param {object} scanResult - Scan output; `filesWithSchemas` is iterated.
 * @param {object} [options]
 * @param {string[]} [options.schemaTypes] - When set, only these types are validated.
 * @param {boolean} [options.strict] - When truthy, warnings also make a result invalid.
 * @returns {Promise<object>} Totals, per-file results and grouped common issues.
 */
async function validateSchemas(scanResult, options = {}) {
  const countSeverity = (list, level) => list.filter((entry) => entry.severity === level).length;
  const tally = { schemas: 0, valid: 0, errors: 0, warnings: 0 };
  const fileResults = [];

  for (const file of scanResult.filesWithSchemas) {
    let selected = file.schemas;
    if (options.schemaTypes) {
      selected = file.schemas.filter((candidate) => options.schemaTypes.includes(candidate.type));
    }

    const collected = [];
    for (const schema of selected) {
      tally.schemas += 1;
      const found = validateSchema(schema);
      schema.issues = found;

      const errorCount = countSeverity(found, "error");
      tally.errors += errorCount;
      tally.warnings += countSeverity(found, "warning");
      if (errorCount === 0) {
        tally.valid += 1;
      }
      collected.push(...found);
    }

    const fileHasNoErrors = countSeverity(collected, "error") === 0;
    const fileValid = fileHasNoErrors && (!options.strict || countSeverity(collected, "warning") === 0);
    fileResults.push({
      filePath: file.filePath,
      valid: fileValid,
      schemas: selected,
      issues: collected
    });
  }

  const commonIssues = generateCommonIssues(fileResults);
  return {
    valid: tally.errors === 0 && (!options.strict || tally.warnings === 0),
    totalSchemas: tally.schemas,
    validSchemas: tally.valid,
    errors: tally.errors,
    warnings: tally.warnings,
    fileResults,
    commonIssues
  };
}
|
|
729
|
+
/**
 * Validates one discovered schema and returns the list of issues found.
 *
 * Checks, in order: required fields (error), recommended fields (warning),
 * deprecated fields (info), the registered validator for the type (error per
 * reported issue) and finally the type-specific rules.
 *
 * @param {object} schema - Discovered schema; reads `type`, `schema`,
 *   `generatorFunction` and `line`.
 * @returns {object[]} Issues with `severity`, `message` and optional
 *   `field`, `fix`, `autoFixCode`.
 */
function validateSchema(schema) {
  const issues = [];
  const schemaType = schema.type;

  // Without parsed data only the generator call is known; report that and stop.
  if (!schema.schema) {
    issues.push({
      severity: "info",
      message: `Could not parse schema for deep validation. Using '${schema.generatorFunction}' at line ${schema.line}.`
    });
    return issues;
  }

  const data = schema.schema;

  for (const field of REQUIRED_FIELDS[schemaType] || []) {
    if (!hasField(data, field)) {
      const autoFix = generateAutoFix(field, schemaType);
      issues.push({
        severity: "error",
        field,
        message: `Required field '${field}' is missing`,
        fix: `Add '${field}' field to your ${schemaType} schema`,
        ...autoFix && { autoFixCode: autoFix }
      });
    }
  }

  for (const field of RECOMMENDED_FIELDS[schemaType] || []) {
    if (!hasField(data, field)) {
      issues.push({
        severity: "warning",
        field,
        message: `Recommended field '${field}' is missing`,
        fix: `Consider adding '${field}' for better SEO and GEO`
      });
    }
  }

  for (const field of DEPRECATED_FIELDS[schemaType] || []) {
    if (hasField(data, field)) {
      issues.push({
        severity: "info",
        field,
        message: `Field '${field}' is deprecated or has low semantic value`,
        fix: `Consider removing '${field}' to reduce token usage`
      });
    }
  }

  const validator = SCHEMA_VALIDATORS[schemaType];
  if (validator) {
    try {
      validator.parse(data);
    } catch (error) {
      // `issues` is the canonical list on a ZodError; `errors` is kept as a
      // fallback for versions that expose the list under that alias.
      const reported = error && (error.issues || error.errors);
      if (Array.isArray(reported)) {
        for (const zodIssue of reported) {
          const fieldPath = Array.isArray(zodIssue.path) ? zodIssue.path.join(".") : "";
          issues.push({
            severity: "error",
            field: fieldPath,
            message: zodIssue.message,
            fix: `Fix type or format of '${fieldPath}'`
          });
        }
      }
      // Errors that carry no issue list are ignored, as before: deep
      // validation is best-effort and must not abort the audit.
    }
  }

  issues.push(...validateSchemaSpecificRules(schemaType, data));
  return issues;
}
|
|
795
|
+
/**
 * Applies the cross-field and format rules that are specific to one schema type.
 *
 * - Article: dateModified not before datePublished; plausible wordCount.
 * - Product: priceCurrency present when price is; known availability value.
 * - Organization / LocalBusiness: HTTPS logo URL; telephone character set.
 * - Event: endDate not before startDate.
 *
 * @param {string} schemaType - Schema type name; unknown types produce no issues.
 * @param {object} data - Parsed schema data.
 * @returns {object[]} Issues found (possibly empty).
 */
function validateSchemaSpecificRules(schemaType, data) {
  const issues = [];
  switch (schemaType) {
    case "Article": {
      if (data.datePublished && data.dateModified) {
        const published = new Date(data.datePublished);
        const modified = new Date(data.dateModified);
        // Comparisons with an invalid Date are false, so unparsable values
        // never trigger this rule.
        if (published > modified) {
          issues.push({
            severity: "error",
            field: "dateModified",
            message: "dateModified must be after datePublished",
            fix: "Ensure dateModified is later than datePublished"
          });
        }
      }
      if (data.wordCount && (data.wordCount < 100 || data.wordCount > 1e4)) {
        issues.push({
          severity: "warning",
          field: "wordCount",
          message: `wordCount seems unusual: ${data.wordCount}`,
          fix: "Verify wordCount is calculated correctly"
        });
      }
      break;
    }
    case "Product": {
      if (data.price && !data.priceCurrency) {
        issues.push({
          severity: "error",
          field: "priceCurrency",
          message: "priceCurrency is required when price is specified",
          fix: 'Add priceCurrency (e.g., "USD", "EUR")'
        });
      }
      if (data.availability) {
        const validAvailability = [
          "InStock",
          "OutOfStock",
          "PreOrder",
          "Discontinued",
          "SoldOut"
        ];
        // schema.org enumeration values are commonly written as full URLs
        // ("https://schema.org/InStock"); compare on the bare member name so
        // that form is not flagged as non-standard.
        const rawAvailability = data.availability;
        const memberName = typeof rawAvailability === "string"
          ? rawAvailability.replace(/^https?:\/\/schema\.org\//, "")
          : rawAvailability;
        if (!validAvailability.includes(memberName)) {
          issues.push({
            severity: "warning",
            field: "availability",
            message: `availability value '${data.availability}' is not standard`,
            fix: `Use one of: ${validAvailability.join(", ")}`
          });
        }
      }
      break;
    }
    case "Organization":
    case "LocalBusiness": {
      if (data.logo && typeof data.logo === "string") {
        if (!data.logo.startsWith("https://")) {
          issues.push({
            severity: "warning",
            field: "logo",
            message: "Logo URL should use HTTPS",
            fix: "Change logo URL to HTTPS"
          });
        }
      }
      if (data.telephone && typeof data.telephone === "string") {
        // Accepts an optional leading "+" followed by digits, whitespace,
        // hyphens and parentheses only.
        if (!/^\+?[\d\s\-()]+$/.test(data.telephone)) {
          issues.push({
            severity: "info",
            field: "telephone",
            message: "Telephone format may not be standard",
            fix: "Use international format (e.g., +1-555-1234)"
          });
        }
      }
      break;
    }
    case "Event": {
      if (data.startDate && data.endDate) {
        const start = new Date(data.startDate);
        const end = new Date(data.endDate);
        if (start > end) {
          issues.push({
            severity: "error",
            field: "endDate",
            message: "endDate must be after startDate",
            fix: "Ensure endDate is later than startDate"
          });
        }
      }
      break;
    }
  }
  return issues;
}
|
|
887
|
+
/**
 * Reports whether a dot-separated path resolves to a non-null, non-undefined value.
 *
 * @param {*} obj - Value to look into.
 * @param {string} fieldPath - Path such as "author.name"; segments are split on ".".
 * @returns {boolean} False as soon as a segment cannot be followed, or when
 *   the final value is null or undefined.
 */
function hasField(obj, fieldPath) {
  let node = obj;
  for (const segment of fieldPath.split(".")) {
    const canDescend = Boolean(node) && typeof node === "object";
    if (!canDescend || !(segment in node)) {
      return false;
    }
    node = node[segment];
  }
  return node !== null && node !== void 0;
}
|
|
898
|
+
/**
 * Returns a ready-to-paste snippet for a missing required field, if one is known.
 *
 * A switch is used instead of a plain-object lookup so that field names which
 * collide with inherited object members (for example "constructor") yield
 * `undefined` rather than a function.
 *
 * @param {string} field - Name of the missing field.
 * @param {string} schemaType - Schema type, used for the "@type" snippet.
 * @returns {string|undefined} The snippet, or undefined when none is defined.
 */
function generateAutoFix(field, schemaType) {
  switch (field) {
    case "@context":
      return '"@context": "https://schema.org"';
    case "@type":
      return `"@type": "${schemaType}"`;
    case "headline":
      return '"headline": "Your article title"';
    case "author":
      return '"author": "Author Name"';
    case "datePublished":
      // Emitted as source text, not evaluated here.
      return `"datePublished": new Date().toISOString()`;
    case "name":
      return '"name": "Item name"';
    default:
      return undefined;
  }
}
|
|
909
|
+
/**
 * Groups issues from all files by severity and message, most frequent first.
 *
 * @param {object[]} fileResults - Entries with `filePath` and `issues`.
 * @returns {object[]} Groups of `{ type, count, severity, affectedFiles }`,
 *   sorted by descending `count`; `type` holds the issue message.
 */
function generateCommonIssues(fileResults) {
  const groups = /* @__PURE__ */ new Map();
  for (const fileResult of fileResults) {
    for (const issue of fileResult.issues) {
      const groupKey = `${issue.severity}:${issue.message}`;
      const group = groups.get(groupKey);
      if (!groups.has(groupKey)) {
        groups.set(groupKey, {
          type: issue.message,
          count: 1,
          severity: issue.severity,
          affectedFiles: [fileResult.filePath]
        });
        continue;
      }
      group.count += 1;
      // Each file is listed once per group, however many times it repeats the issue.
      if (!group.affectedFiles.includes(fileResult.filePath)) {
        group.affectedFiles.push(fileResult.filePath);
      }
    }
  }
  return [...groups.values()].sort((left, right) => right.count - left.count);
}
|
|
932
|
+
/**
 * Validates one schema object supplied directly rather than found by a scan.
 *
 * @param {string} schemaType - Schema type name.
 * @param {object} schemaData - Schema data to validate.
 * @returns {object[]} Issues reported by validateSchema().
 */
function validateSingleSchema(schemaType, schemaData) {
  // Wrap the data in the same record shape a scan produces; "manual" and
  // line 0 stand in for the generator call that does not exist here.
  return validateSchema({
    type: schemaType,
    generatorFunction: "manual",
    line: 0,
    schema: schemaData
  });
}
|
|
940
|
+
/**
 * Combines scan and validation results into a single coverage report.
 *
 * Coverage is the share of files with schemas among files with schemas plus
 * routes without schemas, as a percentage (0 when there are none).
 *
 * @param {object} scanResult - Project scan output.
 * @param {object} validationResult - Output of validateSchemas().
 * @param {object[]} [optimizations] - Passed through unchanged.
 * @returns {object} Report with project info, stats, per-type counts, score,
 *   files, missing schemas, issues and optimizations.
 */
function generateCoverageReport(scanResult, validationResult, optimizations = []) {
  const withSchemas = scanResult.filesWithSchemas.length;
  const withoutSchemas = scanResult.routesWithoutSchemas.length;
  const routeCount = withSchemas + withoutSchemas;

  let coverage = 0;
  if (routeCount > 0) {
    coverage = withSchemas / routeCount * 100;
  }

  const schemasByType = {};
  scanResult.filesWithSchemas.forEach((file) => {
    file.schemas.forEach((schema) => {
      schemasByType[schema.type] = (schemasByType[schema.type] || 0) + 1;
    });
  });

  const score = calculateProjectScore(scanResult, validationResult, coverage);
  const issues = validationResult.fileResults.flatMap((fileResult) => fileResult.issues);

  const project = {
    root: scanResult.projectRoot,
    // The framework key is only present when a framework was detected.
    ...scanResult.framework && { framework: scanResult.framework },
    scannedAt: scanResult.scannedAt
  };
  const stats = {
    totalFiles: scanResult.filesScanned,
    filesWithSchemas: withSchemas,
    routesWithoutSchemas: withoutSchemas,
    totalSchemas: validationResult.totalSchemas,
    coverage: Math.round(coverage)
  };

  return {
    project,
    stats,
    schemasByType,
    score,
    files: scanResult.filesWithSchemas,
    missingSchemas: scanResult.routesWithoutSchemas,
    issues,
    optimizations
  };
}
|
|
976
|
+
/**
 * Computes the weighted project score and its four components.
 *
 * Weights: presence 25%, validity 35%, completeness 25%, optimization 15%.
 *
 * @param {object} _scanResult - Unused.
 * @param {object} validationResult - Reads `totalSchemas`, `validSchemas`,
 *   `warnings` and `fileResults`.
 * @param {number} coverage - Coverage percentage, used as the presence score.
 * @returns {{ total: number, breakdown: object }} Total clamped to 0..100 and
 *   the rounded component scores.
 */
function calculateProjectScore(_scanResult, validationResult, coverage) {
  const { totalSchemas, validSchemas, warnings, fileResults } = validationResult;

  const presence = coverage;
  const validity = totalSchemas > 0 ? validSchemas / totalSchemas * 100 : 0;

  // Two warnings per schema are treated as the point where completeness hits 0.
  const warningBudget = Math.max(1, totalSchemas * 2);
  const completeness = Math.max(0, 100 - warnings / warningBudget * 100);

  // Every info-level issue costs ten optimization points.
  let infoCount = 0;
  for (const fileResult of fileResults) {
    for (const issue of fileResult.issues) {
      if (issue.severity === "info") {
        infoCount += 1;
      }
    }
  }
  const optimization = Math.max(0, 100 - infoCount * 10);

  const weighted = Math.round(
    presence * 0.25 + validity * 0.35 + completeness * 0.25 + optimization * 0.15
  );

  return {
    total: Math.min(100, Math.max(0, weighted)),
    breakdown: {
      presence: Math.round(presence),
      validity: Math.round(validity),
      completeness: Math.round(completeness),
      optimization: Math.round(optimization)
    }
  };
}
|
|
1004
|
+
/**
 * Renders a report and writes it to a file or to standard output.
 *
 * @param {object} report - Coverage report to render.
 * @param {object} [options]
 * @param {string} [options.format] - "json" (default), "html" or "markdown".
 * @param {string} [options.output] - Destination path; when absent the
 *   rendered report is printed with console.log.
 * @returns {Promise<void>}
 * @throws {Error} When the format is not one of the supported values.
 */
async function saveReport(report, options = {}) {
  const format = options.format || "json";

  let rendered;
  if (format === "json") {
    rendered = generateJSONReport(report);
  } else if (format === "html") {
    rendered = generateHTMLReport(report);
  } else if (format === "markdown") {
    rendered = generateMarkdownReport(report);
  } else {
    throw new Error(`Unknown format: ${format}`);
  }

  if (!options.output) {
    console.log(rendered);
    return;
  }
  await fs.writeFile(options.output, rendered, "utf-8");
}
|
|
1026
|
+
/**
 * Serializes a report as pretty-printed JSON.
 *
 * @param {object} report - Report to serialize.
 * @returns {string} JSON text indented with two spaces.
 */
function generateJSONReport(report) {
  const indentWidth = 2;
  const serialized = JSON.stringify(report, null, indentWidth);
  return serialized;
}
|
|
1029
|
+
/**
 * Renders a coverage report as Markdown.
 *
 * Sections: header, score, statistics, schemas by type, then (when present)
 * issues and missing schemas, followed by a footer. Errors, warnings and
 * missing routes are each limited to the first ten entries, with a
 * "... and N more" line for the remainder.
 *
 * @param {object} report - Reads `project`, `stats`, `score`,
 *   `schemasByType`, `issues` and `missingSchemas`.
 * @returns {string} Markdown text.
 */
function generateMarkdownReport(report) {
  const { project, stats, score, schemasByType, issues, missingSchemas } = report;
  const LIST_LIMIT = 10;
  const chunks = [];

  // Renders one "### Errors"/"### Warnings" subsection; no-op for an empty group.
  const appendIssueGroup = (title, group, fixLabel, noun) => {
    if (group.length === 0) {
      return;
    }
    chunks.push(`### ${title} (${group.length})\n\n`);
    for (const issue of group.slice(0, LIST_LIMIT)) {
      chunks.push(`- **${issue.field || "General"}**: ${issue.message}\n`);
      if (issue.fix) {
        chunks.push(` - ${fixLabel}: ${issue.fix}\n`);
      }
    }
    if (group.length > LIST_LIMIT) {
      chunks.push(`\n... and ${group.length - LIST_LIMIT} more ${noun}\n`);
    }
    chunks.push("\n");
  };

  chunks.push("# JSON-LD Audit Report\n\n");
  chunks.push(`**Project:** ${project.root}\n`);
  chunks.push(`**Framework:** ${project.framework || "Unknown"}\n`);
  chunks.push(`**Date:** ${new Date(project.scannedAt).toLocaleString()}\n\n`);

  chunks.push(`## Score: ${score.total}/100\n\n`);
  chunks.push(`- **Presence:** ${score.breakdown.presence}/100 (${stats.coverage}% coverage)\n`);
  chunks.push(`- **Validity:** ${score.breakdown.validity}/100\n`);
  chunks.push(`- **Completeness:** ${score.breakdown.completeness}/100\n`);
  chunks.push(`- **Optimization:** ${score.breakdown.optimization}/100\n\n`);

  chunks.push("## Statistics\n\n");
  chunks.push(`- Total files scanned: ${stats.totalFiles}\n`);
  chunks.push(`- Files with schemas: ${stats.filesWithSchemas}\n`);
  chunks.push(`- Routes without schemas: ${stats.routesWithoutSchemas}\n`);
  chunks.push(`- Total schemas: ${stats.totalSchemas}\n\n`);

  chunks.push("## Schemas by Type\n\n");
  Object.entries(schemasByType).forEach(([type, count]) => {
    chunks.push(`- **${type}**: ${count}\n`);
  });
  chunks.push("\n");

  if (issues.length > 0) {
    chunks.push("## Issues Found\n\n");
    appendIssueGroup("Errors", issues.filter((i) => i.severity === "error"), "Fix", "errors");
    appendIssueGroup("Warnings", issues.filter((i) => i.severity === "warning"), "Suggestion", "warnings");
  }

  if (missingSchemas.length > 0) {
    chunks.push("## Missing Schemas\n\n");
    chunks.push("Routes that should have schemas but don't:\n\n");
    for (const route of missingSchemas.slice(0, LIST_LIMIT)) {
      const suggestion = route.suggestedSchemaType ? ` \u2192 Suggested: ${route.suggestedSchemaType}` : "";
      chunks.push(`- **${route.filePath}**${suggestion}\n`);
    }
    if (missingSchemas.length > LIST_LIMIT) {
      chunks.push(`\n... and ${missingSchemas.length - LIST_LIMIT} more routes\n`);
    }
    chunks.push("\n");
  }

  chunks.push("---\n\n");
  chunks.push("Generated by [geo-semantic-layer](https://www.npmjs.com/package/geo-semantic-layer)\n");
  return chunks.join("");
}
|
|
1147
|
+
/**
 * Renders a coverage report as a self-contained HTML document.
 *
 * Every value that originates from the scanned project (paths, framework
 * name, schema type names, issue fields/messages/fixes, route reasons) is
 * HTML-escaped before being interpolated, so characters such as "<" or "&"
 * cannot break the markup or inject script. Errors, warnings and missing
 * routes are each limited to the first twenty entries.
 *
 * @param {object} report - Reads `project`, `stats`, `score`,
 *   `schemasByType`, `issues` and `missingSchemas`.
 * @returns {string} Complete HTML document.
 */
function generateHTMLReport(report) {
  const { project, stats, score, schemasByType, issues, missingSchemas } = report;
  const errors = issues.filter((i) => i.severity === "error");
  const warnings = issues.filter((i) => i.severity === "warning");
  const LIST_LIMIT = 20;

  // Escapes the five characters that are significant in HTML text and attributes.
  const escapeHtml = (value) => String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");

  // Largest per-type count, computed once; bars are scaled relative to it.
  const typeCounts = Object.values(schemasByType);
  const largestCount = typeCounts.length > 0 ? Math.max(...typeCounts) : 0;
  const barWidth = (count) => largestCount > 0 ? count / largestCount * 100 : 0;

  // Renders one issue card; `kind` selects the CSS variant ("error"/"warning").
  const renderIssue = (issue, kind, badgeLabel) => `
<div class="issue issue-${kind}">
<div class="issue-header">
<span class="issue-badge badge-${kind}">${badgeLabel}</span>
${issue.field ? `<span class="issue-field">${escapeHtml(issue.field)}</span>` : ""}
</div>
<div class="issue-message">${escapeHtml(issue.message)}</div>
${issue.fix ? `<div class="issue-fix">\u{1F4A1} ${escapeHtml(issue.fix)}</div>` : ""}
</div>
`;

  // Renders a whole issue section, or an empty string when the list is empty.
  const renderIssueSection = (heading, list, kind, badgeLabel, noun) => list.length > 0 ? `
<div class="section">
<h2>${heading} (${list.length})</h2>
<div class="issue-list">
${list.slice(0, LIST_LIMIT).map((issue) => renderIssue(issue, kind, badgeLabel)).join("")}
${list.length > LIST_LIMIT ? `<div class="file-item">... and ${list.length - LIST_LIMIT} more ${noun}</div>` : ""}
</div>
</div>
` : "";

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>JSON-LD Audit Report</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.6;
color: #333;
background: #f5f5f5;
padding: 20px;
}
.container {
max-width: 1200px;
margin: 0 auto;
background: white;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
overflow: hidden;
}
header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 30px;
}
h1 { font-size: 32px; margin-bottom: 10px; }
.meta { opacity: 0.9; font-size: 14px; }
.score-section {
padding: 30px;
background: #f9fafb;
border-bottom: 1px solid #e5e7eb;
}
.score-card {
display: flex;
align-items: center;
gap: 30px;
}
.score-circle {
width: 120px;
height: 120px;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
font-size: 36px;
font-weight: bold;
color: white;
flex-shrink: 0;
}
.score-excellent { background: #10b981; }
.score-good { background: #3b82f6; }
.score-warning { background: #f59e0b; }
.score-poor { background: #ef4444; }
.score-breakdown {
flex: 1;
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 15px;
}
.score-item {
background: white;
padding: 15px;
border-radius: 6px;
border-left: 4px solid #667eea;
}
.score-item h3 { font-size: 14px; color: #6b7280; margin-bottom: 5px; }
.score-item .value { font-size: 24px; font-weight: bold; color: #111827; }
.stats-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
padding: 30px;
}
.stat-card {
background: #f9fafb;
padding: 20px;
border-radius: 8px;
border: 1px solid #e5e7eb;
}
.stat-card h3 { font-size: 14px; color: #6b7280; margin-bottom: 10px; }
.stat-card .value { font-size: 32px; font-weight: bold; color: #111827; }
.section {
padding: 30px;
border-bottom: 1px solid #e5e7eb;
}
.section h2 {
font-size: 20px;
margin-bottom: 20px;
color: #111827;
}
.issue-list {
display: flex;
flex-direction: column;
gap: 12px;
}
.issue {
padding: 15px;
border-radius: 6px;
border-left: 4px solid;
}
.issue-error {
background: #fef2f2;
border-left-color: #ef4444;
}
.issue-warning {
background: #fffbeb;
border-left-color: #f59e0b;
}
.issue-info {
background: #eff6ff;
border-left-color: #3b82f6;
}
.issue-header {
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 8px;
}
.issue-badge {
padding: 4px 8px;
border-radius: 4px;
font-size: 11px;
font-weight: 600;
text-transform: uppercase;
}
.badge-error { background: #ef4444; color: white; }
.badge-warning { background: #f59e0b; color: white; }
.badge-info { background: #3b82f6; color: white; }
.issue-field {
font-family: 'Courier New', monospace;
font-size: 13px;
color: #6b7280;
}
.issue-message { font-size: 14px; color: #111827; margin-bottom: 6px; }
.issue-fix {
font-size: 13px;
color: #059669;
font-style: italic;
}
.file-list {
display: flex;
flex-direction: column;
gap: 10px;
}
.file-item {
padding: 12px;
background: #f9fafb;
border-radius: 6px;
border: 1px solid #e5e7eb;
font-family: 'Courier New', monospace;
font-size: 13px;
}
.schema-type {
display: inline-block;
padding: 2px 8px;
background: #667eea;
color: white;
border-radius: 4px;
font-size: 11px;
margin-left: 8px;
}
footer {
padding: 20px 30px;
background: #f9fafb;
text-align: center;
color: #6b7280;
font-size: 14px;
}
.chart {
width: 100%;
max-width: 400px;
margin: 0 auto;
}
.chart-bar {
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 12px;
}
.chart-label {
width: 100px;
text-align: right;
font-size: 14px;
color: #6b7280;
}
.chart-track {
flex: 1;
height: 24px;
background: #e5e7eb;
border-radius: 4px;
overflow: hidden;
}
.chart-fill {
height: 100%;
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
transition: width 0.3s ease;
}
.chart-value {
width: 40px;
text-align: right;
font-weight: bold;
font-size: 14px;
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>\u{1F4CA} JSON-LD Audit Report</h1>
<div class="meta">
<strong>Project:</strong> ${escapeHtml(project.root)}<br>
<strong>Framework:</strong> ${escapeHtml(project.framework || "Unknown")}<br>
<strong>Date:</strong> ${new Date(project.scannedAt).toLocaleString()}
</div>
</header>

<div class="score-section">
<div class="score-card">
<div class="score-circle ${getScoreClass(score.total)}">
${score.total}
</div>
<div class="score-breakdown">
<div class="score-item">
<h3>Presence</h3>
<div class="value">${score.breakdown.presence}/100</div>
</div>
<div class="score-item">
<h3>Validity</h3>
<div class="value">${score.breakdown.validity}/100</div>
</div>
<div class="score-item">
<h3>Completeness</h3>
<div class="value">${score.breakdown.completeness}/100</div>
</div>
<div class="score-item">
<h3>Optimization</h3>
<div class="value">${score.breakdown.optimization}/100</div>
</div>
</div>
</div>
</div>

<div class="stats-grid">
<div class="stat-card">
<h3>Files Scanned</h3>
<div class="value">${stats.totalFiles}</div>
</div>
<div class="stat-card">
<h3>With Schemas</h3>
<div class="value">${stats.filesWithSchemas}</div>
</div>
<div class="stat-card">
<h3>Coverage</h3>
<div class="value">${stats.coverage}%</div>
</div>
<div class="stat-card">
<h3>Total Schemas</h3>
<div class="value">${stats.totalSchemas}</div>
</div>
</div>

<div class="section">
<h2>Schemas by Type</h2>
<div class="chart">
${Object.entries(schemasByType).map(
    ([type, count]) => `
<div class="chart-bar">
<div class="chart-label">${escapeHtml(type)}</div>
<div class="chart-track">
<div class="chart-fill" style="width: ${barWidth(count)}%"></div>
</div>
<div class="chart-value">${count}</div>
</div>
`
  ).join("")}
</div>
</div>

${renderIssueSection("\u274C Errors", errors, "error", "Error", "errors")}

${renderIssueSection("\u26A0\uFE0F Warnings", warnings, "warning", "Warning", "warnings")}

${missingSchemas.length > 0 ? `
<div class="section">
<h2>\u{1F4C4} Routes Without Schemas (${missingSchemas.length})</h2>
<div class="file-list">
${missingSchemas.slice(0, LIST_LIMIT).map(
    (route) => `
<div class="file-item">
${escapeHtml(route.filePath)}
${route.suggestedSchemaType ? `<span class="schema-type">${escapeHtml(route.suggestedSchemaType)}</span>` : ""}
${route.reason ? `<br><small style="color: #6b7280;">${escapeHtml(route.reason)}</small>` : ""}
</div>
`
  ).join("")}
${missingSchemas.length > LIST_LIMIT ? `<div class="file-item">... and ${missingSchemas.length - LIST_LIMIT} more routes</div>` : ""}
</div>
</div>
` : ""}

<footer>
Generated by <strong>geo-semantic-layer</strong> \xB7 <a href="https://www.npmjs.com/package/geo-semantic-layer" target="_blank">npm</a>
</footer>
</div>
</body>
</html>`;
}
|
|
1499
|
+
/**
 * Maps a numeric score to the CSS class used for the score circle.
 *
 * @param {number} score - Score to classify.
 * @returns {string} "score-excellent" (>= 90), "score-good" (>= 70),
 *   "score-warning" (>= 50) or "score-poor" otherwise.
 */
function getScoreClass(score) {
  // Ordered from the highest threshold down; the first match wins.
  const tiers = [
    [90, "score-excellent"],
    [70, "score-good"],
    [50, "score-warning"]
  ];
  for (const [minimum, cssClass] of tiers) {
    if (score >= minimum) {
      return cssClass;
    }
  }
  return "score-poor";
}
|
|
1505
|
+
|
|
1506
|
+
// src/optimizer/llm-profiles.ts

/**
 * Per-model settings, keyed by model name. Each profile holds the model name,
 * a tokens-per-character estimate, the maximum context size in tokens, a
 * recommended token budget and whether function calling is supported.
 */
var LLM_PROFILES = (() => {
  // Columns: model, tokensPerChar, maxContextTokens, recommendedMaxTokens,
  // supportsFunctionCalling (true unless stated).
  const rows = [
    // 0.25 tokens per character: ~4 chars per token (average for English).
    ["gpt-4", 0.25, 8192, 500],
    ["gpt-4-turbo", 0.25, 128e3, 1e3],
    ["gpt-3.5-turbo", 0.25, 16385, 400],
    // 0.27 tokens per character, i.e. a slightly higher token estimate than
    // the 0.25 used above for the same text.
    ["claude-3-opus", 0.27, 2e5, 1e3],
    ["claude-3-sonnet", 0.27, 2e5, 800],
    ["claude-3-haiku", 0.27, 2e5, 600],
    ["claude-2", 0.27, 1e5, 600, false],
    ["gemini-pro", 0.26, 32760, 600],
    ["gemini-ultra", 0.26, 32760, 800]
  ];
  const profiles = {};
  for (const [model, tokensPerChar, maxContextTokens, recommendedMaxTokens, supportsFunctionCalling = true] of rows) {
    profiles[model] = {
      model,
      tokensPerChar,
      maxContextTokens,
      recommendedMaxTokens,
      supportsFunctionCalling
    };
  }
  return profiles;
})();
|
|
1574
|
+
/**
 * Look up the static profile registered for an LLM identifier.
 *
 * @param {string} model - Key into LLM_PROFILES (e.g. "gpt-4-turbo").
 * @returns {object|undefined} The entry stored under that key, or undefined
 *   when LLM_PROFILES has nothing under it.
 */
function getModelProfile(model) {
  const { [model]: profile } = LLM_PROFILES;
  return profile;
}
|
|
1577
|
+
|
|
1578
|
+
// src/optimizer/token-analyzer.ts
|
|
1579
|
+
/**
 * Produce a token-usage report for a schema object.
 *
 * The total is estimated from the schema pretty-printed with 2-space
 * indentation; the per-field breakdown and the recommendations come from
 * calculateTokensByField and generateRecommendations respectively.
 *
 * @param {*} schema - Schema value to analyze.
 * @param {string} model - Model identifier used for token estimation.
 * @returns {{totalTokens: number, byField: Object, recommendations: string[], model: string}}
 */
function analyzeTokenUsage(schema, model) {
  const prettyJson = JSON.stringify(schema, null, 2);
  const analysis = {
    totalTokens: estimateTokens(prettyJson, model),
    byField: calculateTokensByField(schema, model)
  };
  analysis.recommendations = generateRecommendations(
    analysis.totalTokens,
    analysis.byField,
    model
  );
  analysis.model = model;
  return analysis;
}
|
|
1591
|
+
/**
 * Estimate how many tokens `text` occupies for the given model.
 *
 * Models whose identifier starts with "gpt-" are counted with the imported
 * tokenizer's `encode`; if that call throws, or for any other model, the
 * character-ratio approximation from approximateTokens is used instead.
 *
 * @param {string} text - Text to measure.
 * @param {string} model - Model identifier.
 * @returns {number} Estimated token count.
 */
function estimateTokens(text, model) {
  const usesGptTokenizer = model.startsWith("gpt-");
  if (!usesGptTokenizer) {
    return approximateTokens(text, model);
  }
  try {
    return encode(text).length;
  } catch (tokenizerError) {
    // Tokenizer failure is non-fatal: fall back to the approximation.
    return approximateTokens(text, model);
  }
}
|
|
1601
|
+
/**
 * Approximate the token count of `text` from its character length.
 *
 * The ratio comes from the model's profile (`tokensPerChar`). When the model
 * has no registered profile, a default ratio of 0.25 tokens per character is
 * used instead of throwing, so unprofiled models still get an estimate.
 *
 * @param {string} text - Text to measure; null/undefined counts as empty.
 * @param {string} model - Model identifier passed to getModelProfile.
 * @returns {number} Character length times the ratio, rounded up.
 */
function approximateTokens(text, model) {
  // Same ratio the GPT profiles in LLM_PROFILES declare.
  const FALLBACK_TOKENS_PER_CHAR = 0.25;
  const profile = getModelProfile(model);
  const tokensPerChar =
    profile && typeof profile.tokensPerChar === "number"
      ? profile.tokensPerChar
      : FALLBACK_TOKENS_PER_CHAR;
  // JSON.stringify can yield undefined (e.g. for functions); treat as empty.
  const length = text === null || text === void 0 ? 0 : text.length;
  return Math.ceil(length * tokensPerChar);
}
|
|
1605
|
+
/**
 * Build a flat map from field path to estimated token count.
 *
 * - null/undefined contribute nothing.
 * - Primitives are measured from their JSON serialization and stored under
 *   their path (or "value" when the path is empty).
 * - Array items use `path[index]` paths.
 * - Object properties use `path.key` paths, plus a `path.key#key` entry
 *   holding the cost of the serialized key itself (`"key":`).
 *
 * @param {*} schema - Value to walk.
 * @param {string} model - Model identifier used for token estimation.
 * @param {string} [path2=""] - Path prefix of `schema` inside the root value.
 * @returns {Object<string, number>} Token counts keyed by path.
 */
function calculateTokensByField(schema, model, path2 = "") {
  if (schema === null || schema === void 0) {
    return {};
  }
  if (typeof schema !== "object") {
    return { [path2 || "value"]: estimateTokens(JSON.stringify(schema), model) };
  }
  if (Array.isArray(schema)) {
    return schema.reduce((acc, item, index) => {
      const itemPath = `${path2 || ""}[${index}]`;
      return Object.assign(acc, calculateTokensByField(item, model, itemPath));
    }, {});
  }
  return Object.entries(schema).reduce((acc, [key, value]) => {
    const fieldPath = path2 ? `${path2}.${key}` : key;
    // Record the key's own cost before descending into its value.
    acc[`${fieldPath}#key`] = estimateTokens(`"${key}":`, model);
    return Object.assign(acc, calculateTokensByField(value, model, fieldPath));
  }, {});
}
|
|
1633
|
+
/**
 * Turn token measurements into human-readable recommendations.
 *
 * Emits, in order:
 *  1. a budget warning when the total exceeds the model profile's
 *     `recommendedMaxTokens` (skipped when the model has no profile, instead
 *     of throwing on the missing profile);
 *  2. the single most expensive value field, if it exceeds 20% of the total;
 *  3. one entry per value field costing more than 100 tokens;
 *  4. a note when more than 10 array-item paths are present.
 *
 * Entries whose path ends in "#key" (key costs) are ignored for 2-4.
 *
 * @param {number} totalTokens - Total token estimate for the schema.
 * @param {Object<string, number>} byField - Token counts keyed by field path.
 * @param {string} model - Model identifier passed to getModelProfile.
 * @returns {string[]} Recommendation messages.
 */
function generateRecommendations(totalTokens, byField, model) {
  const recommendations = [];
  const profile = getModelProfile(model);
  const budget = profile ? profile.recommendedMaxTokens : void 0;
  if (typeof budget === "number" && totalTokens > budget) {
    const excess = totalTokens - budget;
    const reduction = (excess / totalTokens * 100).toFixed(1);
    recommendations.push(
      `Schema uses ${totalTokens} tokens, ${excess} over recommended ${budget} for ${model}. Consider reducing by ${reduction}%.`
    );
  }
  const valueFields = Object.entries(byField).filter(([key]) => !key.endsWith("#key"));
  // Single pass for the maximum; strict ">" keeps the first field on ties,
  // matching what a stable descending sort would put first.
  let topField;
  for (const entry of valueFields) {
    if (!topField || entry[1] > topField[1]) {
      topField = entry;
    }
  }
  if (topField && topField[1] > totalTokens * 0.2) {
    recommendations.push(
      `Most expensive field: "${topField[0]}" (${topField[1]} tokens, ${(topField[1] / totalTokens * 100).toFixed(1)}% of total)`
    );
  }
  for (const [field, tokens] of valueFields) {
    if (tokens > 100) {
      recommendations.push(`Consider shortening field "${field}" (${tokens} tokens)`);
    }
  }
  const arrayFields = valueFields.filter(([key]) => key.includes("["));
  if (arrayFields.length > 10) {
    recommendations.push(`Schema contains ${arrayFields.length} array items. Consider limiting array sizes.`);
  }
  return recommendations;
}
|
|
1661
|
+
|
|
1662
|
+
// src/optimizer/semantic-scorer.ts
|
|
1663
|
+
// Default per-category weights; used as the default `weights` argument of
// scoreFields.
var DEFAULT_SEMANTIC_WEIGHTS = {
  "coreIdentity": 1,
  "seoCritical": 0.85,
  "authorship": 0.75,
  "relationships": 0.6,
  "decorative": 0.2
};
|
|
1670
|
+
var FIELD_IMPORTANCE = {
|
|
1671
|
+
// Core Identity (CRITICAL - never remove)
|
|
1672
|
+
"@context": 1,
|
|
1673
|
+
"@type": 1,
|
|
1674
|
+
name: 1,
|
|
1675
|
+
url: 0.95,
|
|
1676
|
+
// SEO Critical
|
|
1677
|
+
headline: 0.9,
|
|
1678
|
+
description: 0.85,
|
|
1679
|
+
title: 0.85,
|
|
1680
|
+
image: 0.8,
|
|
1681
|
+
// Authorship & Provenance
|
|
1682
|
+
author: 0.8,
|
|
1683
|
+
datePublished: 0.75,
|
|
1684
|
+
publisher: 0.75,
|
|
1685
|
+
dateModified: 0.7,
|
|
1686
|
+
// Content
|
|
1687
|
+
articleBody: 0.7,
|
|
1688
|
+
text: 0.7,
|
|
1689
|
+
about: 0.65,
|
|
1690
|
+
// Products
|
|
1691
|
+
price: 0.85,
|
|
1692
|
+
priceCurrency: 0.8,
|
|
1693
|
+
offers: 0.85,
|
|
1694
|
+
availability: 0.75,
|
|
1695
|
+
brand: 0.7,
|
|
1696
|
+
sku: 0.6,
|
|
1697
|
+
gtin: 0.55,
|
|
1698
|
+
// Relationships (good for context)
|
|
1699
|
+
sameAs: 0.6,
|
|
1700
|
+
isPartOf: 0.55,
|
|
1701
|
+
mainEntity: 0.6,
|
|
1702
|
+
mainEntityOfPage: 0.55,
|
|
1703
|
+
// Navigation
|
|
1704
|
+
breadcrumb: 0.65,
|
|
1705
|
+
itemListElement: 0.6,
|
|
1706
|
+
position: 0.5,
|
|
1707
|
+
// Medium importance
|
|
1708
|
+
keywords: 0.5,
|
|
1709
|
+
inLanguage: 0.5,
|
|
1710
|
+
wordCount: 0.45,
|
|
1711
|
+
commentCount: 0.4,
|
|
1712
|
+
// Low importance (decorative/verbose)
|
|
1713
|
+
alternateName: 0.3,
|
|
1714
|
+
disambiguatingDescription: 0.25,
|
|
1715
|
+
identifier: 0.2,
|
|
1716
|
+
potentialAction: 0.15,
|
|
1717
|
+
// Very low importance
|
|
1718
|
+
interactionStatistic: 0.1,
|
|
1719
|
+
subjectOf: 0.1
|
|
1720
|
+
};
|
|
1721
|
+
/**
 * Score every object property in `schema` by semantic value versus token cost.
 *
 * `tokensByField` (as produced by calculateTokensByField) only has entries for
 * primitive leaves and for `<path>#key` key costs. Looking up a container
 * path directly therefore always yields 0, which hid the cost of object- and
 * array-valued fields. The cost of a container is now the sum of everything
 * beneath it (descendant values plus descendant keys); a primitive field's
 * cost is still its own entry. A field's own `#key` cost is not included in
 * its `tokenCost`, matching how leaves were already scored.
 *
 * @param {*} schema - Schema value to walk; non-objects yield no scores.
 * @param {Object<string, number>} tokensByField - Token counts keyed by path.
 * @param {Object} [weights=DEFAULT_SEMANTIC_WEIGHTS] - Forwarded to
 *   calculateSemanticValue.
 * @returns {Object<string, {semanticValue: number, tokenCost: number,
 *   efficiency: number, recommendation: string, reason?: string}>}
 *   Scores keyed by field path (`a.b`, `a[0].b`), parents before children.
 */
function scoreFields(schema, tokensByField, weights = DEFAULT_SEMANTIC_WEIGHTS) {
  const scores = {};
  // Records scores for every property under `node` and returns the total
  // token cost of the subtree rooted at `node`.
  function traverse(node, path2 = "") {
    if (node === null || node === void 0) {
      return 0;
    }
    if (typeof node !== "object") {
      return tokensByField[path2] || 0;
    }
    let subtreeCost = 0;
    if (Array.isArray(node)) {
      node.forEach((item, index) => {
        subtreeCost += traverse(item, path2 ? `${path2}[${index}]` : `[${index}]`);
      });
      return subtreeCost;
    }
    for (const [key, value] of Object.entries(node)) {
      const fieldPath = path2 ? `${path2}.${key}` : key;
      // Reserve the slot first so a parent keeps its position ahead of its
      // children in the result's key order.
      scores[fieldPath] = void 0;
      const tokenCost = traverse(value, fieldPath);
      const semanticValue = calculateSemanticValue(key, value, weights);
      const efficiency = tokenCost > 0 ? semanticValue / tokenCost : 0;
      const { recommendation, reason } = determineRecommendation(
        semanticValue,
        efficiency,
        tokenCost
      );
      scores[fieldPath] = {
        semanticValue,
        tokenCost,
        efficiency,
        recommendation,
        ...reason && { reason }
      };
      subtreeCost += (tokensByField[`${fieldPath}#key`] || 0) + tokenCost;
    }
    return subtreeCost;
  }
  traverse(schema);
  return scores;
}
|
|
1758
|
+
/**
 * Compute a field's semantic value in the range up to 1.
 *
 * Starts from FIELD_IMPORTANCE[fieldName] (0.5 when absent or falsy) and
 * applies at most one adjustment:
 *  - strings longer than 500 characters: x0.7
 *  - strings of 11..199 characters: x1.1
 *  - arrays with more than 10 items: x0.8
 * The result is capped at 1.
 *
 * @param {string} fieldName - Property name to look up.
 * @param {*} value - Property value, inspected for length only.
 * @param {*} [_weights] - Unused.
 * @returns {number} Adjusted semantic value.
 */
function calculateSemanticValue(fieldName, value, _weights) {
  let score = FIELD_IMPORTANCE[fieldName] || 0.5;
  if (typeof value === "string") {
    const length = value.length;
    if (length > 500) {
      score *= 0.7;
    } else if (length > 10 && length < 200) {
      score *= 1.1;
    }
  } else if (Array.isArray(value) && value.length > 10) {
    score *= 0.8;
  }
  return Math.min(score, 1);
}
|
|
1772
|
+
/**
 * Decide whether a field should be kept, removed, or treated as optional.
 *
 * Rules are evaluated in order and the first match wins:
 *  1. semanticValue >= 0.9                  -> keep
 *  2. semanticValue < 0.3                   -> remove
 *  3. efficiency < 0.01 and tokenCost > 50  -> remove
 *  4. efficiency > 0.05                     -> keep
 *  otherwise                                -> optional
 *
 * @param {number} semanticValue - Semantic value of the field.
 * @param {number} efficiency - Semantic value per token.
 * @param {number} tokenCost - Token cost of the field.
 * @returns {{recommendation: string, reason: string}}
 */
function determineRecommendation(semanticValue, efficiency, tokenCost) {
  const rules = [
    [semanticValue >= 0.9, "keep", "Critical for schema validity"],
    [semanticValue < 0.3, "remove", "Low semantic value"],
    [efficiency < 0.01 && tokenCost > 50, "remove", "Inefficient (high cost, low value)"],
    [efficiency > 0.05, "keep", "High efficiency"]
  ];
  const matched = rules.find(([applies]) => applies);
  if (!matched) {
    return { recommendation: "optional", reason: "Medium value/cost ratio" };
  }
  const [, recommendation, reason] = matched;
  return { recommendation, reason };
}
|
|
1787
|
+
|
|
1788
|
+
// src/auditor/optimizer-suggestions.ts
|
|
1789
|
+
/**
 * Collect optimization suggestions for every schema found by a project scan.
 *
 * Each schema entry with a truthy `schema` payload is analyzed, one at a
 * time, via analyzeSchemaOptimization. The combined list is sorted by
 * `impact.reduction` in descending order, with a missing reduction treated
 * as 0.
 *
 * @param {{filesWithSchemas: Array<{filePath: string, schemas: Array}>}} scanResult
 *   Scan output to analyze.
 * @param {string} [targetModel="gpt-4"] - Model the token estimates target.
 * @returns {Promise<Array>} Suggestions, largest reduction first.
 */
async function generateOptimizationSuggestions(scanResult, targetModel = "gpt-4") {
  const collected = [];
  for (const file of scanResult.filesWithSchemas) {
    for (const entry of file.schemas) {
      if (entry.schema) {
        const found = await analyzeSchemaOptimization(
          file.filePath,
          entry.type,
          entry.schema,
          targetModel
        );
        collected.push(...found);
      }
    }
  }
  const reductionOf = (suggestion) => suggestion.impact.reduction || 0;
  return collected.sort((left, right) => reductionOf(right) - reductionOf(left));
}
|
|
1810
|
+
/**
 * Analyze one schema and return the optimization suggestions that apply.
 *
 * Checks, in order:
 *  - total size: more than 800 tokens yields a "token-reduction" suggestion;
 *  - low-value fields: semanticValue < 0.4 with tokenCost > 20 yields one
 *    "field-removal" suggestion listing them;
 *  - recommended fields missing for the schema type yield "field-addition";
 *  - whatever detectRedundancies reports.
 *
 * Any error thrown during analysis is logged with console.warn and the
 * suggestions gathered so far are returned.
 *
 * @param {string} filePath - File the schema came from.
 * @param {string} schemaType - Schema type name (e.g. "Article").
 * @param {Object} schemaData - The schema object itself.
 * @param {string} targetModel - Model the token estimates target.
 * @returns {Promise<Array>} Suggestions for this schema.
 */
async function analyzeSchemaOptimization(filePath, schemaType, schemaData, targetModel) {
  const suggestions = [];
  const origin = { filePath, schemaType };
  try {
    const tokenAnalysis = analyzeTokenUsage(schemaData, targetModel);
    const totalTokens = tokenAnalysis.totalTokens;
    if (totalTokens > 800) {
      suggestions.push({
        ...origin,
        type: "token-reduction",
        message: `Schema is large (${totalTokens} tokens). Consider optimizing for LLM consumption.`,
        impact: {
          tokensBefore: totalTokens,
          // Conservative estimate
          tokensAfter: Math.round(totalTokens * 0.6),
          reduction: 40
        }
      });
    }

    const fieldScores = scoreFields(schemaData, tokenAnalysis.byField);
    const lowValueFields = Object.entries(fieldScores)
      .filter(([, score]) => score.semanticValue < 0.4 && score.tokenCost > 20)
      .map(([field]) => field);
    if (lowValueFields.length > 0) {
      let tokenSavings = 0;
      for (const field of lowValueFields) {
        const score = fieldScores[field];
        tokenSavings += (score == null ? void 0 : score.tokenCost) || 0;
      }
      suggestions.push({
        ...origin,
        type: "field-removal",
        message: `${lowValueFields.length} field(s) have low semantic value but high token cost.`,
        impact: {
          tokensBefore: totalTokens,
          tokensAfter: totalTokens - tokenSavings,
          reduction: Math.round(tokenSavings / totalTokens * 100)
        },
        fields: lowValueFields
      });
    }

    const missingHighValueFields = detectMissingHighValueFields(schemaType, schemaData);
    if (missingHighValueFields.length > 0) {
      suggestions.push({
        ...origin,
        type: "field-addition",
        message: `Consider adding high-value fields: ${missingHighValueFields.join(", ")}`,
        impact: {
          tokensBefore: totalTokens,
          // Estimate
          tokensAfter: totalTokens + 50
        },
        fields: missingHighValueFields
      });
    }

    suggestions.push(...detectRedundancies(schemaData, filePath, schemaType));
  } catch (error) {
    console.warn(`Failed to analyze schema in ${filePath}:`, error);
  }
  return suggestions;
}
|
|
1879
|
+
/**
 * List the recommended fields for a schema type that `data` lacks.
 *
 * Schema types without a recommendation list yield an empty array. Presence
 * is decided by hasField2.
 *
 * @param {string} schemaType - Schema type name (e.g. "Product").
 * @param {Object} data - Schema object to inspect.
 * @returns {string[]} Missing field names, in recommendation order.
 */
function detectMissingHighValueFields(schemaType, data) {
  const highValueFields = {
    Article: ["image", "dateModified", "publisher", "description", "wordCount"],
    Product: ["image", "brand", "aggregateRating", "review"],
    Organization: ["url", "logo", "sameAs", "description"],
    Person: ["image", "url", "sameAs", "description"],
    LocalBusiness: ["telephone", "address", "geo", "openingHours"],
    Event: ["image", "organizer", "eventStatus", "eventAttendanceMode"]
  };
  const expected = highValueFields[schemaType] || [];
  return expected.filter((field) => !hasField2(data, field));
}
|
|
1897
|
+
/**
 * Detect structural redundancy in a schema object.
 *
 * Reports, in order:
 *  - a publisher whose "@id" and `url` are both truthy and strictly equal;
 *  - nesting deeper than 4 levels, as measured by getMaxDepth;
 *  - any paths returned by findLargeArrays.
 *
 * @param {Object} data - Schema object to inspect.
 * @param {string} filePath - File the schema came from.
 * @param {string} schemaType - Schema type name.
 * @returns {Array} Suggestions describing the redundancies found.
 */
function detectRedundancies(data, filePath, schemaType) {
  const found = [];
  const origin = { filePath, schemaType };

  const publisher = data.publisher;
  const duplicatedPublisherId =
    Boolean(publisher) &&
    Boolean(publisher["@id"]) &&
    Boolean(publisher.url) &&
    publisher["@id"] === publisher.url;
  if (duplicatedPublisherId) {
    found.push({
      ...origin,
      type: "field-removal",
      message: "Redundant @id and url in publisher (same value)",
      impact: { reduction: 5 },
      fields: ["publisher.@id"]
    });
  }

  const maxDepth = getMaxDepth(data);
  if (maxDepth > 4) {
    found.push({
      ...origin,
      type: "field-update",
      message: `Schema has deep nesting (${maxDepth} levels). Consider flattening.`,
      impact: { reduction: 10 }
    });
  }

  const largeArrays = findLargeArrays(data);
  if (largeArrays.length > 0) {
    found.push({
      ...origin,
      type: "field-update",
      message: `Large arrays found: ${largeArrays.join(", ")}. Consider limiting.`,
      impact: { reduction: 15 },
      fields: largeArrays
    });
  }
  return found;
}
|
|
1942
|
+
/**
 * Check whether a dot-separated path resolves to a non-nullish value.
 *
 * Each path segment must exist (via the `in` operator) on an object at that
 * level; hitting a primitive or a missing key returns false.
 *
 * @param {*} obj - Root value to search.
 * @param {string} fieldPath - Dot-separated path, e.g. "publisher.url".
 * @returns {boolean} True when the path exists and its value is neither
 *   null nor undefined.
 */
function hasField2(obj, fieldPath) {
  let cursor = obj;
  for (const segment of fieldPath.split(".")) {
    const canDescend = Boolean(cursor) && typeof cursor === "object" && segment in cursor;
    if (!canDescend) {
      return false;
    }
    cursor = cursor[segment];
  }
  return !(cursor === void 0 || cursor === null);
}
|
|
1953
|
+
/**
 * Measure how deeply plain objects are nested inside `obj`.
 *
 * Each object-valued property adds one level. Arrays add no level of their
 * own but are searched, so objects held in array-valued properties count.
 * Previously such arrays were skipped outright, which under-reported depth
 * for any schema that nests objects in lists.
 *
 * @param {*} obj - Value to inspect; non-objects return `currentDepth`.
 * @param {number} [currentDepth=0] - Depth of `obj` itself.
 * @returns {number} The deepest level reached.
 */
function getMaxDepth(obj, currentDepth = 0) {
  if (typeof obj !== "object" || obj === null) {
    return currentDepth;
  }
  let maxDepth = currentDepth;
  for (const value of Object.values(obj)) {
    if (typeof value !== "object" || value === null) {
      continue;
    }
    // Arrays are transparent: look inside without adding a level. Their
    // object items then count one level below the current object.
    const depth = Array.isArray(value)
      ? getMaxDepth(value, currentDepth)
      : getMaxDepth(value, currentDepth + 1);
    maxDepth = Math.max(maxDepth, depth);
  }
  return maxDepth;
}
|
|
1966
|
+
/**
 * Find the paths of all arrays with more than 10 elements.
 *
 * Paths use the same notation as calculateTokensByField and scoreFields:
 * `parent.key` for object properties and `parent[index]` for array elements.
 * Array elements were previously reported as `parent.index`. A large array at
 * the root with no path is reported as "root".
 *
 * @param {*} obj - Value to search; non-objects yield no results.
 * @param {string} [path2=""] - Path of `obj` inside the root value.
 * @returns {string[]} Paths of the large arrays, parents before children.
 */
function findLargeArrays(obj, path2 = "") {
  const largeArrays = [];
  if (typeof obj !== "object" || obj === null) {
    return largeArrays;
  }
  const isList = Array.isArray(obj);
  if (isList && obj.length > 10) {
    largeArrays.push(path2 || "root");
  }
  for (const [key, value] of Object.entries(obj)) {
    let childPath;
    if (isList) {
      childPath = `${path2}[${key}]`;
    } else {
      childPath = path2 ? `${path2}.${key}` : key;
    }
    largeArrays.push(...findLargeArrays(value, childPath));
  }
  return largeArrays;
}
|
|
1979
|
+
/**
 * Summarize a list of optimization suggestions.
 *
 * `topOpportunities` now ranks the whole list before taking five. The old
 * code took the first five and then sorted them, so a high-impact suggestion
 * later in an unsorted list was dropped. The input array is not mutated.
 *
 * @param {Array<{type: string, impact: {reduction?: number}}>} suggestions
 *   Suggestions to summarize.
 * @returns {{totalReduction: number, byType: Object<string, number>, topOpportunities: Array}}
 *   `totalReduction` is the rounded mean of `impact.reduction` across all
 *   suggestions (missing reductions count as 0; 0 for an empty list),
 *   `byType` counts suggestions per type, and `topOpportunities` holds up to
 *   five suggestions with the largest reduction, in descending order.
 */
function getOptimizationSummary(suggestions) {
  const reductionOf = (suggestion) => suggestion.impact.reduction || 0;
  let reductionSum = 0;
  const byType = {};
  for (const suggestion of suggestions) {
    reductionSum += reductionOf(suggestion);
    byType[suggestion.type] = (byType[suggestion.type] || 0) + 1;
  }
  // Copy before sorting: Array.prototype.sort works in place.
  const topOpportunities = suggestions
    .slice()
    .sort((a, b) => reductionOf(b) - reductionOf(a))
    .slice(0, 5);
  return {
    totalReduction: Math.round(reductionSum / Math.max(1, suggestions.length)),
    byType,
    topOpportunities
  };
}
|
|
1995
|
+
|
|
1996
|
+
// src/auditor/index.ts
|
|
1997
|
+
/**
 * Run the full audit pipeline: scan, validate, optionally analyze
 * optimizations, build the coverage report and optionally save it.
 *
 * Progress is written to the console at each stage. Only options that are
 * set are forwarded to the scanner and validator: truthy values for
 * rootDir/include/exclude/maxDepth/schemaTypes/ignore/format, and any
 * non-undefined value for deepAnalysis/strict.
 *
 * @param {Object} [options={}] - Audit options.
 * @param {boolean} [options.includeOptimizations] - Pass `false` to skip the
 *   optimization analysis.
 * @param {string} [options.targetModel] - Model for optimization analysis;
 *   "gpt-4" when falsy.
 * @param {string} [options.output] - When truthy, the report is saved there.
 * @returns {Promise<*>} The report built by generateCoverageReport.
 */
async function auditProject(options = {}) {
  console.log("\u{1F50D} Scanning project...");
  const scanOptions = {};
  for (const key of ["rootDir", "include", "exclude", "maxDepth"]) {
    if (options[key]) {
      scanOptions[key] = options[key];
    }
  }
  if (options.deepAnalysis !== void 0) {
    scanOptions.deepAnalysis = options.deepAnalysis;
  }
  const scanResult = await scanProject(scanOptions);
  console.log(
    `\u2705 Found ${scanResult.filesWithSchemas.length} files with schemas`
  );

  console.log("\u{1F52C} Validating schemas...");
  const validationOptions = {};
  if (options.strict !== void 0) {
    validationOptions.strict = options.strict;
  }
  if (options.schemaTypes) {
    validationOptions.schemaTypes = options.schemaTypes;
  }
  if (options.ignore) {
    validationOptions.ignore = options.ignore;
  }
  const validationResult = await validateSchemas(scanResult, validationOptions);
  if (validationResult.errors > 0) {
    console.log(`\u274C Found ${validationResult.errors} errors`);
  }
  if (validationResult.warnings > 0) {
    console.log(`\u26A0\uFE0F Found ${validationResult.warnings} warnings`);
  }

  let optimizations = [];
  const wantsOptimizations = options.includeOptimizations !== false;
  if (wantsOptimizations) {
    console.log("\u{1F4A1} Analyzing optimizations...");
    optimizations = await generateOptimizationSuggestions(
      scanResult,
      options.targetModel || "gpt-4"
    );
    if (optimizations.length > 0) {
      console.log(`\u{1F4A1} Found ${optimizations.length} optimization opportunities`);
    }
  }

  console.log("\u{1F4CA} Generating report...");
  const report = generateCoverageReport(scanResult, validationResult, optimizations);
  if (options.output) {
    const saveOptions = {};
    if (options.format) {
      saveOptions.format = options.format;
    }
    saveOptions.output = options.output;
    await saveReport(report, saveOptions);
    console.log(`\u2705 Report saved to: ${options.output}`);
  }
  return report;
}
|
|
2047
|
+
/**
 * Scan and validate a project, returning only the overall validity flag.
 *
 * Only truthy rootDir/include/exclude/schemaTypes/ignore values and a
 * non-undefined `strict` are forwarded to the scanner and validator.
 *
 * @param {Object} [options={}] - Scan and validation options.
 * @returns {Promise<*>} The `valid` property of the validation result.
 */
async function quickValidate(options = {}) {
  const scanOptions = {};
  for (const key of ["rootDir", "include", "exclude"]) {
    if (options[key]) {
      scanOptions[key] = options[key];
    }
  }
  const scanResult = await scanProject(scanOptions);

  const validationOptions = {};
  if (options.strict !== void 0) {
    validationOptions.strict = options.strict;
  }
  for (const key of ["schemaTypes", "ignore"]) {
    if (options[key]) {
      validationOptions[key] = options[key];
    }
  }
  const { valid } = await validateSchemas(scanResult, validationOptions);
  return valid;
}
|
|
2060
|
+
/**
 * Build a coverage report from existing scan and validation results and
 * save it. No optimization suggestions are passed to the report builder.
 *
 * @param {*} scanResult - Result of a previous scan.
 * @param {*} validationResult - Result of a previous validation.
 * @param {Object} [options={}] - Passed through to saveReport unchanged.
 * @returns {Promise<void>} Resolves once saveReport has completed.
 */
async function generateReportOnly(scanResult, validationResult, options = {}) {
  await saveReport(generateCoverageReport(scanResult, validationResult), options);
}
|
|
2064
|
+
|
|
2065
|
+
export { auditProject, generateCoverageReport, generateOptimizationSuggestions, generateReportOnly, getOptimizationSummary, getProjectStats, quickValidate, saveReport, scanProject, validateSchemas, validateSingleSchema };
|
|
2066
|
+
//# sourceMappingURL=auditor.js.map
|
|
2067
|
+
//# sourceMappingURL=auditor.js.map
|