geo-semantic-layer 3.0.0-alpha.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2067 @@
1
+ import * as fs from 'fs/promises';
2
+ import * as path from 'path';
3
+ import { z } from 'zod';
4
+ import { encode } from 'gpt-tokenizer';
5
+
6
+ // src/auditor/project-scanner.ts
7
/**
 * Globs selecting the files to scan when the caller passes no `include`
 * option: one recursive pattern per supported source extension.
 */
var DEFAULT_INCLUDE_PATTERNS = ["tsx", "jsx", "vue", "astro", "js", "ts"].map(
  (extension) => `**/*.${extension}`
);

/**
 * Globs skipped when the caller passes no `exclude` option.
 */
var DEFAULT_EXCLUDE_PATTERNS = [
  // Dependencies, build output and framework caches
  "node_modules/**",
  "dist/**",
  "build/**",
  ".next/**",
  ".nuxt/**",
  ".astro/**",
  "coverage/**",
  // Test and spec files
  "**/*.test.{ts,tsx,js,jsx}",
  "**/*.spec.{ts,tsx,js,jsx}"
];

/**
 * Regular expressions that spot schema generator usage in source text.
 * Capture group 1 is the schema type name. Both carry the `g` flag, so they
 * keep `lastIndex` state between `exec` calls and must be reset before reuse.
 */
var GENERATOR_PATTERNS = [
  // generateXxxSchema(...) helper calls
  /generate(Article|Product|Organization|LocalBusiness|Person|Event|WebPage|FAQ|Breadcrumb|OpenGraph)Schema/g,
  // new XxxSchema(...) constructor calls
  /new\s+(Article|Product|Organization|LocalBusiness|Person|Event|WebPage|FAQ|Breadcrumb)Schema/g
];
30
/**
 * Walks a project tree and reports where schema generators are used.
 *
 * Every matching file is read as UTF-8. Files containing at least one detected
 * generator go to `filesWithSchemas`; the remaining files that are framework
 * route files and for which a schema type can be suggested go to
 * `routesWithoutSchemas`.
 *
 * @param {object} [options]
 * @param {string} [options.rootDir] Directory to scan; defaults to `process.cwd()`.
 * @param {string[]} [options.include] Include globs; defaults to DEFAULT_INCLUDE_PATTERNS.
 * @param {string[]} [options.exclude] Exclude globs; defaults to DEFAULT_EXCLUDE_PATTERNS.
 * @param {number} [options.maxDepth] Directory depth limit forwarded to the file walker.
 * @returns {Promise<object>} Scan summary: `projectRoot`, `framework`,
 *   `filesScanned`, `filesWithSchemas`, `routesWithoutSchemas`, `scannedAt`.
 */
async function scanProject(options = {}) {
  const rootDir = options.rootDir || process.cwd();
  const include = options.include || DEFAULT_INCLUDE_PATTERNS;
  const exclude = options.exclude || DEFAULT_EXCLUDE_PATTERNS;
  const framework = await detectFramework(rootDir);

  const walkOptions = { include, exclude };
  // Forward maxDepth whenever the caller supplied one; a truthiness check
  // would silently drop an explicit 0.
  if (options.maxDepth != null) {
    walkOptions.maxDepth = options.maxDepth;
  }
  const allFiles = await getAllFiles(rootDir, walkOptions);

  const filesWithSchemas = [];
  const routesWithoutSchemas = [];
  for (const filePath of allFiles) {
    const content = await fs.readFile(filePath, "utf-8");
    const relativePath = path.relative(rootDir, filePath);
    const schemas = await detectSchemasInFile(content);
    if (schemas.length > 0) {
      filesWithSchemas.push({
        filePath: relativePath,
        fileType: path.extname(filePath).slice(1),
        schemas,
        linesOfCode: content.split("\n").length
      });
    } else if (isRouteFile(filePath, framework)) {
      // A route without any schema: record it only when a type can be suggested.
      const routeInfo = analyzeRouteFile(content, filePath, framework);
      if (routeInfo) {
        routesWithoutSchemas.push({
          filePath: relativePath,
          ...routeInfo
        });
      }
    }
  }

  return {
    projectRoot: rootDir,
    framework,
    filesScanned: allFiles.length,
    filesWithSchemas,
    routesWithoutSchemas,
    scannedAt: (/* @__PURE__ */ new Date()).toISOString()
  };
}
72
/**
 * Guesses the web framework of a project from the config files present
 * directly inside `rootDir`.
 *
 * Candidates are probed one at a time in the listed order and the first
 * accessible file decides the result.
 *
 * @param {string} rootDir Directory expected to hold the framework config file.
 * @returns {Promise<string>} "next", "nuxt", "astro", "vite", or "unknown".
 */
async function detectFramework(rootDir) {
  const candidates = [
    ["next", "next.config.js"],
    ["next", "next.config.mjs"],
    ["next", "next.config.ts"],
    ["nuxt", "nuxt.config.js"],
    ["nuxt", "nuxt.config.ts"],
    ["astro", "astro.config.mjs"],
    ["astro", "astro.config.ts"],
    ["vite", "vite.config.js"],
    ["vite", "vite.config.ts"]
  ];
  for (const [frameworkName, configFile] of candidates) {
    // fs.access rejects when the file is missing or unreadable; treat both as "absent".
    const isPresent = await fs.access(path.join(rootDir, configFile)).then(
      () => true,
      () => false
    );
    if (isPresent) {
      return frameworkName;
    }
  }
  return "unknown";
}
90
/**
 * Recursively collects the files under `dir` that match the include globs and
 * are not hit by the exclude globs.
 *
 * Globs are tested against paths relative to `rootDir` (the directory the walk
 * started from), so patterns such as "node_modules/**" keep working when the
 * scanned project is not the current working directory.
 *
 * @param {string} dir Directory to read.
 * @param {object} options
 * @param {string[]} options.include Globs a file must match to be returned.
 * @param {string[]} options.exclude Globs that prune files and directories.
 * @param {number} [options.maxDepth] Deepest directory level to read; the
 *   starting directory is level 0.
 * @param {number} [currentDepth=0] Level of `dir`; used by the recursion.
 * @param {string} [rootDir=dir] Base for relative paths; used by the recursion.
 * @returns {Promise<string[]>} Paths built with `path.join(dir, name)`. An
 *   unreadable directory produces a warning and contributes no files.
 */
async function getAllFiles(dir, options, currentDepth = 0, rootDir = dir) {
  const files = [];
  // Compare against null/undefined so that an explicit maxDepth of 0 is honoured.
  if (options.maxDepth != null && currentDepth > options.maxDepth) {
    return files;
  }
  try {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);
      const relativePath = path.relative(rootDir, fullPath);
      // Exclusion applies to directories too, which prunes whole subtrees.
      if (isExcluded(relativePath, options.exclude)) {
        continue;
      }
      if (entry.isDirectory()) {
        const subFiles = await getAllFiles(fullPath, options, currentDepth + 1, rootDir);
        files.push(...subFiles);
      } else if (entry.isFile() && matchesPatterns(relativePath, options.include)) {
        files.push(fullPath);
      }
    }
  } catch {
    // Best effort: keep whatever was collected so far and carry on.
    console.warn(`Warning: Could not read directory ${dir}`);
  }
  return files;
}
117
/**
 * Tells whether a path is hit by at least one exclude glob.
 *
 * @param {string} filePath Path to test, relative to the scan root.
 * @param {string[]} excludePatterns Globs understood by `globToRegex`.
 * @returns {boolean} True when any pattern matches; false for an empty list.
 */
function isExcluded(filePath, excludePatterns) {
  // Globs are written with "/", but path.relative yields "\" on Windows.
  const normalizedPath = filePath.replace(/\\/g, "/");
  return excludePatterns.some((pattern) => globToRegex(pattern).test(normalizedPath));
}
123
+ function matchesPatterns(filePath, includePatterns) {
124
+ return includePatterns.some((pattern) => {
125
+ const regex = globToRegex(pattern);
126
+ return regex.test(filePath);
127
+ });
128
+ }
129
/**
 * Compiles a glob into an anchored regular expression.
 *
 * Supported syntax:
 *   - `*`      any run of characters inside one path segment
 *   - `**`     any run of characters, including "/"
 *   - `**` + `/` as a whole leading/middle segment: zero or more directories
 *   - a trailing `/**`: the directory itself and everything below it
 *   - `{a,b}`  alternation
 * Every other character, "." included, is matched literally. Literal
 * characters are escaped while the pattern is translated, so the regex syntax
 * emitted for wildcards is never re-escaped afterwards.
 *
 * @param {string} pattern Glob using "/" as the path separator.
 * @returns {RegExp} Expression matching the whole path.
 */
function globToRegex(pattern) {
  const escapeLiteral = (ch) => ch.replace(/[\\^$.|?+()[\]{}]/g, "\\$&");
  let regexStr = "";
  let braceDepth = 0;
  let i = 0;
  while (i < pattern.length) {
    const ch = pattern[i];
    if (ch === "*" && pattern[i + 1] === "*") {
      const startsSegment = i === 0 || pattern[i - 1] === "/";
      if (startsSegment && pattern[i + 2] === "/") {
        // "**/" may stand for no directory at all.
        regexStr += "(?:.*/)?";
        i += 3;
      } else if (i > 0 && pattern[i - 1] === "/" && i + 2 === pattern.length) {
        // "dir/**" also covers "dir" itself so the walker can prune it.
        regexStr = `${regexStr.slice(0, -1)}(?:/.*)?`;
        i += 2;
      } else {
        regexStr += ".*";
        i += 2;
      }
    } else if (ch === "*") {
      regexStr += "[^/]*";
      i += 1;
    } else if (ch === "{") {
      braceDepth += 1;
      regexStr += "(?:";
      i += 1;
    } else if (ch === "}" && braceDepth > 0) {
      braceDepth -= 1;
      regexStr += ")";
      i += 1;
    } else if (ch === "," && braceDepth > 0) {
      regexStr += "|";
      i += 1;
    } else {
      regexStr += escapeLiteral(ch);
      i += 1;
    }
  }
  // Close any brace group the pattern left open so the regex stays valid.
  regexStr += ")".repeat(braceDepth);
  return new RegExp(`^${regexStr}$`);
}
133
/**
 * Finds every schema generator usage in a source text.
 *
 * Each GENERATOR_PATTERNS entry is run over the whole text. A match yields one
 * record with the captured type name, the matched text and its position.
 * Positions are resolved through a table of line start offsets, so blank lines
 * are counted like any other line.
 *
 * @param {string} content Source text to inspect.
 * @param {string} [_filePath] Unused; kept for signature compatibility.
 * @returns {Promise<object[]>} Records `{ type, generatorFunction, line,
 *   column, issues }`; `line` is 1-based, `column` is the 0-based offset into
 *   the line, `issues` starts empty.
 */
async function detectSchemasInFile(content, _filePath) {
  const schemas = [];

  // Offset at which each line begins; line k (1-based) starts at lineStarts[k - 1].
  const lineStarts = [0];
  for (let nl = content.indexOf("\n"); nl !== -1; nl = content.indexOf("\n", nl + 1)) {
    lineStarts.push(nl + 1);
  }

  for (const pattern of GENERATOR_PATTERNS) {
    // The patterns are shared and global: always restart from the beginning.
    pattern.lastIndex = 0;
    let match;
    while ((match = pattern.exec(content)) !== null) {
      const matchText = match[0];
      const schemaType = match[1];
      if (!schemaType) continue;
      const position = match.index;

      // Binary search for the last line that starts at or before the match.
      let low = 0;
      let high = lineStarts.length - 1;
      while (low < high) {
        const mid = (low + high + 1) >> 1;
        if (lineStarts[mid] <= position) {
          low = mid;
        } else {
          high = mid - 1;
        }
      }

      schemas.push({
        type: schemaType,
        generatorFunction: matchText,
        line: low + 1,
        column: position - lineStarts[low],
        issues: []
        // Filled in later, during validation
      });
    }
  }
  return schemas;
}
169
/**
 * Tells whether a file is a page/route entry for the given framework.
 *
 * Recognised layouts:
 *   - next:  `app/.../page.{tsx,jsx,ts,js}` (including the root `app/page.*`)
 *            and any script under `pages/`
 *   - nuxt:  `.vue` files under `pages/`
 *   - astro: `.astro` files under `src/pages/`
 * The directory may sit at the very start of the path, so relative paths
 * work as well as absolute ones.
 *
 * @param {string} filePath Path with "/" or "\" separators.
 * @param {string} framework Value returned by `detectFramework`.
 * @returns {boolean} False for every other framework value.
 */
function isRouteFile(filePath, framework) {
  const normalized = filePath.replace(/\\/g, "/");
  switch (framework) {
    case "next":
      return (
        /(?:^|\/)app\/(?:.*\/)?page\.(?:tsx|jsx|ts|js)$/.test(normalized) ||
        /(?:^|\/)pages\/.*\.(?:tsx|jsx|ts|js)$/.test(normalized)
      );
    case "nuxt":
      return /(?:^|\/)pages\/.*\.vue$/.test(normalized);
    case "astro":
      return /(?:^|\/)src\/pages\/.*\.astro$/.test(normalized);
    default:
      return false;
  }
}
182
/**
 * Suggests a schema type for a route file that has no schema yet.
 *
 * The route is derived from the file location (Next `app/` and `pages/`, Nuxt
 * `pages/`, Astro `src/pages/`) with its original casing. Keywords are then
 * looked up in that route, so directory names above the routes folder (a
 * project called "shop-site", a home directory called "blog") cannot trigger a
 * suggestion. When no route can be derived the whole path is used instead.
 *
 * @param {string} _content Unused; kept for signature compatibility.
 * @param {string} filePath Path with "/" or "\" separators.
 * @param {string} framework Value returned by `detectFramework`.
 * @returns {object|null} `{ routePath?, suggestedSchemaType, confidence,
 *   reason }`, or null when no keyword matches.
 */
function analyzeRouteFile(_content, filePath, framework) {
  const normalized = filePath.replace(/\\/g, "/");

  let routePath;
  if (framework === "next") {
    const appMatch = normalized.match(/(?:^|\/)app\/(?:(.*)\/)?page\.(?:tsx|jsx|ts|js)$/i);
    if (appMatch) {
      // The root page has no directory part and maps to "/".
      routePath = `/${appMatch[1] || ""}`;
    }
    const pagesMatch = normalized.match(/(?:^|\/)pages\/(.*)\.(?:tsx|jsx|ts|js)$/i);
    if (pagesMatch) {
      routePath = `/${pagesMatch[1]}`;
    }
  } else if (framework === "nuxt") {
    const nuxtMatch = normalized.match(/(?:^|\/)pages\/(.*)\.vue$/i);
    if (nuxtMatch) {
      routePath = `/${nuxtMatch[1]}`;
    }
  } else if (framework === "astro") {
    const astroMatch = normalized.match(/(?:^|\/)src\/pages\/(.*)\.astro$/i);
    if (astroMatch) {
      routePath = `/${astroMatch[1]}`;
    }
  }

  // First matching rule wins, so the order below is significant.
  const rules = [
    {
      keywords: /blog|article|post|news/,
      suggestedSchemaType: "Article",
      confidence: 0.8,
      reason: "Path contains blog/article/post keywords"
    },
    {
      keywords: /product|item|shop/,
      suggestedSchemaType: "Product",
      confidence: 0.75,
      reason: "Path contains product/shop keywords"
    },
    {
      keywords: /about|company|contact/,
      suggestedSchemaType: "Organization",
      confidence: 0.7,
      reason: "Path suggests organization/company page"
    },
    {
      keywords: /event|calendar/,
      suggestedSchemaType: "Event",
      confidence: 0.7,
      reason: "Path suggests event page"
    }
  ];
  const keywordTarget = (routePath !== undefined ? routePath : normalized).toLowerCase();
  const rule = rules.find((candidate) => candidate.keywords.test(keywordTarget));
  if (!rule || rule.confidence < 0.6) {
    return null;
  }

  return {
    ...routePath && { routePath },
    suggestedSchemaType: rule.suggestedSchemaType,
    confidence: rule.confidence,
    reason: rule.reason
  };
}
225
/**
 * Collects quick facts about a project directory without walking it.
 *
 * @param {string} [rootDir] Directory to inspect; defaults to `process.cwd()`.
 * @returns {Promise<{framework: string, estimatedFiles: number, hasPackageJson: boolean}>}
 *   `estimatedFiles` counts only the regular files directly inside `rootDir`
 *   and is 0 when the directory cannot be read.
 */
async function getProjectStats(rootDir = process.cwd()) {
  const countTopLevelFiles = async () => {
    try {
      const entries = await fs.readdir(rootDir, { withFileTypes: true });
      return entries.filter((entry) => entry.isFile()).length;
    } catch {
      return 0;
    }
  };
  const canAccess = async (target) => {
    try {
      await fs.access(target);
      return true;
    } catch {
      return false;
    }
  };

  const framework = await detectFramework(rootDir);
  const estimatedFiles = await countTopLevelFiles();
  const hasPackageJson = await canAccess(path.join(rootDir, "package.json"));
  return { framework, estimatedFiles, hasPackageJson };
}
247
/**
 * Zod schema for a `PostalAddress` node. Every address part is optional;
 * `@type` is filled in when omitted.
 */
var PostalAddressSchema = z.object({
  "@type": z.literal("PostalAddress").default("PostalAddress"),
  streetAddress: z.string().optional(),
  addressLocality: z.string().optional(),
  addressRegion: z.string().optional(),
  postalCode: z.string().optional(),
  // Exactly two characters: ISO 3166-1 alpha-2 country code
  addressCountry: z.string().length(2).optional()
});

/**
 * Zod schema for a `ContactPoint` node. `contactType` is limited to the four
 * listed values; `email` must be a valid e-mail address when present.
 */
var ContactPointSchema = z.object({
  "@type": z.literal("ContactPoint").default("ContactPoint"),
  telephone: z.string().optional(),
  email: z.string().email().optional(),
  contactType: z.enum(["customer service", "technical support", "sales", "billing"]).optional(),
  areaServed: z.string().optional(),
  availableLanguage: z.array(z.string()).optional()
});
264
/**
 * Zod schema for an `Organization` JSON-LD document.
 * `name`, `url` and a non-empty `sameAs` list are mandatory; `@context` and
 * `@type` are filled in when omitted; all remaining fields are optional.
 */
var OrganizationSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.literal("Organization").default("Organization"),

  // --- Required ---
  name: z.string().min(1, "Organization name is required"),
  url: z.string().url("Must be a valid URL"),

  // --- Recommended ---
  logo: z.string().url("Logo must be a valid URL").optional(),
  image: z.string().url("Image must be a valid URL").optional(),
  description: z.string().min(10, "Description should be at least 10 characters").optional(),

  /**
   * CRITICAL for GEO: entity disambiguation.
   * Needs at least one social profile or authority ID (Wikidata, Wikipedia, etc.).
   */
  sameAs: z
    .array(z.string().url())
    .min(1, "At least one sameAs URL is required for entity disambiguation")
    .describe("Social profiles, Wikidata ID, Wikipedia URL for entity disambiguation"),

  // --- Contact information ---
  email: z.string().email().optional(),
  telephone: z.string().optional(),
  address: PostalAddressSchema.optional(),
  // Either a single contact point or a list of them
  contactPoint: z.union([ContactPointSchema, z.array(ContactPointSchema)]).optional(),

  // --- Additional information ---
  alternateName: z.string().optional(),
  foundingDate: z.string().regex(/^\d{4}-\d{2}-\d{2}$/, "Must be in YYYY-MM-DD format").optional(),
  founder: z.array(z.string()).optional(),

  // --- Relationships ---
  parentOrganization: z.string().optional(),
  subOrganization: z.array(z.string()).optional()
});
292
/**
 * Zod schema for a `Person` JSON-LD document.
 * `name` and a non-empty `sameAs` list are mandatory; `@context` and `@type`
 * are filled in when omitted; all remaining fields, `url` included, are optional.
 */
var PersonSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.literal("Person").default("Person"),

  // --- Identity (only `name` is mandatory) ---
  name: z.string().min(1, "Person name is required"),
  url: z.string().url("Must be a valid URL").optional(),

  // --- Recommended ---
  image: z.string().url("Image must be a valid URL").optional(),
  description: z.string().min(10, "Description should be at least 10 characters").optional(),

  /**
   * CRITICAL for GEO: entity disambiguation.
   * Should list social profiles, Wikidata ID, Wikipedia URL, etc.
   */
  sameAs: z
    .array(z.string().url())
    .min(1, "At least one sameAs URL is required for entity disambiguation")
    .describe("Social profiles, Wikidata ID, Wikipedia URL for entity disambiguation"),

  // --- Contact information ---
  email: z.string().email().optional(),
  telephone: z.string().optional(),
  address: PostalAddressSchema.optional(),

  // --- Professional information ---
  jobTitle: z.string().optional(),
  // Inline employer reference; its "@type" has no default and must be supplied
  worksFor: z.object({
    "@type": z.literal("Organization"),
    name: z.string(),
    url: z.string().url().optional()
  }).optional(),

  // --- Additional information ---
  alternateName: z.string().optional(),
  birthDate: z.string().regex(/^\d{4}-\d{2}-\d{2}$/, "Must be in YYYY-MM-DD format").optional(),
  nationality: z.string().optional(),

  // --- Relationships ---
  colleague: z.array(z.string()).optional(),
  alumniOf: z.string().optional()
});
325
/**
 * Zod schema for a product `Offer`. `price` and a three-character
 * `priceCurrency` are mandatory.
 */
var OfferSchema = z.object({
  "@type": z.literal("Offer").default("Offer"),
  price: z.union([z.string(), z.number()]),
  // Exactly three characters: ISO 4217 currency code
  priceCurrency: z.string().length(3),
  availability: z.string().url().optional().describe("e.g., https://schema.org/InStock"),
  url: z.string().url().optional(),
  priceValidUntil: z.string().regex(/^\d{4}-\d{2}-\d{2}$/).optional(),
  seller: z.object({
    "@type": z.literal("Organization"),
    name: z.string()
  }).optional()
});

/**
 * Zod schema for an `AggregateRating`. `bestRating` and `worstRating` fall
 * back to 5 and 1 when omitted.
 */
var AggregateRatingSchema = z.object({
  "@type": z.literal("AggregateRating").default("AggregateRating"),
  ratingValue: z.union([z.string(), z.number()]),
  reviewCount: z.number().int().positive(),
  bestRating: z.union([z.string(), z.number()]).optional().default(5),
  worstRating: z.union([z.string(), z.number()]).optional().default(1)
});

/**
 * Zod schema for a single `Review`: a named `Person` author, a YYYY-MM-DD
 * publication date, a body of at least 10 characters and a rating.
 */
var ReviewSchema = z.object({
  "@type": z.literal("Review").default("Review"),
  author: z.object({
    "@type": z.literal("Person"),
    name: z.string()
  }),
  datePublished: z.string().regex(/^\d{4}-\d{2}-\d{2}$/),
  reviewBody: z.string().min(10),
  reviewRating: z.object({
    "@type": z.literal("Rating"),
    ratingValue: z.union([z.string(), z.number()]),
    bestRating: z.union([z.string(), z.number()]).optional().default(5)
  })
});
359
/**
 * Zod schema for a `Product` JSON-LD document. Only `name` is mandatory;
 * `@context` and `@type` are filled in when omitted.
 */
var ProductSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.literal("Product").default("Product"),

  // --- Required ---
  name: z.string().min(1, "Product name is required"),

  // --- Recommended ---
  // One image URL or a list of image URLs
  image: z.union([z.string().url(), z.array(z.string().url())]).optional(),
  description: z.string().min(10, "Description should be at least 10 characters").optional(),

  // --- Identifiers (SKU, GTIN variants, manufacturer part number) ---
  sku: z.string().optional(),
  gtin: z.string().optional(),
  gtin8: z.string().optional(),
  gtin12: z.string().optional(),
  gtin13: z.string().optional(),
  gtin14: z.string().optional(),
  mpn: z.string().optional(),

  // --- Brand ---
  brand: z.object({
    "@type": z.literal("Brand"),
    name: z.string()
  }).optional(),

  // --- Pricing: a single offer or a list of offers ---
  offers: z.union([OfferSchema, z.array(OfferSchema)]).optional(),

  // --- Ratings and reviews ---
  aggregateRating: AggregateRatingSchema.optional(),
  review: z.union([ReviewSchema, z.array(ReviewSchema)]).optional(),

  // --- Additional information ---
  category: z.string().optional(),
  color: z.union([z.string(), z.array(z.string())]).optional(),
  material: z.string().optional(),
  manufacturer: z.object({
    "@type": z.literal("Organization"),
    name: z.string()
  }).optional()
});
394
/**
 * Zod schema for an `Article` JSON-LD document (also `NewsArticle`,
 * `BlogPosting` and `TechArticle`).
 * `headline`, `image`, `author` and `datePublished` are mandatory.
 *
 * Dates accept either `YYYY-MM-DD` or a full date-time carrying a time zone
 * designator (`Z` or `+hh:mm` / `-hh:mm`), with optional fractional seconds,
 * so the output of `Date.prototype.toISOString()` validates.
 */
var ArticleSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.enum(["Article", "NewsArticle", "BlogPosting", "TechArticle"]).default("Article"),

  // --- Required ---
  headline: z.string().min(1).max(110, "Headline should be less than 110 characters for optimal SEO"),
  // One image URL or a list of image URLs
  image: z.union([z.string().url(), z.array(z.string().url())]),

  // --- Authorship and dates ---
  // A single author or a list of authors
  author: z.union([
    z.object({
      "@type": z.enum(["Person", "Organization"]),
      name: z.string(),
      url: z.string().url().optional()
    }),
    z.array(
      z.object({
        "@type": z.enum(["Person", "Organization"]),
        name: z.string(),
        url: z.string().url().optional()
      })
    )
  ]).describe("Author(s) of the article"),
  datePublished: z.string().regex(
    /^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2}))?$/,
    "Must be ISO 8601 format"
  ),
  dateModified: z.string().regex(
    /^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2}))?$/,
    "Must be ISO 8601 format"
  ).optional(),

  // --- Publisher (recommended for Google News) ---
  publisher: z.object({
    "@type": z.literal("Organization"),
    name: z.string(),
    logo: z.object({
      "@type": z.literal("ImageObject"),
      url: z.string().url(),
      width: z.number().optional(),
      height: z.number().optional()
    }).optional()
  }).optional(),

  // --- Content ---
  description: z.string().min(10).optional(),
  articleBody: z.string().min(50, "Article body should be at least 50 characters").optional(),
  wordCount: z.number().int().positive().optional(),

  // --- Additional metadata ---
  url: z.string().url().optional(),
  mainEntityOfPage: z.string().url().optional(),
  keywords: z.union([z.string(), z.array(z.string())]).optional(),
  articleSection: z.string().optional(),
  inLanguage: z.string().optional()
});
439
// NOTE(review): the two derived schemas below are built but never bound to a
// name in this view, so nothing here can reference them. Presumably the
// bundler dropped unused export bindings — confirm against the original source.

// Article variant whose "@type" is fixed to "BlogPosting".
ArticleSchema.extend({
  "@type": z.literal("BlogPosting").default("BlogPosting")
});

// Article variant whose "@type" is fixed to "NewsArticle", with an optional dateline.
ArticleSchema.extend({
  "@type": z.literal("NewsArticle").default("NewsArticle"),
  dateline: z.string().optional()
});
446
/**
 * Zod schema for one FAQ entry: a non-empty question (`name`) with a
 * non-empty accepted answer.
 */
var QuestionSchema = z.object({
  "@type": z.literal("Question").default("Question"),
  name: z.string().min(1, "Question text is required"),
  acceptedAnswer: z.object({
    "@type": z.literal("Answer").default("Answer"),
    text: z.string().min(1, "Answer text is required")
  })
});

/**
 * Zod schema for an `FAQPage` JSON-LD document holding at least one question.
 */
var FAQPageSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.literal("FAQPage").default("FAQPage"),
  mainEntity: z
    .array(QuestionSchema)
    .min(1, "At least one question is required for FAQ")
    .describe("Array of questions and answers")
});

/**
 * Zod schema for one breadcrumb entry: a positive integer `position`, a
 * non-empty `name` and an optional target URL.
 */
var ListItemSchema = z.object({
  "@type": z.literal("ListItem").default("ListItem"),
  position: z.number().int().positive(),
  name: z.string().min(1),
  item: z.string().url().optional()
});

/**
 * Zod schema for a `BreadcrumbList` JSON-LD document with at least two items.
 */
var BreadcrumbListSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.literal("BreadcrumbList").default("BreadcrumbList"),
  itemListElement: z
    .array(ListItemSchema)
    .min(2, "Breadcrumb list must have at least 2 items")
    .describe("Ordered list of breadcrumb items")
});
470
/**
 * Zod schema for a `WebPage` JSON-LD document. `name` and `url` are mandatory.
 *
 * Dates accept either `YYYY-MM-DD` or a full date-time carrying a time zone
 * designator (`Z` or `+hh:mm` / `-hh:mm`), with optional fractional seconds,
 * so the output of `Date.prototype.toISOString()` validates.
 */
var WebPageSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.literal("WebPage").default("WebPage"),
  name: z.string().min(1),
  description: z.string().min(10).optional(),
  url: z.string().url(),

  // --- Optional fields ---
  inLanguage: z.string().optional(),
  // The site this page belongs to
  isPartOf: z.object({
    "@type": z.literal("WebSite"),
    name: z.string(),
    url: z.string().url()
  }).optional(),
  // Only the node type is checked here; the list content is not validated
  breadcrumb: z.object({
    "@type": z.literal("BreadcrumbList")
  }).optional(),
  datePublished: z.string().regex(
    /^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2}))?$/
  ).optional(),
  dateModified: z.string().regex(
    /^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2}))?$/
  ).optional(),
  author: z.object({
    "@type": z.enum(["Person", "Organization"]),
    name: z.string()
  }).optional()
});
493
// NOTE(review): the two schemas below are built but never bound to a name in
// this view, so nothing here can reference them. Presumably the bundler
// dropped unused export bindings — confirm against the original source.

// Open Graph style metadata: title (max 60), description (max 160), url and image are mandatory.
z.object({
  title: z.string().min(1).max(60, "Title should be less than 60 characters for optimal display"),
  description: z.string().min(1).max(160, "Description should be less than 160 characters"),
  url: z.string().url(),
  type: z.enum(["website", "article", "product", "book", "profile", "music", "video"]).default("website"),
  image: z.string().url(),
  imageAlt: z.string().optional(),
  siteName: z.string().optional(),
  locale: z.string().default("en_US"),

  // --- Article-specific ---
  publishedTime: z.string().optional(),
  modifiedTime: z.string().optional(),
  author: z.string().optional(),
  section: z.string().optional(),
  tags: z.array(z.string()).optional(),

  // --- Product-specific ---
  price: z.union([z.string(), z.number()]).optional(),
  currency: z.string().length(3).optional()
});

// Twitter card style metadata: title (max 70), description (max 200) and image are mandatory.
z.object({
  card: z.enum(["summary", "summary_large_image", "app", "player"]).default("summary_large_image"),
  // @username of the site account
  site: z.string().optional(),
  // @username of the content creator
  creator: z.string().optional(),
  title: z.string().min(1).max(70, "Twitter title should be less than 70 characters"),
  description: z.string().min(1).max(200, "Twitter description should be less than 200 characters"),
  image: z.string().url(),
  imageAlt: z.string().optional()
});
523
/**
 * Zod schema for `GeoCoordinates`: latitude within [-90, 90] and longitude
 * within [-180, 180], both numeric.
 */
var GeoCoordinatesSchema = z.object({
  "@type": z.literal("GeoCoordinates").default("GeoCoordinates"),
  latitude: z.number().min(-90).max(90),
  longitude: z.number().min(-180).max(180)
});

/**
 * Zod schema for an `OpeningHoursSpecification`: one weekday name or a list of
 * them, plus 24-hour `HH:MM` opening and closing times.
 */
var OpeningHoursSchema = z.object({
  "@type": z.literal("OpeningHoursSpecification").default("OpeningHoursSpecification"),
  dayOfWeek: z.union([
    z.enum(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]),
    z.array(z.enum(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]))
  ]),
  opens: z.string().regex(/^([01]\d|2[0-3]):([0-5]\d)$/, "Must be in HH:MM format"),
  closes: z.string().regex(/^([01]\d|2[0-3]):([0-5]\d)$/, "Must be in HH:MM format")
});
537
/**
 * Zod schema for a `LocalBusiness` JSON-LD document. `name`, `image` and
 * `address` are mandatory. `@type` accepts any string so that subtypes can be
 * used.
 */
var LocalBusinessSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  // Free-form on purpose: can be Restaurant, Store, etc.
  "@type": z.string().default("LocalBusiness"),

  // --- Required ---
  name: z.string().min(1, "Business name is required"),
  image: z.union([z.string().url(), z.array(z.string().url())]),
  address: PostalAddressSchema,

  // --- Recommended ---
  "@id": z.string().url().optional(),
  url: z.string().url().optional(),
  telephone: z.string().optional(),
  // e.g., "$$"
  priceRange: z.string().optional(),

  // --- Location ---
  geo: GeoCoordinatesSchema.optional(),

  // --- Opening hours: one specification or a list of them ---
  openingHoursSpecification: z.union([
    OpeningHoursSchema,
    z.array(OpeningHoursSchema)
  ]).optional(),

  // --- Ratings ---
  // Inline rating node; unlike AggregateRatingSchema its "@type" has no default
  aggregateRating: z.object({
    "@type": z.literal("AggregateRating"),
    ratingValue: z.union([z.string(), z.number()]),
    reviewCount: z.number().int().positive()
  }).optional(),

  // --- Additional info ---
  description: z.string().min(10).optional(),
  // For restaurants
  servesCuisine: z.union([z.string(), z.array(z.string())]).optional(),
  // For restaurants
  menu: z.string().url().optional(),
  acceptsReservations: z.union([z.boolean(), z.string()]).optional(),
  paymentAccepted: z.string().optional()
});
573
/**
 * Zod schema for a `Place`: a name plus an optional address given either as
 * free text or as an inline `PostalAddress` node.
 */
var PlaceSchema = z.object({
  "@type": z.literal("Place").default("Place"),
  name: z.string(),
  address: z.union([
    z.string(),
    // Inline address: "@type" must be supplied and the country is not length-checked
    z.object({
      "@type": z.literal("PostalAddress"),
      streetAddress: z.string().optional(),
      addressLocality: z.string().optional(),
      addressRegion: z.string().optional(),
      postalCode: z.string().optional(),
      addressCountry: z.string().optional()
    })
  ]).optional()
});

/**
 * Zod schema for an event ticket `Offer`. `price` and a three-character
 * `priceCurrency` are mandatory.
 */
var EventOfferSchema = z.object({
  "@type": z.literal("Offer").default("Offer"),
  url: z.string().url().optional(),
  price: z.union([z.string(), z.number()]),
  priceCurrency: z.string().length(3),
  availability: z.string().url().optional(),
  validFrom: z.string().optional()
});
596
/**
 * Zod schema for an `Event` JSON-LD document (also the listed event subtypes).
 * `name` and `startDate` are mandatory.
 *
 * Dates accept either `YYYY-MM-DD` or a full date-time carrying a time zone
 * designator (`Z` or `+hh:mm` / `-hh:mm`), with optional fractional seconds,
 * so UTC timestamps such as the output of `Date.prototype.toISOString()`
 * validate alongside offset-based ones.
 */
var EventSchema = z.object({
  "@context": z.literal("https://schema.org").default("https://schema.org"),
  "@type": z.enum(["Event", "MusicEvent", "BusinessEvent", "SportsEvent", "TheaterEvent", "EducationEvent"]).default("Event"),

  // --- Required ---
  name: z.string().min(1, "Event name is required"),
  startDate: z.string().regex(
    /^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2}))?$/,
    "Must be ISO 8601 format"
  ),

  // --- Recommended ---
  endDate: z.string().regex(
    /^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2}))?$/,
    "Must be ISO 8601 format"
  ).optional(),
  eventStatus: z.enum([
    "https://schema.org/EventScheduled",
    "https://schema.org/EventCancelled",
    "https://schema.org/EventMovedOnline",
    "https://schema.org/EventPostponed",
    "https://schema.org/EventRescheduled"
  ]).optional(),
  eventAttendanceMode: z.enum([
    "https://schema.org/OfflineEventAttendanceMode",
    "https://schema.org/OnlineEventAttendanceMode",
    "https://schema.org/MixedEventAttendanceMode"
  ]).optional(),
  // Can be Place or VirtualLocation URL
  location: z.union([PlaceSchema, z.string().url()]).optional(),
  image: z.union([z.string().url(), z.array(z.string().url())]).optional(),
  description: z.string().min(10).optional(),

  // --- Organizer ---
  organizer: z.object({
    "@type": z.enum(["Organization", "Person"]),
    name: z.string(),
    url: z.string().url().optional()
  }).optional(),

  // --- Performer: one performer or a list of performers ---
  performer: z.union([
    z.object({
      "@type": z.enum(["Person", "PerformingGroup"]),
      name: z.string()
    }),
    z.array(z.object({
      "@type": z.enum(["Person", "PerformingGroup"]),
      name: z.string()
    }))
  ]).optional(),

  // --- Offers (tickets): one offer or a list of offers ---
  offers: z.union([EventOfferSchema, z.array(EventOfferSchema)]).optional(),

  // --- Additional ---
  url: z.string().url().optional()
});
642
+
643
+ // src/auditor/schema-validator.ts
644
/**
 * Zod validator for each schema type name.
 *
 * The scanner reports the type captured by GENERATOR_PATTERNS, which is "FAQ"
 * and "Breadcrumb" for `generateFAQSchema` / `generateBreadcrumbSchema`. Those
 * short names are therefore registered next to the full schema.org names;
 * otherwise the lookup would miss and both schema kinds would skip validation.
 */
var SCHEMA_VALIDATORS = {
  Article: ArticleSchema,
  Product: ProductSchema,
  Organization: OrganizationSchema,
  Person: PersonSchema,
  LocalBusiness: LocalBusinessSchema,
  Event: EventSchema,
  WebPage: WebPageSchema,
  FAQPage: FAQPageSchema,
  BreadcrumbList: BreadcrumbListSchema,
  // Aliases matching the type names produced by the scanner
  FAQ: FAQPageSchema,
  Breadcrumb: BreadcrumbListSchema
};
655
/**
 * Fields whose absence is reported as an error, per schema type.
 * Every type requires the JSON-LD keys "@type" and "@context".
 */
var REQUIRED_FIELDS = (() => {
  const withJsonLdKeys = (...fields) => ["@type", "@context", ...fields];
  return {
    Article: withJsonLdKeys("headline", "author", "datePublished"),
    Product: withJsonLdKeys("name"),
    Organization: withJsonLdKeys("name"),
    Person: withJsonLdKeys("name"),
    LocalBusiness: withJsonLdKeys("name", "address"),
    Event: withJsonLdKeys("name", "startDate", "location"),
    WebPage: withJsonLdKeys("name"),
    FAQ: withJsonLdKeys("mainEntity"),
    Breadcrumb: withJsonLdKeys("itemListElement")
  };
})();

/**
 * Fields whose absence is reported as a warning, per schema type.
 */
var RECOMMENDED_FIELDS = {
  Article: ["image", "dateModified", "publisher", "description"],
  Product: ["image", "price", "availability", "brand", "description"],
  Organization: ["url", "logo", "sameAs", "contactPoint"],
  Person: ["image", "url", "sameAs", "jobTitle"],
  LocalBusiness: ["telephone", "geo", "openingHours", "image"],
  Event: ["description", "image", "organizer", "eventStatus"],
  WebPage: ["description", "url"],
  // FAQ has no additional fields that are especially recommended
  FAQ: [],
  // Breadcrumb is simple
  Breadcrumb: []
};

/**
 * Fields whose presence is reported as an informational note, per schema type.
 */
var DEPRECATED_FIELDS = {
  // 'version' is no longer used on Article
  Article: ["version"],
  // Low semantic value for GEO
  Organization: ["founder"],
  // Confusing; prefer 'name' or 'mpn'
  Product: ["model"]
};
687
/**
 * Validates every schema found by a project scan and aggregates the outcome.
 *
 * Each validated schema record gets its `issues` property overwritten with the
 * issues found for it. A schema counts as valid when it has no error-level
 * issue. A file, and the run as a whole, is valid when it has no errors and,
 * in strict mode, no warnings either.
 *
 * @param {object} scanResult Scan result exposing `filesWithSchemas`.
 * @param {object} [options]
 * @param {string[]} [options.schemaTypes] When given, only these types are validated.
 * @param {boolean} [options.strict] Treat warnings as failures.
 * @returns {Promise<object>} `{ valid, totalSchemas, validSchemas, errors,
 *   warnings, fileResults, commonIssues }`.
 */
async function validateSchemas(scanResult, options = {}) {
  const countSeverity = (issueList, severity) =>
    issueList.filter((issue) => issue.severity === severity).length;
  const passes = (errorCount, warningCount) =>
    errorCount === 0 && (!options.strict || warningCount === 0);

  const totals = { schemas: 0, valid: 0, errors: 0, warnings: 0 };

  const fileResults = scanResult.filesWithSchemas.map((file) => {
    const selected = options.schemaTypes
      ? file.schemas.filter((candidate) => options.schemaTypes.includes(candidate.type))
      : file.schemas;

    const collected = [];
    for (const schema of selected) {
      const found = validateSchema(schema);
      schema.issues = found;

      const errorCount = countSeverity(found, "error");
      totals.schemas += 1;
      totals.errors += errorCount;
      totals.warnings += countSeverity(found, "warning");
      if (errorCount === 0) {
        totals.valid += 1;
      }
      collected.push(...found);
    }

    return {
      filePath: file.filePath,
      valid: passes(countSeverity(collected, "error"), countSeverity(collected, "warning")),
      schemas: selected,
      issues: collected
    };
  });

  const commonIssues = generateCommonIssues(fileResults);
  return {
    valid: passes(totals.errors, totals.warnings),
    totalSchemas: totals.schemas,
    validSchemas: totals.valid,
    errors: totals.errors,
    warnings: totals.warnings,
    fileResults,
    commonIssues
  };
}
729
/**
 * Validates one detected schema record and returns the issues found.
 *
 * Without parsed data (`schema.schema`) only an informational note is
 * returned. Otherwise the checks run in this order: required fields (error),
 * recommended fields (warning), deprecated fields (info), the Zod validator
 * registered for the type (error per Zod issue), and the type-specific rules.
 *
 * Zod is driven through `safeParse` and `error.issues`, the result shape that
 * stays available across Zod major versions; a failed validation can
 * therefore never be dropped silently.
 *
 * @param {object} schema Record with `type`, `generatorFunction`, `line` and,
 *   when available, the parsed data under `schema`.
 * @returns {object[]} Issues `{ severity, field?, message, fix?, autoFixCode? }`.
 */
function validateSchema(schema) {
  const issues = [];
  const schemaType = schema.type;
  if (!schema.schema) {
    issues.push({
      severity: "info",
      message: `Could not parse schema for deep validation. Using '${schema.generatorFunction}' at line ${schema.line}.`
    });
    return issues;
  }
  const data = schema.schema;

  for (const field of REQUIRED_FIELDS[schemaType] || []) {
    if (hasField(data, field)) continue;
    const autoFix = generateAutoFix(field, schemaType);
    issues.push({
      severity: "error",
      field,
      message: `Required field '${field}' is missing`,
      fix: `Add '${field}' field to your ${schemaType} schema`,
      ...autoFix && { autoFixCode: autoFix }
    });
  }

  for (const field of RECOMMENDED_FIELDS[schemaType] || []) {
    if (hasField(data, field)) continue;
    issues.push({
      severity: "warning",
      field,
      message: `Recommended field '${field}' is missing`,
      fix: `Consider adding '${field}' for better SEO and GEO`
    });
  }

  for (const field of DEPRECATED_FIELDS[schemaType] || []) {
    if (!hasField(data, field)) continue;
    issues.push({
      severity: "info",
      field,
      message: `Field '${field}' is deprecated or has low semantic value`,
      fix: `Consider removing '${field}' to reduce token usage`
    });
  }

  const validator = SCHEMA_VALIDATORS[schemaType];
  if (validator) {
    const result = validator.safeParse(data);
    if (!result.success) {
      for (const zodIssue of result.error.issues) {
        const fieldPath = zodIssue.path.join(".");
        issues.push({
          severity: "error",
          field: fieldPath,
          message: zodIssue.message,
          fix: `Fix type or format of '${fieldPath}'`
        });
      }
    }
  }

  issues.push(...validateSchemaSpecificRules(schemaType, data));
  return issues;
}
795
/**
 * Applies the cross-field and plausibility rules that the per-type field
 * lists and Zod validators do not cover.
 *
 *   - Article: `dateModified` must not precede `datePublished`; `wordCount`
 *     outside 100..10000 is flagged as unusual.
 *   - Product: `price` needs `priceCurrency`; `availability` must be one of
 *     the known values, given either as a bare name ("InStock") or as its
 *     schema.org URL ("https://schema.org/InStock").
 *   - Organization / LocalBusiness: `logo` should be HTTPS; `telephone`
 *     should contain only digits, spaces, "-", "(", ")" and a leading "+".
 *   - Event: `endDate` must not precede `startDate`.
 *
 * @param {string} schemaType Schema type name; other types yield no issues.
 * @param {object} data Parsed schema data.
 * @returns {object[]} Issues `{ severity, field, message, fix }`.
 */
function validateSchemaSpecificRules(schemaType, data) {
  const issues = [];
  switch (schemaType) {
    case "Article":
      if (data.datePublished && data.dateModified) {
        const published = new Date(data.datePublished);
        const modified = new Date(data.dateModified);
        // Unparseable dates compare as NaN and therefore never trigger this rule.
        if (published > modified) {
          issues.push({
            severity: "error",
            field: "dateModified",
            message: "dateModified must be after datePublished",
            fix: "Ensure dateModified is later than datePublished"
          });
        }
      }
      if (data.wordCount && (data.wordCount < 100 || data.wordCount > 1e4)) {
        issues.push({
          severity: "warning",
          field: "wordCount",
          message: `wordCount seems unusual: ${data.wordCount}`,
          fix: "Verify wordCount is calculated correctly"
        });
      }
      break;
    case "Product":
      if (data.price && !data.priceCurrency) {
        issues.push({
          severity: "error",
          field: "priceCurrency",
          message: "priceCurrency is required when price is specified",
          fix: 'Add priceCurrency (e.g., "USD", "EUR")'
        });
      }
      if (data.availability) {
        const validAvailability = [
          "InStock",
          "OutOfStock",
          "PreOrder",
          "Discontinued",
          "SoldOut"
        ];
        // The offer schema expects the URL form, so strip the schema.org
        // prefix before comparing against the bare names.
        const availabilityName = typeof data.availability === "string"
          ? data.availability.replace(/^https?:\/\/schema\.org\//, "")
          : data.availability;
        if (!validAvailability.includes(availabilityName)) {
          issues.push({
            severity: "warning",
            field: "availability",
            message: `availability value '${data.availability}' is not standard`,
            fix: `Use one of: ${validAvailability.join(", ")}`
          });
        }
      }
      break;
    case "Organization":
    case "LocalBusiness":
      if (data.logo && typeof data.logo === "string") {
        if (!data.logo.startsWith("https://")) {
          issues.push({
            severity: "warning",
            field: "logo",
            message: "Logo URL should use HTTPS",
            fix: "Change logo URL to HTTPS"
          });
        }
      }
      if (data.telephone && typeof data.telephone === "string") {
        if (!/^\+?[\d\s\-()]+$/.test(data.telephone)) {
          issues.push({
            severity: "info",
            field: "telephone",
            message: "Telephone format may not be standard",
            fix: "Use international format (e.g., +1-555-1234)"
          });
        }
      }
      break;
    case "Event":
      if (data.startDate && data.endDate) {
        const start = new Date(data.startDate);
        const end = new Date(data.endDate);
        if (start > end) {
          issues.push({
            severity: "error",
            field: "endDate",
            message: "endDate must be after startDate",
            fix: "Ensure endDate is later than startDate"
          });
        }
      }
      break;
  }
  return issues;
}
887
/**
 * Reports whether a dot-separated path resolves to a non-null, non-undefined
 * value on `obj`.
 *
 * Only own properties count, so names inherited from Object.prototype (such
 * as "constructor" or "toString") are never mistaken for schema fields.
 *
 * @param {object} obj - Object to inspect.
 * @param {string} fieldPath - Path such as "author.name".
 * @returns {boolean} True when every segment exists and the final value is set.
 */
function hasField(obj, fieldPath) {
  const hasOwn = Object.prototype.hasOwnProperty;
  let current = obj;
  for (const part of fieldPath.split(".")) {
    if (!current || typeof current !== "object" || !hasOwn.call(current, part)) {
      return false;
    }
    current = current[part];
  }
  return current !== void 0 && current !== null;
}
898
/**
 * Returns a ready-to-paste snippet for a missing field, when one is known.
 *
 * @param {string} field - Name of the missing field.
 * @param {string} schemaType - Schema type, used to fill in the "@type" snippet.
 * @returns {string|undefined} The snippet, or undefined for fields without a canned fix.
 */
function generateAutoFix(field, schemaType) {
  const fixes = {
    "@context": '"@context": "https://schema.org"',
    "@type": `"@type": "${schemaType}"`,
    headline: '"headline": "Your article title"',
    author: '"author": "Author Name"',
    datePublished: `"datePublished": new Date().toISOString()`,
    name: '"name": "Item name"'
  };
  // Guard the lookup so inherited members (e.g. "toString") are not returned as fixes.
  return Object.prototype.hasOwnProperty.call(fixes, field) ? fixes[field] : void 0;
}
909
/**
 * Groups issues that share severity and message across all files.
 *
 * @param {Array<{filePath: string, issues: Array<{severity: string, message: string}>}>} fileResults
 *   Per-file validation results.
 * @returns {Array<{type: string, count: number, severity: string, affectedFiles: string[]}>}
 *   One entry per distinct issue, most frequent first.
 */
function generateCommonIssues(fileResults) {
  const grouped = /* @__PURE__ */ new Map();
  fileResults.forEach((fileResult) => {
    fileResult.issues.forEach((issue) => {
      const groupKey = `${issue.severity}:${issue.message}`;
      const entry = grouped.get(groupKey);
      if (!grouped.has(groupKey)) {
        grouped.set(groupKey, {
          type: issue.message,
          count: 1,
          severity: issue.severity,
          affectedFiles: [fileResult.filePath]
        });
        return;
      }
      entry.count++;
      // Each file is listed once even when it repeats the same issue.
      if (!entry.affectedFiles.includes(fileResult.filePath)) {
        entry.affectedFiles.push(fileResult.filePath);
      }
    });
  });
  const summary = Array.from(grouped.values());
  summary.sort((left, right) => right.count - left.count);
  return summary;
}
932
/**
 * Validates one schema object supplied directly by the caller.
 *
 * The data is wrapped in the record shape that validateSchema receives,
 * tagged with the "manual" generator and line 0.
 *
 * @param {string} schemaType - Schema type name.
 * @param {object} schemaData - The JSON-LD object to validate.
 * @returns {*} Whatever validateSchema returns for the wrapped record.
 */
function validateSingleSchema(schemaType, schemaData) {
  return validateSchema({
    type: schemaType,
    generatorFunction: "manual",
    line: 0,
    schema: schemaData
  });
}
940
/**
 * Assembles the audit report from scan and validation results.
 *
 * Coverage is the share of files with schemas among files with schemas plus
 * routes without schemas, expressed as a percentage.
 *
 * @param {object} scanResult - Result of the project scan.
 * @param {object} validationResult - Result of schema validation.
 * @param {Array} [optimizations=[]] - Optimization suggestions to attach as-is.
 * @returns {object} Report with project, stats, schemasByType, score, files,
 *   missingSchemas, issues and optimizations.
 */
function generateCoverageReport(scanResult, validationResult, optimizations = []) {
  const withSchemas = scanResult.filesWithSchemas;
  const withoutSchemas = scanResult.routesWithoutSchemas;
  const schemaFileCount = withSchemas.length;
  const missingRouteCount = withoutSchemas.length;
  const routeCount = schemaFileCount + missingRouteCount;
  let coverage = 0;
  if (routeCount > 0) {
    coverage = schemaFileCount / routeCount * 100;
  }

  // Tally how many schemas of each type were found.
  const schemasByType = {};
  withSchemas.forEach((file) => {
    file.schemas.forEach((found) => {
      schemasByType[found.type] = (schemasByType[found.type] || 0) + 1;
    });
  });

  const score = calculateProjectScore(scanResult, validationResult, coverage);
  const issues = validationResult.fileResults.flatMap((fileResult) => fileResult.issues);

  // The framework key is only present when a framework was detected.
  const project = { root: scanResult.projectRoot };
  if (scanResult.framework) {
    project.framework = scanResult.framework;
  }
  project.scannedAt = scanResult.scannedAt;

  return {
    project,
    stats: {
      totalFiles: scanResult.filesScanned,
      filesWithSchemas: schemaFileCount,
      routesWithoutSchemas: missingRouteCount,
      totalSchemas: validationResult.totalSchemas,
      coverage: Math.round(coverage)
    },
    schemasByType,
    score,
    files: withSchemas,
    missingSchemas: withoutSchemas,
    issues,
    optimizations
  };
}
976
/**
 * Computes the overall project score and its four components.
 *
 * Weights: presence 25%, validity 35%, completeness 25%, optimization 15%.
 * Completeness drops as warnings approach two per schema; optimization
 * loses 10 points per info-level issue.
 *
 * @param {object} _scanResult - Unused.
 * @param {object} validationResult - Provides totalSchemas, validSchemas, warnings and fileResults.
 * @param {number} coverage - Coverage percentage, used directly as the presence score.
 * @returns {{total: number, breakdown: {presence: number, validity: number, completeness: number, optimization: number}}}
 */
function calculateProjectScore(_scanResult, validationResult, coverage) {
  const schemaCount = validationResult.totalSchemas;

  let validity = 0;
  if (schemaCount > 0) {
    validity = validationResult.validSchemas / schemaCount * 100;
  }

  const warningBudget = Math.max(1, schemaCount * 2);
  const completeness = Math.max(0, 100 - validationResult.warnings / warningBudget * 100);

  let infoCount = 0;
  for (const fileResult of validationResult.fileResults) {
    for (const issue of fileResult.issues) {
      if (issue.severity === "info") {
        infoCount += 1;
      }
    }
  }
  const optimization = Math.max(0, 100 - infoCount * 10);

  const weighted = Math.round(
    coverage * 0.25 + validity * 0.35 + completeness * 0.25 + optimization * 0.15
  );
  return {
    total: Math.min(100, Math.max(0, weighted)),
    breakdown: {
      presence: Math.round(coverage),
      validity: Math.round(validity),
      completeness: Math.round(completeness),
      optimization: Math.round(optimization)
    }
  };
}
1004
/**
 * Renders a report and writes it to a file or to stdout.
 *
 * @param {object} report - Report to serialize.
 * @param {{format?: string, output?: string}} [options={}] - `format` is
 *   "json" (default), "html" or "markdown"; `output` is the destination path.
 *   Without `output` the rendered text goes to console.log.
 * @returns {Promise<void>}
 * @throws {Error} When `format` is not one of the supported values.
 */
async function saveReport(report, options = {}) {
  const format = options.format || "json";
  const render = () => {
    switch (format) {
      case "json":
        return generateJSONReport(report);
      case "html":
        return generateHTMLReport(report);
      case "markdown":
        return generateMarkdownReport(report);
      default:
        throw new Error(`Unknown format: ${format}`);
    }
  };
  const content = render();
  if (!options.output) {
    console.log(content);
    return;
  }
  await fs.writeFile(options.output, content, "utf-8");
}
1026
/**
 * Serializes the report as pretty-printed JSON with two-space indentation.
 *
 * @param {object} report - Report to serialize.
 * @returns {string} JSON text.
 */
function generateJSONReport(report) {
  const indentWidth = 2;
  return JSON.stringify(report, null, indentWidth);
}
1029
/**
 * Renders the audit report as Markdown.
 *
 * Sections: header, score breakdown, statistics, schemas by type, issues
 * (errors and warnings only, first 10 of each) and routes missing schemas
 * (first 10). Info-level issues are not listed.
 *
 * @param {object} report - Report produced by generateCoverageReport.
 * @returns {string} Markdown text.
 */
function generateMarkdownReport(report) {
  const { project, stats, score, schemasByType, issues, missingSchemas } = report;
  const listLimit = 10;
  const chunks = [];

  // Emits one "### Errors"/"### Warnings" block; does nothing for an empty group.
  const appendIssueGroup = (heading, fixLabel, noun, group) => {
    if (group.length === 0) {
      return;
    }
    chunks.push(`### ${heading} (${group.length})\n\n`);
    for (const issue of group.slice(0, listLimit)) {
      chunks.push(`- **${issue.field || "General"}**: ${issue.message}\n`);
      if (issue.fix) {
        chunks.push(` - ${fixLabel}: ${issue.fix}\n`);
      }
    }
    if (group.length > listLimit) {
      chunks.push(`\n... and ${group.length - listLimit} more ${noun}\n`);
    }
    chunks.push("\n");
  };

  chunks.push("# JSON-LD Audit Report\n\n");
  chunks.push(`**Project:** ${project.root}\n`);
  chunks.push(`**Framework:** ${project.framework || "Unknown"}\n`);
  chunks.push(`**Date:** ${new Date(project.scannedAt).toLocaleString()}\n\n`);

  chunks.push(`## Score: ${score.total}/100\n\n`);
  chunks.push(`- **Presence:** ${score.breakdown.presence}/100 (${stats.coverage}% coverage)\n`);
  chunks.push(`- **Validity:** ${score.breakdown.validity}/100\n`);
  chunks.push(`- **Completeness:** ${score.breakdown.completeness}/100\n`);
  chunks.push(`- **Optimization:** ${score.breakdown.optimization}/100\n\n`);

  chunks.push("## Statistics\n\n");
  chunks.push(`- Total files scanned: ${stats.totalFiles}\n`);
  chunks.push(`- Files with schemas: ${stats.filesWithSchemas}\n`);
  chunks.push(`- Routes without schemas: ${stats.routesWithoutSchemas}\n`);
  chunks.push(`- Total schemas: ${stats.totalSchemas}\n\n`);

  chunks.push("## Schemas by Type\n\n");
  Object.entries(schemasByType).forEach(([type, count]) => {
    chunks.push(`- **${type}**: ${count}\n`);
  });
  chunks.push("\n");

  if (issues.length > 0) {
    chunks.push("## Issues Found\n\n");
    const errors = issues.filter((issue) => issue.severity === "error");
    const warnings = issues.filter((issue) => issue.severity === "warning");
    appendIssueGroup("Errors", "Fix", "errors", errors);
    appendIssueGroup("Warnings", "Suggestion", "warnings", warnings);
  }

  if (missingSchemas.length > 0) {
    chunks.push("## Missing Schemas\n\n");
    chunks.push("Routes that should have schemas but don't:\n\n");
    for (const route of missingSchemas.slice(0, listLimit)) {
      const suggestion = route.suggestedSchemaType
        ? ` \u2192 Suggested: ${route.suggestedSchemaType}`
        : "";
      chunks.push(`- **${route.filePath}**${suggestion}\n`);
    }
    if (missingSchemas.length > listLimit) {
      chunks.push(`\n... and ${missingSchemas.length - listLimit} more routes\n`);
    }
    chunks.push("\n");
  }

  chunks.push("---\n\n");
  chunks.push("Generated by [geo-semantic-layer](https://www.npmjs.com/package/geo-semantic-layer)\n");
  return chunks.join("");
}
1147
/**
 * Escapes the characters that are significant in HTML text and attribute
 * values so scanned project data cannot inject markup into the report.
 *
 * @param {*} value - Value to render; coerced to a string.
 * @returns {string} HTML-safe text.
 */
function escapeReportHtml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Renders one issue section (errors or warnings) of the HTML report.
 *
 * @param {string} heading - Section heading text, including any icon.
 * @param {string} kind - CSS suffix: "error" or "warning".
 * @param {string} badge - Badge label shown on each issue.
 * @param {string} noun - Plural noun for the "... and N more" line.
 * @param {Array<object>} items - Issues of this severity.
 * @returns {string} Section markup, or "" when there are no items.
 */
function renderReportIssueSection(heading, kind, badge, noun, items) {
  if (items.length === 0) {
    return "";
  }
  const limit = 20;
  const cards = items.slice(0, limit).map(
    (issue) => `
          <div class="issue issue-${kind}">
            <div class="issue-header">
              <span class="issue-badge badge-${kind}">${badge}</span>
              ${issue.field ? `<span class="issue-field">${escapeReportHtml(issue.field)}</span>` : ""}
            </div>
            <div class="issue-message">${escapeReportHtml(issue.message)}</div>
            ${issue.fix ? `<div class="issue-fix">\u{1F4A1} ${escapeReportHtml(issue.fix)}</div>` : ""}
          </div>
        `
  ).join("");
  const overflow = items.length > limit
    ? `<div class="file-item">... and ${items.length - limit} more ${noun}</div>`
    : "";
  return `
      <div class="section">
        <h2>${heading} (${items.length})</h2>
        <div class="issue-list">
          ${cards}
          ${overflow}
        </div>
      </div>
    `;
}

/**
 * Renders the audit report as a standalone HTML document.
 *
 * Shows the score card, headline statistics, a bar chart of schemas by type,
 * up to 20 errors, up to 20 warnings and up to 20 routes without schemas.
 * All values that originate from the scanned project (paths, field names,
 * messages, suggestions) are HTML-escaped before interpolation.
 *
 * @param {object} report - Report produced by generateCoverageReport.
 * @returns {string} Complete HTML document.
 */
function generateHTMLReport(report) {
  const { project, stats, score, schemasByType, issues, missingSchemas } = report;
  const errors = issues.filter((i) => i.severity === "error");
  const warnings = issues.filter((i) => i.severity === "warning");
  const routeLimit = 20;

  // Computed once; bar widths are relative to the most common schema type.
  const typeEntries = Object.entries(schemasByType);
  const maxTypeCount = Math.max(...typeEntries.map(([, count]) => count));
  const chartBars = typeEntries.map(
    ([type, count]) => `
          <div class="chart-bar">
            <div class="chart-label">${escapeReportHtml(type)}</div>
            <div class="chart-track">
              <div class="chart-fill" style="width: ${count / maxTypeCount * 100}%"></div>
            </div>
            <div class="chart-value">${escapeReportHtml(count)}</div>
          </div>
        `
  ).join("");

  const errorSection = renderReportIssueSection("\u274C Errors", "error", "Error", "errors", errors);
  const warningSection = renderReportIssueSection("\u26A0\uFE0F Warnings", "warning", "Warning", "warnings", warnings);

  const missingSection = missingSchemas.length > 0 ? `
      <div class="section">
        <h2>\u{1F4C4} Routes Without Schemas (${missingSchemas.length})</h2>
        <div class="file-list">
          ${missingSchemas.slice(0, routeLimit).map(
    (route) => `
          <div class="file-item">
            ${escapeReportHtml(route.filePath)}
            ${route.suggestedSchemaType ? `<span class="schema-type">${escapeReportHtml(route.suggestedSchemaType)}</span>` : ""}
            ${route.reason ? `<br><small style="color: #6b7280;">${escapeReportHtml(route.reason)}</small>` : ""}
          </div>
        `
  ).join("")}
          ${missingSchemas.length > routeLimit ? `<div class="file-item">... and ${missingSchemas.length - routeLimit} more routes</div>` : ""}
        </div>
      </div>
    ` : "";

  return `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>JSON-LD Audit Report</title>
  <style>
    * { margin: 0; padding: 0; box-sizing: border-box; }
    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
      line-height: 1.6;
      color: #333;
      background: #f5f5f5;
      padding: 20px;
    }
    .container {
      max-width: 1200px;
      margin: 0 auto;
      background: white;
      border-radius: 8px;
      box-shadow: 0 2px 4px rgba(0,0,0,0.1);
      overflow: hidden;
    }
    header {
      background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
      color: white;
      padding: 30px;
    }
    h1 { font-size: 32px; margin-bottom: 10px; }
    .meta { opacity: 0.9; font-size: 14px; }
    .score-section {
      padding: 30px;
      background: #f9fafb;
      border-bottom: 1px solid #e5e7eb;
    }
    .score-card {
      display: flex;
      align-items: center;
      gap: 30px;
    }
    .score-circle {
      width: 120px;
      height: 120px;
      border-radius: 50%;
      display: flex;
      align-items: center;
      justify-content: center;
      font-size: 36px;
      font-weight: bold;
      color: white;
      flex-shrink: 0;
    }
    .score-excellent { background: #10b981; }
    .score-good { background: #3b82f6; }
    .score-warning { background: #f59e0b; }
    .score-poor { background: #ef4444; }
    .score-breakdown {
      flex: 1;
      display: grid;
      grid-template-columns: repeat(2, 1fr);
      gap: 15px;
    }
    .score-item {
      background: white;
      padding: 15px;
      border-radius: 6px;
      border-left: 4px solid #667eea;
    }
    .score-item h3 { font-size: 14px; color: #6b7280; margin-bottom: 5px; }
    .score-item .value { font-size: 24px; font-weight: bold; color: #111827; }
    .stats-grid {
      display: grid;
      grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
      gap: 20px;
      padding: 30px;
    }
    .stat-card {
      background: #f9fafb;
      padding: 20px;
      border-radius: 8px;
      border: 1px solid #e5e7eb;
    }
    .stat-card h3 { font-size: 14px; color: #6b7280; margin-bottom: 10px; }
    .stat-card .value { font-size: 32px; font-weight: bold; color: #111827; }
    .section {
      padding: 30px;
      border-bottom: 1px solid #e5e7eb;
    }
    .section h2 {
      font-size: 20px;
      margin-bottom: 20px;
      color: #111827;
    }
    .issue-list {
      display: flex;
      flex-direction: column;
      gap: 12px;
    }
    .issue {
      padding: 15px;
      border-radius: 6px;
      border-left: 4px solid;
    }
    .issue-error {
      background: #fef2f2;
      border-left-color: #ef4444;
    }
    .issue-warning {
      background: #fffbeb;
      border-left-color: #f59e0b;
    }
    .issue-info {
      background: #eff6ff;
      border-left-color: #3b82f6;
    }
    .issue-header {
      display: flex;
      align-items: center;
      gap: 10px;
      margin-bottom: 8px;
    }
    .issue-badge {
      padding: 4px 8px;
      border-radius: 4px;
      font-size: 11px;
      font-weight: 600;
      text-transform: uppercase;
    }
    .badge-error { background: #ef4444; color: white; }
    .badge-warning { background: #f59e0b; color: white; }
    .badge-info { background: #3b82f6; color: white; }
    .issue-field {
      font-family: 'Courier New', monospace;
      font-size: 13px;
      color: #6b7280;
    }
    .issue-message { font-size: 14px; color: #111827; margin-bottom: 6px; }
    .issue-fix {
      font-size: 13px;
      color: #059669;
      font-style: italic;
    }
    .file-list {
      display: flex;
      flex-direction: column;
      gap: 10px;
    }
    .file-item {
      padding: 12px;
      background: #f9fafb;
      border-radius: 6px;
      border: 1px solid #e5e7eb;
      font-family: 'Courier New', monospace;
      font-size: 13px;
    }
    .schema-type {
      display: inline-block;
      padding: 2px 8px;
      background: #667eea;
      color: white;
      border-radius: 4px;
      font-size: 11px;
      margin-left: 8px;
    }
    footer {
      padding: 20px 30px;
      background: #f9fafb;
      text-align: center;
      color: #6b7280;
      font-size: 14px;
    }
    .chart {
      width: 100%;
      max-width: 400px;
      margin: 0 auto;
    }
    .chart-bar {
      display: flex;
      align-items: center;
      gap: 10px;
      margin-bottom: 12px;
    }
    .chart-label {
      width: 100px;
      text-align: right;
      font-size: 14px;
      color: #6b7280;
    }
    .chart-track {
      flex: 1;
      height: 24px;
      background: #e5e7eb;
      border-radius: 4px;
      overflow: hidden;
    }
    .chart-fill {
      height: 100%;
      background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
      transition: width 0.3s ease;
    }
    .chart-value {
      width: 40px;
      text-align: right;
      font-weight: bold;
      font-size: 14px;
    }
  </style>
</head>
<body>
  <div class="container">
    <header>
      <h1>\u{1F4CA} JSON-LD Audit Report</h1>
      <div class="meta">
        <strong>Project:</strong> ${escapeReportHtml(project.root)}<br>
        <strong>Framework:</strong> ${escapeReportHtml(project.framework || "Unknown")}<br>
        <strong>Date:</strong> ${escapeReportHtml(new Date(project.scannedAt).toLocaleString())}
      </div>
    </header>

    <div class="score-section">
      <div class="score-card">
        <div class="score-circle ${getScoreClass(score.total)}">
          ${escapeReportHtml(score.total)}
        </div>
        <div class="score-breakdown">
          <div class="score-item">
            <h3>Presence</h3>
            <div class="value">${escapeReportHtml(score.breakdown.presence)}/100</div>
          </div>
          <div class="score-item">
            <h3>Validity</h3>
            <div class="value">${escapeReportHtml(score.breakdown.validity)}/100</div>
          </div>
          <div class="score-item">
            <h3>Completeness</h3>
            <div class="value">${escapeReportHtml(score.breakdown.completeness)}/100</div>
          </div>
          <div class="score-item">
            <h3>Optimization</h3>
            <div class="value">${escapeReportHtml(score.breakdown.optimization)}/100</div>
          </div>
        </div>
      </div>
    </div>

    <div class="stats-grid">
      <div class="stat-card">
        <h3>Files Scanned</h3>
        <div class="value">${escapeReportHtml(stats.totalFiles)}</div>
      </div>
      <div class="stat-card">
        <h3>With Schemas</h3>
        <div class="value">${escapeReportHtml(stats.filesWithSchemas)}</div>
      </div>
      <div class="stat-card">
        <h3>Coverage</h3>
        <div class="value">${escapeReportHtml(stats.coverage)}%</div>
      </div>
      <div class="stat-card">
        <h3>Total Schemas</h3>
        <div class="value">${escapeReportHtml(stats.totalSchemas)}</div>
      </div>
    </div>

    <div class="section">
      <h2>Schemas by Type</h2>
      <div class="chart">
        ${chartBars}
      </div>
    </div>

    ${errorSection}

    ${warningSection}

    ${missingSection}

    <footer>
      Generated by <strong>geo-semantic-layer</strong> &middot; <a href="https://www.npmjs.com/package/geo-semantic-layer" target="_blank" rel="noopener noreferrer">npm</a>
    </footer>
  </div>
</body>
</html>`;
}
1499
/**
 * Maps a 0-100 score to the CSS class that colors the score circle.
 *
 * @param {number} score - Overall score.
 * @returns {string} "score-excellent" (>= 90), "score-good" (>= 70),
 *   "score-warning" (>= 50) or "score-poor".
 */
function getScoreClass(score) {
  const tiers = [
    [90, "score-excellent"],
    [70, "score-good"],
    [50, "score-warning"]
  ];
  for (const [minimum, cssClass] of tiers) {
    if (score >= minimum) {
      return cssClass;
    }
  }
  return "score-poor";
}
1505
+
1506
+ // src/optimizer/llm-profiles.ts
1507
/**
 * Per-model tokenization and budget settings, keyed by model name.
 *
 * - tokensPerChar: ratio used to approximate token counts from text length.
 * - maxContextTokens: context window size recorded for the model.
 * - recommendedMaxTokens: budget above which a schema is reported as too large.
 * - supportsFunctionCalling: capability flag.
 */
var LLM_PROFILES = {
  "gpt-4": {
    model: "gpt-4",
    // ~4 chars per token (average for English)
    tokensPerChar: 0.25,
    maxContextTokens: 8192,
    recommendedMaxTokens: 500,
    supportsFunctionCalling: true
  },
  "gpt-4-turbo": {
    model: "gpt-4-turbo",
    tokensPerChar: 0.25,
    maxContextTokens: 128000,
    recommendedMaxTokens: 1000,
    supportsFunctionCalling: true
  },
  "gpt-3.5-turbo": {
    model: "gpt-3.5-turbo",
    tokensPerChar: 0.25,
    maxContextTokens: 16385,
    recommendedMaxTokens: 400,
    supportsFunctionCalling: true
  },
  "claude-3-opus": {
    model: "claude-3-opus",
    // Slightly more efficient tokenization
    tokensPerChar: 0.27,
    maxContextTokens: 200000,
    recommendedMaxTokens: 1000,
    supportsFunctionCalling: true
  },
  "claude-3-sonnet": {
    model: "claude-3-sonnet",
    tokensPerChar: 0.27,
    maxContextTokens: 200000,
    recommendedMaxTokens: 800,
    supportsFunctionCalling: true
  },
  "claude-3-haiku": {
    model: "claude-3-haiku",
    tokensPerChar: 0.27,
    maxContextTokens: 200000,
    recommendedMaxTokens: 600,
    supportsFunctionCalling: true
  },
  "claude-2": {
    model: "claude-2",
    tokensPerChar: 0.27,
    maxContextTokens: 100000,
    recommendedMaxTokens: 600,
    supportsFunctionCalling: false
  },
  "gemini-pro": {
    model: "gemini-pro",
    tokensPerChar: 0.26,
    maxContextTokens: 32760,
    recommendedMaxTokens: 600,
    supportsFunctionCalling: true
  },
  "gemini-ultra": {
    model: "gemini-ultra",
    tokensPerChar: 0.26,
    maxContextTokens: 32760,
    recommendedMaxTokens: 800,
    supportsFunctionCalling: true
  }
};
1574
/**
 * Looks up the profile registered for a model name.
 *
 * @param {string} model - Key into LLM_PROFILES.
 * @returns {object|undefined} The profile, or undefined when the key is absent.
 */
function getModelProfile(model) {
  const profile = LLM_PROFILES[model];
  return profile;
}
1577
+
1578
+ // src/optimizer/token-analyzer.ts
1579
/**
 * Measures how many tokens a schema costs for a given model.
 *
 * The total is estimated from the schema pretty-printed with two-space
 * indentation; the per-field breakdown is computed separately.
 *
 * @param {*} schema - JSON-LD value to analyze.
 * @param {string} model - Target model name.
 * @returns {{totalTokens: number, byField: object, recommendations: string[], model: string}}
 */
function analyzeTokenUsage(schema, model) {
  const prettyJson = JSON.stringify(schema, null, 2);
  const totalTokens = estimateTokens(prettyJson, model);
  const byField = calculateTokensByField(schema, model);
  return {
    totalTokens,
    byField,
    recommendations: generateRecommendations(totalTokens, byField, model),
    model
  };
}
1591
/**
 * Estimates the token count of a piece of text for a model.
 *
 * Models whose name starts with "gpt-" are counted with the tokenizer's
 * `encode`; if that throws, or for any other model, the character-ratio
 * approximation is used.
 *
 * @param {string} text - Text to measure.
 * @param {string} model - Target model name.
 * @returns {number} Estimated token count.
 */
function estimateTokens(text, model) {
  const usesTokenizer = model.startsWith("gpt-");
  if (!usesTokenizer) {
    return approximateTokens(text, model);
  }
  try {
    const encoded = encode(text);
    return encoded.length;
  } catch (e) {
    return approximateTokens(text, model);
  }
}
1601
/**
 * Approximates a token count from text length using the model's
 * tokens-per-character ratio.
 *
 * Models without a registered profile fall back to 0.25 tokens per character
 * (about four characters per token) instead of throwing, so this function can
 * safely serve as the fallback path of estimateTokens for any model name.
 *
 * @param {string} text - Text to measure.
 * @param {string} model - Target model name.
 * @returns {number} Approximate token count, rounded up.
 */
function approximateTokens(text, model) {
  const fallbackTokensPerChar = 0.25;
  const profile = getModelProfile(model);
  const tokensPerChar = profile && profile.tokensPerChar > 0
    ? profile.tokensPerChar
    : fallbackTokensPerChar;
  return Math.ceil(text.length * tokensPerChar);
}
1605
/**
 * Breaks a schema down into per-path token costs.
 *
 * Each object key contributes a `<path>#key` entry for the cost of the quoted
 * key plus colon; each primitive leaf contributes an entry at its own path
 * (or "value" for a bare top-level primitive). Array items use `[index]`
 * suffixes. null and undefined contribute nothing.
 *
 * @param {*} schema - Value to analyze.
 * @param {string} model - Target model name.
 * @param {string} [path2=""] - Path prefix of `schema` within the root value.
 * @returns {Object<string, number>} Token cost by path.
 */
function calculateTokensByField(schema, model, path2 = "") {
  if (schema === null || schema === void 0) {
    return {};
  }
  if (typeof schema !== "object") {
    const leafTokens = estimateTokens(JSON.stringify(schema), model);
    return { [path2 || "value"]: leafTokens };
  }
  const costs = {};
  if (Array.isArray(schema)) {
    for (let index = 0; index < schema.length; index++) {
      // forEach skips holes in sparse arrays; mirror that here.
      if (!(index in schema)) {
        continue;
      }
      const itemPath = path2 ? `${path2}[${index}]` : `[${index}]`;
      Object.assign(costs, calculateTokensByField(schema[index], model, itemPath));
    }
    return costs;
  }
  Object.entries(schema).forEach(([key, value]) => {
    const fieldPath = path2 ? `${path2}.${key}` : key;
    costs[`${fieldPath}#key`] = estimateTokens(`"${key}":`, model);
    Object.assign(costs, calculateTokensByField(value, model, fieldPath));
  });
  return costs;
}
1633
/**
 * Produces human-readable advice from a token analysis.
 *
 * Emits, in order: an over-budget notice (only when the model has a
 * registered profile), the most expensive field when it exceeds 20% of the
 * total, a note for every field above 100 tokens, and a notice when more
 * than 10 array items are present. `#key` entries are ignored throughout.
 *
 * @param {number} totalTokens - Total token count of the schema.
 * @param {Object<string, number>} byField - Token cost by path.
 * @param {string} model - Target model name.
 * @returns {string[]} Recommendations.
 */
function generateRecommendations(totalTokens, byField, model) {
  const recommendations = [];
  const profile = getModelProfile(model);
  // Unknown models have no budget to compare against; skip that check rather than throw.
  if (profile && totalTokens > profile.recommendedMaxTokens) {
    const excess = totalTokens - profile.recommendedMaxTokens;
    const reduction = (excess / totalTokens * 100).toFixed(1);
    recommendations.push(
      `Schema uses ${totalTokens} tokens, ${excess} over recommended ${profile.recommendedMaxTokens} for ${model}. Consider reducing by ${reduction}%.`
    );
  }
  const valueEntries = Object.entries(byField).filter(([key]) => !key.endsWith("#key"));
  const topField = [...valueEntries].sort(([, a], [, b]) => b - a)[0];
  if (topField && topField[1] > totalTokens * 0.2) {
    recommendations.push(
      `Most expensive field: "${topField[0]}" (${topField[1]} tokens, ${(topField[1] / totalTokens * 100).toFixed(1)}% of total)`
    );
  }
  for (const [field, tokens] of valueEntries) {
    if (tokens > 100) {
      recommendations.push(`Consider shortening field "${field}" (${tokens} tokens)`);
    }
  }
  const arrayFields = valueEntries.filter(([key]) => key.includes("["));
  if (arrayFields.length > 10) {
    recommendations.push(`Schema contains ${arrayFields.length} array items. Consider limiting array sizes.`);
  }
  return recommendations;
}
1661
+
1662
+ // src/optimizer/semantic-scorer.ts
1663
/**
 * Default weight per field category, used as the default value of the
 * `weights` parameter of scoreFields.
 */
var DEFAULT_SEMANTIC_WEIGHTS = {
  coreIdentity: 1.0,
  seoCritical: 0.85,
  authorship: 0.75,
  relationships: 0.6,
  decorative: 0.2
};
1670
/**
 * Base semantic importance (0 to 1) of known schema.org property names.
 * Properties not listed here are scored 0.5 by calculateSemanticValue.
 */
var FIELD_IMPORTANCE = {
  // Core identity: critical, never remove
  "@context": 1.0,
  "@type": 1.0,
  name: 1.0,
  url: 0.95,

  // SEO critical
  headline: 0.9,
  description: 0.85,
  title: 0.85,
  image: 0.8,

  // Authorship and provenance
  author: 0.8,
  datePublished: 0.75,
  publisher: 0.75,
  dateModified: 0.7,

  // Content
  articleBody: 0.7,
  text: 0.7,
  about: 0.65,

  // Products
  price: 0.85,
  priceCurrency: 0.8,
  offers: 0.85,
  availability: 0.75,
  brand: 0.7,
  sku: 0.6,
  gtin: 0.55,

  // Relationships: useful for context
  sameAs: 0.6,
  isPartOf: 0.55,
  mainEntity: 0.6,
  mainEntityOfPage: 0.55,

  // Navigation
  breadcrumb: 0.65,
  itemListElement: 0.6,
  position: 0.5,

  // Medium importance
  keywords: 0.5,
  inLanguage: 0.5,
  wordCount: 0.45,
  commentCount: 0.4,

  // Low importance: decorative or verbose
  alternateName: 0.3,
  disambiguatingDescription: 0.25,
  identifier: 0.2,
  potentialAction: 0.15,

  // Very low importance
  interactionStatistic: 0.1,
  subjectOf: 0.1
};
1721
/**
 * Scores every object property in a schema by semantic value versus token cost.
 *
 * Walks the schema depth-first. Each property gets an entry keyed by its
 * path (dot-separated, with `[index]` for array items) holding its semantic
 * value, the token cost found under that exact path in `tokensByField`
 * (0 when absent), the value/cost efficiency, and a keep/remove/optional
 * recommendation.
 *
 * @param {*} schema - JSON-LD value to score.
 * @param {Object<string, number>} tokensByField - Token cost by path.
 * @param {object} [weights=DEFAULT_SEMANTIC_WEIGHTS] - Accepted but not read by this function.
 * @returns {Object<string, object>} Score record by field path.
 */
function scoreFields(schema, tokensByField, weights = DEFAULT_SEMANTIC_WEIGHTS) {
  const scores = {};

  const visit = (node, parentPath) => {
    if (node === null || node === void 0 || typeof node !== "object") {
      return;
    }
    if (Array.isArray(node)) {
      node.forEach((item, index) => {
        const itemPath = parentPath ? `${parentPath}[${index}]` : `[${index}]`;
        visit(item, itemPath);
      });
      return;
    }
    Object.entries(node).forEach(([key, value]) => {
      const fieldPath = parentPath ? `${parentPath}.${key}` : key;
      const tokenCost = tokensByField[fieldPath] || 0;
      const semanticValue = calculateSemanticValue(key, value);
      let efficiency = 0;
      if (tokenCost > 0) {
        efficiency = semanticValue / tokenCost;
      }
      const verdict = determineRecommendation(semanticValue, efficiency, tokenCost);
      const record = {
        semanticValue,
        tokenCost,
        efficiency,
        recommendation: verdict.recommendation
      };
      if (verdict.reason) {
        record.reason = verdict.reason;
      }
      scores[fieldPath] = record;
      // Record the parent first, then descend so nested properties are scored too.
      if (typeof value === "object" && value !== null) {
        visit(value, fieldPath);
      }
    });
  };

  visit(schema, "");
  return scores;
}
1758
/**
 * Computes the semantic value (0 to 1) of a single field.
 *
 * Starts from the field's entry in FIELD_IMPORTANCE (0.5 when the name is not
 * listed) and adjusts it: strings over 500 characters are scaled by 0.7,
 * arrays over 10 items by 0.8, and strings between 11 and 199 characters get
 * a 1.1 boost. The result is capped at 1.
 *
 * @param {string} fieldName - Property name (last path segment).
 * @param {*} value - Property value.
 * @param {object} [_weights] - Unused.
 * @returns {number} Semantic value.
 */
function calculateSemanticValue(fieldName, value, _weights) {
  // Own-property check: names like "constructor" must not resolve to
  // inherited members, which would turn the score into NaN.
  const isKnownField = Object.prototype.hasOwnProperty.call(FIELD_IMPORTANCE, fieldName);
  const baseImportance = (isKnownField && FIELD_IMPORTANCE[fieldName]) || 0.5;
  let adjustedValue = baseImportance;
  if (typeof value === "string" && value.length > 500) {
    adjustedValue *= 0.7;
  }
  if (Array.isArray(value) && value.length > 10) {
    adjustedValue *= 0.8;
  }
  if (typeof value === "string" && value.length > 10 && value.length < 200) {
    adjustedValue *= 1.1;
  }
  return Math.min(adjustedValue, 1);
}
1772
/**
 * Decides whether a field should be kept, removed or treated as optional.
 *
 * Rules are evaluated in order and the first match wins:
 * semantic value >= 0.9 keeps; semantic value < 0.3 removes; efficiency below
 * 0.01 with a cost above 50 tokens removes; efficiency above 0.05 keeps;
 * anything else is optional.
 *
 * @param {number} semanticValue - Semantic value of the field (0 to 1).
 * @param {number} efficiency - Semantic value divided by token cost.
 * @param {number} tokenCost - Token cost of the field.
 * @returns {{recommendation: string, reason: string}}
 */
function determineRecommendation(semanticValue, efficiency, tokenCost) {
  const verdict = (recommendation, reason) => ({ recommendation, reason });
  const isCritical = semanticValue >= 0.9;
  const isLowValue = semanticValue < 0.3;
  const isWasteful = efficiency < 0.01 && tokenCost > 50;
  const isEfficient = efficiency > 0.05;

  return isCritical ? verdict("keep", "Critical for schema validity")
    : isLowValue ? verdict("remove", "Low semantic value")
    : isWasteful ? verdict("remove", "Inefficient (high cost, low value)")
    : isEfficient ? verdict("keep", "High efficiency")
    : verdict("optional", "Medium value/cost ratio");
}
1787
+
1788
+ // src/auditor/optimizer-suggestions.ts
1789
/**
 * Collects optimization suggestions for every schema found by the scan.
 *
 * Schemas without extracted data (`schema` is falsy) are skipped. The result
 * is sorted by `impact.reduction`, largest first; suggestions without a
 * reduction sort as 0.
 *
 * @param {object} scanResult - Scan result exposing `filesWithSchemas`.
 * @param {string} [targetModel="gpt-4"] - Model to optimize for.
 * @returns {Promise<Array<object>>} Suggestions across all files.
 */
async function generateOptimizationSuggestions(scanResult, targetModel = "gpt-4") {
  const collected = [];
  for (const file of scanResult.filesWithSchemas) {
    for (const found of file.schemas) {
      if (found.schema) {
        const perSchema = await analyzeSchemaOptimization(
          file.filePath,
          found.type,
          found.schema,
          targetModel
        );
        collected.push(...perSchema);
      }
    }
  }
  const reductionOf = (suggestion) => suggestion.impact.reduction || 0;
  collected.sort((first, second) => reductionOf(second) - reductionOf(first));
  return collected;
}
1810
/**
 * Derives optimization suggestions for a single schema.
 *
 * Produces up to three suggestions plus any redundancy findings:
 * - "token-reduction" when the schema exceeds 800 tokens;
 * - "field-removal" for fields with semantic value below 0.4 and a token
 *   cost above 20;
 * - "field-addition" for high-value fields that are missing.
 * Any error raised during analysis is logged with console.warn and the
 * suggestions gathered so far are returned.
 *
 * @param {string} filePath - File the schema was found in.
 * @param {string} schemaType - Schema type name.
 * @param {object} schemaData - The JSON-LD object.
 * @param {string} targetModel - Model to optimize for.
 * @returns {Promise<Array<object>>} Suggestions for this schema.
 */
async function analyzeSchemaOptimization(filePath, schemaType, schemaData, targetModel) {
  const suggestions = [];
  try {
    const tokenAnalysis = analyzeTokenUsage(schemaData, targetModel);
    const { totalTokens } = tokenAnalysis;

    if (totalTokens > 800) {
      suggestions.push({
        filePath,
        schemaType,
        type: "token-reduction",
        message: `Schema is large (${totalTokens} tokens). Consider optimizing for LLM consumption.`,
        impact: {
          tokensBefore: totalTokens,
          // Conservative estimate
          tokensAfter: Math.round(totalTokens * 0.6),
          reduction: 40
        }
      });
    }

    const fieldScores = scoreFields(schemaData, tokenAnalysis.byField);
    const lowValueFields = Object.entries(fieldScores)
      .filter(([, fieldScore]) => fieldScore.semanticValue < 0.4 && fieldScore.tokenCost > 20)
      .map(([field]) => field);
    if (lowValueFields.length > 0) {
      let tokenSavings = 0;
      for (const field of lowValueFields) {
        const fieldScore = fieldScores[field];
        tokenSavings += (fieldScore && fieldScore.tokenCost) || 0;
      }
      suggestions.push({
        filePath,
        schemaType,
        type: "field-removal",
        message: `${lowValueFields.length} field(s) have low semantic value but high token cost.`,
        impact: {
          tokensBefore: totalTokens,
          tokensAfter: totalTokens - tokenSavings,
          reduction: Math.round(tokenSavings / totalTokens * 100)
        },
        fields: lowValueFields
      });
    }

    const missingHighValueFields = detectMissingHighValueFields(schemaType, schemaData);
    if (missingHighValueFields.length > 0) {
      suggestions.push({
        filePath,
        schemaType,
        type: "field-addition",
        message: `Consider adding high-value fields: ${missingHighValueFields.join(", ")}`,
        impact: {
          tokensBefore: totalTokens,
          // Estimate
          tokensAfter: totalTokens + 50
        },
        fields: missingHighValueFields
      });
    }

    suggestions.push(...detectRedundancies(schemaData, filePath, schemaType));
  } catch (error) {
    console.warn(`Failed to analyze schema in ${filePath}:`, error);
  }
  return suggestions;
}
1879
/**
 * Lists the recommended high-value properties that a schema of the given type lacks.
 *
 * Only the types in the table below have recommendations; any other type name
 * (including ones that collide with inherited object members such as "constructor"
 * or "__proto__") yields an empty list.
 *
 * @param {string} schemaType - Schema type name, e.g. "Article" or "Product".
 * @param {object} data - The schema object to inspect.
 * @returns {string[]} Recommended field names for which `hasField2` reports no value,
 *   in table order.
 */
function detectMissingHighValueFields(schemaType, data) {
  const highValueFields = {
    Article: ["image", "dateModified", "publisher", "description", "wordCount"],
    Product: ["image", "brand", "aggregateRating", "review"],
    Organization: ["url", "logo", "sameAs", "description"],
    Person: ["image", "url", "sameAs", "description"],
    LocalBusiness: ["telephone", "address", "geo", "openingHours"],
    Event: ["image", "organizer", "eventStatus", "eventAttendanceMode"]
  };
  // Own-property check: a bare `highValueFields[schemaType]` would also resolve
  // inherited members (e.g. "toString"), which are truthy but not iterable.
  const isKnownType = Object.prototype.hasOwnProperty.call(highValueFields, schemaType);
  const recommendedFields = isKnownType ? highValueFields[schemaType] : [];
  return recommendedFields.filter((field) => !hasField2(data, field));
}
1897
/**
 * Flags structural waste in a schema object.
 *
 * Three independent checks are performed, in this order:
 *  1. a `publisher` whose `@id` and `url` are both truthy and strictly equal,
 *  2. a maximum nesting depth (as reported by `getMaxDepth`) greater than 4,
 *  3. any arrays reported by `findLargeArrays`.
 *
 * @param {object} data - The schema object to inspect.
 * @param {string} filePath - Path copied into each suggestion.
 * @param {string} schemaType - Schema type name copied into each suggestion.
 * @returns {object[]} Zero to three suggestions with fixed reduction estimates (5, 10, 15).
 */
function detectRedundancies(data, filePath, schemaType) {
  const found = [];

  const publisher = data.publisher;
  const hasDuplicateId = Boolean(
    publisher && publisher["@id"] && publisher.url && publisher["@id"] === publisher.url
  );
  if (hasDuplicateId) {
    found.push({
      filePath,
      schemaType,
      type: "field-removal",
      message: "Redundant @id and url in publisher (same value)",
      impact: {
        reduction: 5
      },
      fields: ["publisher.@id"]
    });
  }

  const nestingLevels = getMaxDepth(data);
  if (nestingLevels > 4) {
    found.push({
      filePath,
      schemaType,
      type: "field-update",
      message: `Schema has deep nesting (${nestingLevels} levels). Consider flattening.`,
      impact: {
        reduction: 10
      }
    });
  }

  const oversized = findLargeArrays(data);
  if (oversized.length > 0) {
    found.push({
      filePath,
      schemaType,
      type: "field-update",
      message: `Large arrays found: ${oversized.join(", ")}. Consider limiting.`,
      impact: {
        reduction: 15
      },
      fields: oversized
    });
  }

  return found;
}
1942
/**
 * Reports whether a dot-separated path resolves to a non-null, non-undefined value.
 *
 * Each path segment must exist (per the `in` operator, so inherited members count)
 * on an object-typed container; otherwise the lookup fails.
 *
 * @param {*} obj - Root value to walk.
 * @param {string} fieldPath - Path such as "publisher.logo".
 * @returns {boolean} True when every segment resolves and the final value is set.
 */
function hasField2(obj, fieldPath) {
  let cursor = obj;
  for (const segment of fieldPath.split(".")) {
    const canDescend = Boolean(cursor) && typeof cursor === "object" && segment in cursor;
    if (!canDescend) {
      return false;
    }
    cursor = cursor[segment];
  }
  // Loose comparison deliberately matches both null and undefined.
  return cursor != null;
}
1953
/**
 * Measures how deeply plain objects are nested inside a value.
 *
 * A non-object (or null) contributes the depth at which it was reached. Only
 * non-array object values are descended into, so anything held inside an array
 * property does not add depth.
 *
 * @param {*} obj - Value to measure.
 * @param {number} [currentDepth=0] - Depth already accumulated by the caller.
 * @returns {number} The greatest depth reached below (and including) `currentDepth`.
 */
function getMaxDepth(obj, currentDepth = 0) {
  const isObjectLike = (candidate) => typeof candidate === "object" && candidate !== null;
  if (!isObjectLike(obj)) {
    return currentDepth;
  }
  return Object.values(obj)
    .filter((child) => isObjectLike(child) && !Array.isArray(child))
    .reduce(
      (deepest, child) => Math.max(deepest, getMaxDepth(child, currentDepth + 1)),
      currentDepth
    );
}
1966
/**
 * Finds every array holding more than 10 elements anywhere inside a value.
 *
 * Paths are dot-separated; array elements contribute their index as a segment.
 * An oversized array at the top level is reported as "root".
 *
 * @param {*} obj - Value to search.
 * @param {string} [path2=""] - Path accumulated so far (used by the recursion).
 * @returns {string[]} Paths of the oversized arrays, parents before their children.
 */
function findLargeArrays(obj, path2 = "") {
  const hits = [];
  if (typeof obj !== "object" || obj === null) {
    return hits;
  }
  if (Array.isArray(obj) && obj.length > 10) {
    hits.push(path2 || "root");
  }
  for (const [key, child] of Object.entries(obj)) {
    const childPath = path2 ? `${path2}.${key}` : key;
    for (const nestedHit of findLargeArrays(child, childPath)) {
      hits.push(nestedHit);
    }
  }
  return hits;
}
1979
/**
 * Summarizes a list of optimization suggestions.
 *
 * @param {object[]} suggestions - Suggestions, each with a `type` and an `impact`
 *   object whose optional `reduction` is treated as 0 when missing or falsy.
 * @returns {{totalReduction: number, byType: Object<string, number>, topOpportunities: object[]}}
 *   `totalReduction` is the rounded mean reduction per suggestion (0 for an empty
 *   list), `byType` counts suggestions per type, and `topOpportunities` holds up to
 *   five suggestions with the largest reductions, largest first.
 */
function getOptimizationSummary(suggestions) {
  const reductionOf = (suggestion) => suggestion.impact.reduction || 0;

  const reductionSum = suggestions.reduce((sum, suggestion) => sum + reductionOf(suggestion), 0);

  const byType = {};
  for (const { type } of suggestions) {
    byType[type] = (byType[type] || 0) + 1;
  }

  // Rank the whole list before truncating: slicing first would only reorder the
  // first five inputs and miss larger reductions further down. Sorting a copy also
  // keeps the caller's array untouched.
  const topOpportunities = [...suggestions]
    .sort((left, right) => reductionOf(right) - reductionOf(left))
    .slice(0, 5);

  return {
    // Math.max guards the division for an empty list.
    totalReduction: Math.round(reductionSum / Math.max(1, suggestions.length)),
    byType,
    topOpportunities
  };
}
1995
+
1996
+ // src/auditor/index.ts
1997
/**
 * Runs the full audit pipeline: scan, validate, (optionally) suggest optimizations,
 * build the coverage report and (optionally) save it. Progress is written to
 * `console.log` at each stage.
 *
 * Options are forwarded to a stage only when set: truthy for `rootDir`, `include`,
 * `exclude`, `maxDepth`, `schemaTypes`, `ignore` and `format`; not `undefined` for
 * `deepAnalysis` and `strict`. Optimization analysis runs unless
 * `includeOptimizations` is exactly `false`, using `targetModel` or "gpt-4".
 * The report is saved only when `output` is truthy.
 *
 * @param {object} [options={}] - Audit options as described above.
 * @returns {Promise<object>} The generated coverage report.
 */
async function auditProject(options = {}) {
  console.log("\u{1F50D} Scanning project...");
  const scanOptions = {};
  if (options.rootDir) {
    scanOptions.rootDir = options.rootDir;
  }
  if (options.include) {
    scanOptions.include = options.include;
  }
  if (options.exclude) {
    scanOptions.exclude = options.exclude;
  }
  if (options.maxDepth) {
    scanOptions.maxDepth = options.maxDepth;
  }
  if (options.deepAnalysis !== void 0) {
    scanOptions.deepAnalysis = options.deepAnalysis;
  }
  const scanResult = await scanProject(scanOptions);
  console.log(
    `\u2705 Found ${scanResult.filesWithSchemas.length} files with schemas`
  );

  console.log("\u{1F52C} Validating schemas...");
  const validationOptions = {};
  if (options.strict !== void 0) {
    validationOptions.strict = options.strict;
  }
  if (options.schemaTypes) {
    validationOptions.schemaTypes = options.schemaTypes;
  }
  if (options.ignore) {
    validationOptions.ignore = options.ignore;
  }
  const validationResult = await validateSchemas(scanResult, validationOptions);
  if (validationResult.errors > 0) {
    console.log(`\u274C Found ${validationResult.errors} errors`);
  }
  if (validationResult.warnings > 0) {
    console.log(`\u26A0\uFE0F Found ${validationResult.warnings} warnings`);
  }

  let optimizations = [];
  const wantsOptimizations = options.includeOptimizations !== false;
  if (wantsOptimizations) {
    console.log("\u{1F4A1} Analyzing optimizations...");
    const model = options.targetModel || "gpt-4";
    optimizations = await generateOptimizationSuggestions(scanResult, model);
    if (optimizations.length > 0) {
      console.log(`\u{1F4A1} Found ${optimizations.length} optimization opportunities`);
    }
  }

  console.log("\u{1F4CA} Generating report...");
  const report = generateCoverageReport(scanResult, validationResult, optimizations);
  if (options.output) {
    const saveOptions = {};
    if (options.format) {
      saveOptions.format = options.format;
    }
    saveOptions.output = options.output;
    await saveReport(report, saveOptions);
    console.log(`\u2705 Report saved to: ${options.output}`);
  }
  return report;
}
2047
/**
 * Scans the project and validates the detected schemas, returning only the verdict.
 *
 * `rootDir`, `include` and `exclude` are forwarded to the scan when truthy;
 * `schemaTypes` and `ignore` are forwarded to validation when truthy, and `strict`
 * whenever it is not `undefined`. No other options are passed on.
 *
 * @param {object} [options={}] - Scan and validation options.
 * @returns {Promise<*>} The `valid` property of the validation result.
 */
async function quickValidate(options = {}) {
  const scanOptions = {};
  for (const key of ["rootDir", "include", "exclude"]) {
    if (options[key]) {
      scanOptions[key] = options[key];
    }
  }
  const scanResult = await scanProject(scanOptions);

  const validationOptions = {};
  if (options.strict !== void 0) {
    validationOptions.strict = options.strict;
  }
  for (const key of ["schemaTypes", "ignore"]) {
    if (options[key]) {
      validationOptions[key] = options[key];
    }
  }
  const { valid } = await validateSchemas(scanResult, validationOptions);
  return valid;
}
2060
/**
 * Builds a coverage report from existing scan and validation results and saves it.
 * No optimization suggestions are passed to the report generator.
 *
 * @param {object} scanResult - Result of a previous project scan.
 * @param {object} validationResult - Result of a previous schema validation.
 * @param {object} [options={}] - Passed through unchanged to `saveReport`.
 * @returns {Promise<void>} Resolves once the report has been saved.
 */
async function generateReportOnly(scanResult, validationResult, options = {}) {
  await saveReport(generateCoverageReport(scanResult, validationResult), options);
}
2064
+
2065
+ export { auditProject, generateCoverageReport, generateOptimizationSuggestions, generateReportOnly, getOptimizationSummary, getProjectStats, quickValidate, saveReport, scanProject, validateSchemas, validateSingleSchema };
2066
+ //# sourceMappingURL=auditor.js.map
2067
+ //# sourceMappingURL=auditor.js.map