@cloudcreate/adsense-check 1.3.1 → 1.4.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -36,15 +36,13 @@ var BrowserManager = class {
36
36
  async function fetchPage(page, url, timeout = 3e4) {
37
37
  const response = await page.goto(url, { waitUntil: "domcontentloaded", timeout });
38
38
  const status = response?.status() ?? 0;
39
- let text = await page.evaluate(() => document.body?.innerText ?? "");
40
- if (text.replace(/\s+/g, "").length < 100) {
41
- try {
42
- await page.waitForLoadState("networkidle", { timeout: 1e4 });
43
- } catch {
44
- }
45
- await page.waitForTimeout(2e3);
46
- text = await page.evaluate(() => document.body?.innerText ?? "");
39
+ try {
40
+ await page.waitForLoadState("networkidle", { timeout: 1e4 });
41
+ } catch {
47
42
  }
43
+ await page.waitForTimeout(1500);
44
+ const urlAfterRender = page.url();
45
+ let text = await page.evaluate(() => document.body?.innerText ?? "");
48
46
  const content = await page.content();
49
47
  const links = await page.evaluate(
50
48
  () => Array.from(document.querySelectorAll("a[href]")).map((a) => a.href).filter((href) => href.startsWith("http"))
@@ -52,7 +50,7 @@ async function fetchPage(page, url, timeout = 3e4) {
52
50
  const linkDetails = await page.evaluate(
53
51
  () => Array.from(document.querySelectorAll("a[href]")).filter((a) => a.href.startsWith("http")).map((a) => ({
54
52
  href: a.href,
55
- text: a.innerText.trim()
53
+ text: a.innerText?.trim() ?? ""
56
54
  }))
57
55
  );
58
56
  const navText = await page.evaluate(() => {
@@ -89,10 +87,11 @@ async function fetchPage(page, url, timeout = 3e4) {
89
87
  canvasCount: document.querySelectorAll("canvas").length,
90
88
  articleCount: document.querySelectorAll("article").length,
91
89
  textLength: (document.body?.innerText ?? "").replace(/\s+/g, "").length,
92
- gameLinks
90
+ gameLinks,
91
+ videoElementCount: document.querySelectorAll("video").length
93
92
  };
94
93
  });
95
- return { status, content, text, links, linkDetails, navText, footerText, title, url, signals };
94
+ return { status, content, text, links, linkDetails, navText, footerText, title, url: urlAfterRender, signals };
96
95
  }
97
96
  async function checkRobotsTxt(origin) {
98
97
  try {
@@ -180,6 +179,10 @@ var en = {
180
179
  "item.content.game_desc": "Game Descriptions",
181
180
  "item.content.iframe_quality": "Iframe Quality",
182
181
  "item.content.game_variety": "Game Variety",
182
+ "item.content.video_desc": "Video Descriptions",
183
+ "item.content.video_variety": "Video Variety",
184
+ "item.content.reference_entry": "Reference Entries",
185
+ "item.content.reference_variety": "Reference Variety",
183
186
  // Structure items
184
187
  "item.structure.internal": "Internal Links",
185
188
  "item.structure.deadlinks": "Dead Links",
@@ -230,9 +233,21 @@ var en = {
230
233
  "content.iframe_quality.warn": "{count} game iframes detected \u2014 ensure each has proper title and size attributes",
231
234
  "content.game_variety.pass": "Game pages show good variety",
232
235
  "content.game_variety.warn": "Game pages are {pct}% similar \u2014 may look like mass-produced content",
236
+ // Video-specific messages
237
+ "content.video_desc.pass": "{total} video page(s) have sufficient description text",
238
+ "content.video_desc.warn": "{thin}/{total} video pages lack description text (recommend 50+ chars)",
239
+ "content.video_variety.pass": "Video pages show good variety (similarity {pct}%)",
240
+ "content.video_variety.warn": "Video pages are {pct}% similar \u2014 may look like mass-produced content",
241
+ // Reference-specific messages
242
+ "content.reference_entry.pass": "Reference entries have sufficient structure and metadata",
243
+ "content.reference_entry.warn": "{thin}/{total} reference entries lack structure (recommend 100+ chars)",
244
+ "content.reference_variety.pass": "Reference pages show good variety (similarity {pct}%)",
245
+ "content.reference_variety.warn": "Reference pages are {pct}% similar \u2014 expected for structured entries",
233
246
  // Site type detection
234
247
  "detector.type.content": "Content Site",
235
248
  "detector.type.game": "Game Site",
249
+ "detector.type.video": "Video Site",
250
+ "detector.type.reference": "Reference Site",
236
251
  "detector.signals": "Signals: {details}",
237
252
  // Required pages messages
238
253
  "pages.found": "Found {name} page ({path})",
@@ -266,6 +281,7 @@ var en = {
266
281
  // Policy messages
267
282
  "policy.keywords.pass": "No policy-violating keywords found",
268
283
  "policy.keywords.fail": "{count} potentially violating keyword(s) found",
284
+ "policy.keywords.warn": "{count} potentially violating keyword(s) found (on pages with substantial content \u2014 verify with AI analysis)",
269
285
  // AI messages
270
286
  "ai.skip": "AI_API_KEY not configured, skipping AI analysis",
271
287
  "ai.fail": "AI analysis failed: {error}",
@@ -288,7 +304,7 @@ var en = {
288
304
  "report.content_label": "Content",
289
305
  // Two-group scoring
290
306
  "report.hard_requirements": "Hard Requirements",
291
- "report.soft_scoring": "Soft Scoring",
307
+ "report.soft_scoring": "Smart Scoring",
292
308
  "report.hard.ready": "READY \u2014 all requirements met",
293
309
  "report.hard.warn": "NEEDS FIXES \u2014 {count} warning(s) to address",
294
310
  "report.hard.fail": "NOT READY \u2014 {count} failure(s) must be fixed",
@@ -301,7 +317,7 @@ var en = {
301
317
  "group.policy": "Policy Compliance",
302
318
  "group.site_scale": "Site Scale",
303
319
  "group.content_quality": "Content Quality",
304
- "group.ai_value": "AI Value Analysis",
320
+ "group.ai_value": "Value Analysis",
305
321
  "group.ai_analysis": "AI Content Analysis",
306
322
  "group.page_quality": "Page Quality",
307
323
  "group.user_experience": "User Experience",
@@ -312,7 +328,69 @@ var en = {
312
328
  "detector.type.unsupported": "Unsupported Type",
313
329
  "topic.info": "Site topic: {topic}",
314
330
  "topic.description": "{description}",
315
- "topic.unsupported_warning": "This site type ({type}) is not supported by AdSense checklist"
331
+ "topic.unsupported_warning": "This site type ({type}) is not supported by AdSense checklist",
332
+ // Reporter UI
333
+ "reporter.site_type": "Site type",
334
+ "reporter.topic": "Topic",
335
+ "reporter.pages_label": "Pages",
336
+ "reporter.confidence": "{confidence} confidence",
337
+ "reporter.ai_value_label": "Value Score",
338
+ "reporter.ai_value_note": "geometric mean \xD7 page-type weights",
339
+ "reporter.ai_dimensions": "AI Dimensions",
340
+ "reporter.avg_per_10": "avg /10",
341
+ "reporter.dim_value": "Value",
342
+ "reporter.dim_originality": "Originality",
343
+ "reporter.dim_relevance": "Relevance",
344
+ "reporter.dim_compliance": "Compliance",
345
+ "reporter.formula_label": "Hard {hardPct}% \xD7 0.4 + Soft {softPct}% \xD7 0.6 - Penalty {penalty} = {total}",
346
+ "reporter.mechanical_label": "Base Score",
347
+ "reporter.advanced_label": "Advanced Score",
348
+ // Markdown report
349
+ "md.report_title": "AdSense Review Report",
350
+ "md.table.project": "Item",
351
+ "md.table.value": "Value",
352
+ "md.table.url": "URL",
353
+ "md.table.time": "Time",
354
+ "md.table.site_type": "Site Type",
355
+ "md.table.topic": "Topic",
356
+ "md.table.description": "Description",
357
+ "md.table.sampling": "Sampling",
358
+ "md.table.total": "total",
359
+ "md.table.recent": "recent (6mo)",
360
+ "md.table.sampled": "sampled",
361
+ "md.table.confidence": "confidence",
362
+ "md.composite_score_title": "Composite Score",
363
+ "md.hard_requirements": "Hard Requirements",
364
+ "md.soft_scoring": "Smart Scoring",
365
+ "md.ai_value_title": "Value Analysis",
366
+ "md.table.dimension": "Dimension",
367
+ "md.table.avg_score": "Avg Score",
368
+ "md.dim_value": "Value",
369
+ "md.dim_originality": "Originality",
370
+ "md.dim_relevance": "Relevance",
371
+ "md.dim_compliance": "Compliance",
372
+ "md.site_ai_score": "Site AI Score",
373
+ "md.geometric_weighted": "geometric mean \xD7 page-type weights",
374
+ "md.page_details": "Page Details",
375
+ "md.pages_count": "{count} pages",
376
+ "md.table.status": "Status",
377
+ "md.table.type": "Type",
378
+ "md.table.path": "Path",
379
+ "md.table.score": "Score",
380
+ "md.table.content_ratio": "Content Ratio",
381
+ "md.table.ai_composite": "Advanced",
382
+ "md.table.title": "Title",
383
+ "md.problem_pages": "Problem Page Details",
384
+ "md.ai_status": "AI Status",
385
+ "md.four_dimensions": "Four-dimension scores",
386
+ "md.ai_composite_score": "AI composite score",
387
+ "md.geometric_mean": "geometric mean",
388
+ "md.assessment": "Assessment",
389
+ "md.suggestions": "Suggestions",
390
+ "md.summary.not_ready": "**\u274C NOT READY** \u2014 {count} item(s) must be fixed",
391
+ "md.summary.needs_fixes": "**\u26A0\uFE0F NEEDS FIXES** \u2014 {count} warning(s) to address",
392
+ "md.summary.mostly_ready": "**\u26A0\uFE0F MOSTLY READY** \u2014 fix {count} warning(s) before submitting",
393
+ "md.summary.ready": "**\u2705 READY** \u2014 can submit for AdSense review"
316
394
  };
317
395
  var zh = {
318
396
  // 分类
@@ -334,6 +412,10 @@ var zh = {
334
412
  "item.content.game_desc": "\u6E38\u620F\u63CF\u8FF0",
335
413
  "item.content.iframe_quality": "Iframe \u8D28\u91CF",
336
414
  "item.content.game_variety": "\u6E38\u620F\u591A\u6837\u6027",
415
+ "item.content.video_desc": "\u89C6\u9891\u63CF\u8FF0",
416
+ "item.content.video_variety": "\u89C6\u9891\u591A\u6837\u6027",
417
+ "item.content.reference_entry": "\u53C2\u8003\u6761\u76EE\u5B8C\u6574\u6027",
418
+ "item.content.reference_variety": "\u53C2\u8003\u591A\u6837\u6027",
337
419
  // 结构检查项
338
420
  "item.structure.internal": "\u5185\u90E8\u94FE\u63A5",
339
421
  "item.structure.deadlinks": "\u6B7B\u94FE\u68C0\u6D4B",
@@ -384,9 +466,21 @@ var zh = {
384
466
  "content.iframe_quality.warn": "\u68C0\u6D4B\u5230 {count} \u4E2A\u6E38\u620F iframe \u2014 \u786E\u4FDD\u6BCF\u4E2A\u90FD\u6709 title \u548C\u5408\u7406\u5C3A\u5BF8",
385
467
  "content.game_variety.pass": "\u6E38\u620F\u9875\u9762\u591A\u6837\u6027\u6B63\u5E38",
386
468
  "content.game_variety.warn": "\u6E38\u620F\u9875\u9762\u76F8\u4F3C\u5EA6 {pct}% \u2014 \u53EF\u80FD\u662F\u6A21\u677F\u6279\u91CF\u751F\u6210",
469
+ // 视频站专用消息
470
+ "content.video_desc.pass": "{total} \u4E2A\u89C6\u9891\u9875\u9762\u6709\u8DB3\u591F\u7684\u63CF\u8FF0\u6587\u5B57",
471
+ "content.video_desc.warn": "{thin}/{total} \u4E2A\u89C6\u9891\u9875\u9762\u7F3A\u5C11\u63CF\u8FF0\u6587\u5B57\uFF08\u5EFA\u8BAE 50+ \u5B57\uFF09",
472
+ "content.video_variety.pass": "\u89C6\u9891\u9875\u9762\u591A\u6837\u6027\u6B63\u5E38 (\u76F8\u4F3C\u5EA6 {pct}%)",
473
+ "content.video_variety.warn": "\u89C6\u9891\u9875\u9762\u76F8\u4F3C\u5EA6 {pct}% \u2014 \u53EF\u80FD\u662F\u6A21\u677F\u6279\u91CF\u751F\u6210",
474
+ // 参考站专用消息
475
+ "content.reference_entry.pass": "\u53C2\u8003\u6761\u76EE\u5177\u6709\u8DB3\u591F\u7684\u7ED3\u6784\u548C\u5143\u6570\u636E",
476
+ "content.reference_entry.warn": "{thin}/{total} \u4E2A\u53C2\u8003\u6761\u76EE\u7ED3\u6784\u4E0D\u8DB3\uFF08\u5EFA\u8BAE 100+ \u5B57\uFF09",
477
+ "content.reference_variety.pass": "\u53C2\u8003\u9875\u9762\u591A\u6837\u6027\u6B63\u5E38 (\u76F8\u4F3C\u5EA6 {pct}%)",
478
+ "content.reference_variety.warn": "\u53C2\u8003\u9875\u9762\u76F8\u4F3C\u5EA6 {pct}% \u2014 \u7ED3\u6784\u5316\u6761\u76EE\u5C5E\u4E8E\u6B63\u5E38",
387
479
  // 站点类型检测
388
480
  "detector.type.content": "\u5185\u5BB9\u7AD9",
389
481
  "detector.type.game": "\u6E38\u620F\u7AD9",
482
+ "detector.type.video": "\u89C6\u9891\u7AD9",
483
+ "detector.type.reference": "\u53C2\u8003\u7AD9",
390
484
  "detector.signals": "\u68C0\u6D4B\u4FE1\u53F7: {details}",
391
485
  // 必要页面消息
392
486
  "pages.found": "\u627E\u5230 {name} \u9875\u9762 ({path})",
@@ -420,6 +514,7 @@ var zh = {
420
514
  // 合规消息
421
515
  "policy.keywords.pass": "\u672A\u68C0\u6D4B\u5230\u660E\u663E\u7684\u8FDD\u89C4\u5173\u952E\u8BCD",
422
516
  "policy.keywords.fail": "\u68C0\u6D4B\u5230 {count} \u4E2A\u53EF\u7591\u5173\u952E\u8BCD",
517
+ "policy.keywords.warn": "\u68C0\u6D4B\u5230 {count} \u4E2A\u53EF\u7591\u5173\u952E\u8BCD\uFF08\u4F4D\u4E8E\u5185\u5BB9\u5145\u5B9E\u9875\u9762\uFF0C\u8BF7\u7ED3\u5408 AI \u5206\u6790\u786E\u8BA4\uFF09",
423
518
  // AI 消息
424
519
  "ai.skip": "\u672A\u914D\u7F6E AI_API_KEY\uFF0C\u8DF3\u8FC7 AI \u5206\u6790",
425
520
  "ai.fail": "AI \u5206\u6790\u5931\u8D25: {error}",
@@ -442,7 +537,7 @@ var zh = {
442
537
  "report.content_label": "\u6B63\u6587",
443
538
  // 两组评分
444
539
  "report.hard_requirements": "\u786C\u6027\u8981\u6C42",
445
- "report.soft_scoring": "\u67D4\u6027\u8BC4\u5206",
540
+ "report.soft_scoring": "\u667A\u80FD\u8BC4\u5206",
446
541
  "report.hard.ready": "READY \u2014 \u6240\u6709\u5FC5\u8981\u9879\u8FBE\u6807",
447
542
  "report.hard.warn": "NEEDS FIXES \u2014 {count} \u9879\u8B66\u544A\u5F85\u4FEE\u590D",
448
543
  "report.hard.fail": "NOT READY \u2014 {count} \u9879\u5931\u8D25\u5FC5\u987B\u4FEE\u590D",
@@ -455,7 +550,7 @@ var zh = {
455
550
  "group.policy": "\u653F\u7B56\u5408\u89C4",
456
551
  "group.site_scale": "\u7AD9\u70B9\u89C4\u6A21",
457
552
  "group.content_quality": "\u5185\u5BB9\u8D28\u91CF",
458
- "group.ai_value": "AI \u4EF7\u503C\u5206\u6790",
553
+ "group.ai_value": "\u4EF7\u503C\u5206\u6790",
459
554
  "group.ai_analysis": "AI \u5185\u5BB9\u5206\u6790",
460
555
  "group.page_quality": "\u9875\u9762\u8D28\u91CF",
461
556
  "group.user_experience": "\u7528\u6237\u4F53\u9A8C",
@@ -466,7 +561,70 @@ var zh = {
466
561
  "detector.type.unsupported": "\u4E0D\u652F\u6301\u7684\u7C7B\u578B",
467
562
  "topic.info": "\u7AD9\u70B9\u4E3B\u9898: {topic}",
468
563
  "topic.description": "{description}",
469
- "topic.unsupported_warning": "\u8BE5\u7AD9\u70B9\u7C7B\u578B\uFF08{type}\uFF09\u4E0D\u5728 AdSense \u68C0\u67E5\u652F\u6301\u8303\u56F4\u5185"
564
+ "topic.unsupported_warning": "\u8BE5\u7AD9\u70B9\u7C7B\u578B\uFF08{type}\uFF09\u4E0D\u5728 AdSense \u68C0\u67E5\u652F\u6301\u8303\u56F4\u5185",
565
+ // 报告 UI(终端)
566
+ "reporter.site_type": "\u7AD9\u70B9\u7C7B\u578B",
567
+ "reporter.topic": "\u4E3B\u9898",
568
+ "reporter.pages_label": "\u9875\u9762",
569
+ "reporter.confidence": "\u7F6E\u4FE1\u5EA6: {confidence}",
570
+ "reporter.ai_value_label": "\u4EF7\u503C\u8BC4\u5206",
571
+ "reporter.ai_value_note": "\u51E0\u4F55\u5747\u503C \xD7 \u9875\u9762\u7C7B\u578B\u52A0\u6743",
572
+ "reporter.ai_dimensions": "AI \u7EF4\u5EA6",
573
+ "reporter.avg_per_10": "\u5747\u5206 /10",
574
+ "reporter.formula_label": "\u786C\u6027 {hardPct}% \xD7 0.4 + \u667A\u80FD {softPct}% \xD7 0.6 - \u6263\u5206 {penalty} = {total}",
575
+ "reporter.mechanical_label": "\u57FA\u7840\u8BC4\u5206",
576
+ "reporter.advanced_label": "\u9AD8\u7EA7\u8BC4\u5206",
577
+ // 维度名称(终端和 Markdown)
578
+ "reporter.dim_value": "\u4EF7\u503C",
579
+ "reporter.dim_originality": "\u539F\u521B",
580
+ "reporter.dim_relevance": "\u76F8\u5173",
581
+ "reporter.dim_compliance": "\u5408\u89C4",
582
+ // Markdown 报告
583
+ "md.report_title": "AdSense \u5BA1\u6838\u62A5\u544A",
584
+ "md.table.project": "\u9879\u76EE",
585
+ "md.table.value": "\u503C",
586
+ "md.table.url": "URL",
587
+ "md.table.time": "\u65F6\u95F4",
588
+ "md.table.site_type": "\u7AD9\u70B9\u7C7B\u578B",
589
+ "md.table.topic": "\u4E3B\u9898",
590
+ "md.table.description": "\u63CF\u8FF0",
591
+ "md.table.sampling": "\u62BD\u6837",
592
+ "md.table.total": "\u603B\u8BA1",
593
+ "md.table.recent": "\u8FD1 6 \u4E2A\u6708",
594
+ "md.table.sampled": "\u5DF2\u62BD\u6837",
595
+ "md.table.confidence": "\u7F6E\u4FE1\u5EA6",
596
+ "md.composite_score_title": "\u7EFC\u5408\u8BC4\u5206",
597
+ "md.hard_requirements": "\u786C\u6027\u8981\u6C42",
598
+ "md.soft_scoring": "\u667A\u80FD\u8BC4\u5206",
599
+ "md.ai_value_title": "\u4EF7\u503C\u5206\u6790",
600
+ "md.table.dimension": "\u7EF4\u5EA6",
601
+ "md.table.avg_score": "\u5747\u5206",
602
+ "md.dim_value": "\u4EF7\u503C",
603
+ "md.dim_originality": "\u539F\u521B",
604
+ "md.dim_relevance": "\u76F8\u5173",
605
+ "md.dim_compliance": "\u5408\u89C4",
606
+ "md.site_ai_score": "\u7AD9\u70B9 AI \u8BC4\u5206",
607
+ "md.geometric_weighted": "\u51E0\u4F55\u5747\u503C \xD7 \u9875\u9762\u7C7B\u578B\u52A0\u6743",
608
+ "md.page_details": "\u9010\u9875\u8BE6\u60C5",
609
+ "md.pages_count": "{count} \u4E2A\u9875\u9762",
610
+ "md.table.status": "\u72B6\u6001",
611
+ "md.table.type": "\u7C7B\u578B",
612
+ "md.table.path": "\u8DEF\u5F84",
613
+ "md.table.score": "\u8BC4\u5206",
614
+ "md.table.content_ratio": "\u6B63\u6587\u6BD4",
615
+ "md.table.ai_composite": "\u9AD8\u7EA7\u8BC4\u5206",
616
+ "md.table.title": "\u6807\u9898",
617
+ "md.problem_pages": "\u95EE\u9898\u9875\u9762\u8BE6\u60C5",
618
+ "md.ai_status": "AI \u72B6\u6001",
619
+ "md.four_dimensions": "\u56DB\u7EF4\u8BC4\u5206",
620
+ "md.ai_composite_score": "AI \u7EFC\u5408\u5206",
621
+ "md.geometric_mean": "\u51E0\u4F55\u5747\u503C",
622
+ "md.assessment": "\u8BC4\u4F30",
623
+ "md.suggestions": "\u6539\u8FDB\u5EFA\u8BAE",
624
+ "md.summary.not_ready": "**\u274C NOT READY** \u2014 {count} \u9879\u5931\u8D25\u9700\u8981\u4FEE\u590D",
625
+ "md.summary.needs_fixes": "**\u26A0\uFE0F NEEDS FIXES** \u2014 {count} \u9879\u8B66\u544A\u5F85\u4FEE\u590D",
626
+ "md.summary.mostly_ready": "**\u26A0\uFE0F MOSTLY READY** \u2014 \u4FEE\u590D {count} \u9879\u8B66\u544A\u540E\u53EF\u63D0\u4EA4\u5BA1\u6838",
627
+ "md.summary.ready": "**\u2705 READY** \u2014 \u53EF\u4EE5\u63D0\u4EA4 AdSense \u5BA1\u6838"
470
628
  };
471
629
  var langMap = { en, zh };
472
630
  function getSupportedLangs() {
@@ -552,6 +710,28 @@ Score each dimension from 0 to 10:
552
710
  Also set "relevanceLabel": "relevant" | "tangential" | "off-topic".
553
711
  4. compliance (0-10): Does the content comply with Google AdSense policies? 10 = fully compliant, 0 = serious violations.
554
712
  Flag: adult content, gambling, drugs, violence, copyright infringement, deceptive content.
713
+ Important context rules:
714
+ - Words like "crack", "bet", "drug", "gamble" used in educational, news, or informational contexts are NOT violations.
715
+ - If the page discusses or reports on sensitive topics (e.g., "puzzle crack" as a news headline, "betting odds" in sports analysis), this is NOT a violation.
716
+ - Only flag actual promotion or facilitation of policy-violating content.
717
+ - If the page appears to be a 404 error page or has minimal content, do not flag it as a compliance violation. Note it as "insufficient content for compliance review".
718
+
719
+ Also classify the page type based on its content and purpose. Choose ONE:
720
+ - "homepage": The site's main landing page
721
+ - "listing": An index/category page listing multiple items (articles, mods, products)
722
+ - "content": A standalone article, blog post, guide, or tutorial
723
+ - "game_detail": A game page with playable game or game download
724
+ - "video_detail": A page centered around a video or video embed
725
+ - "reference_detail": A wiki entry, glossary term, encyclopedia article, or database record
726
+ - "required": About, Privacy, Terms, Contact, Editorial Policy, Legal
727
+ - "utility": Search, Login, Signup, Download, 404, or functional tool pages
728
+
729
+ IMPORTANT \u2014 special handling for "required" and "utility" pages:
730
+ These pages are necessary for site operation. Do NOT penalize them for low value, originality, or relevance.
731
+ - For "required" pages (Privacy, Terms, About, Contact, Legal): set value=10, originality=10, relevance=10 automatically.
732
+ - Only score compliance normally. Check if the page has reasonable content (not empty or placeholder).
733
+ - For "utility" pages (Search, Login, 404): same rule \u2014 set value=10, originality=10, relevance=10, only evaluate compliance and basic completeness.
734
+ - For all other page types (homepage, listing, content, game_detail, video_detail, reference_detail): score all four dimensions normally.
555
735
 
556
736
  Page: ${page.url}
557
737
 
@@ -565,6 +745,7 @@ Reply in ${langName} with JSON:
565
745
  "relevance": <0-10>,
566
746
  "relevanceLabel": "relevant|tangential|off-topic",
567
747
  "compliance": <0-10>,
748
+ "pageType": "homepage|listing|content|game_detail|video_detail|reference_detail|required|utility",
568
749
  "assessment": "Brief assessment covering the key findings across all dimensions",
569
750
  "suggestions": ["Specific actionable suggestion to improve this page"]
570
751
  }`;
@@ -575,18 +756,29 @@ Reply in ${langName} with JSON:
575
756
  const originalityScore = clampScore(result.originality);
576
757
  const relevanceScore = clampScore(result.relevance);
577
758
  const complianceScore = clampScore(result.compliance);
578
- const geoMean = Math.pow(valueScore * originalityScore * relevanceScore * complianceScore, 0.25);
759
+ const validPageTypes = ["homepage", "listing", "content", "game_detail", "video_detail", "reference_detail", "required", "utility"];
760
+ const inferredPageType = validPageTypes.includes(result.pageType) ? result.pageType : void 0;
761
+ let finalValueScore = valueScore;
762
+ let finalOriginalityScore = originalityScore;
763
+ let finalRelevanceScore = relevanceScore;
764
+ if (inferredPageType === "required" || inferredPageType === "utility") {
765
+ finalValueScore = 10;
766
+ finalOriginalityScore = 10;
767
+ finalRelevanceScore = 10;
768
+ }
769
+ const geoMean = Math.pow(finalValueScore * finalOriginalityScore * finalRelevanceScore * complianceScore, 0.25);
579
770
  const status = geoMean >= 7 ? "pass" : geoMean >= 4 ? "warn" : "fail";
580
771
  return {
581
772
  url: page.url,
582
773
  status,
583
- relevance: result.relevanceLabel ?? (relevanceScore >= 7 ? "relevant" : relevanceScore >= 4 ? "tangential" : "off-topic"),
584
- valueScore,
585
- originalityScore,
586
- relevanceScore,
774
+ relevance: result.relevanceLabel ?? (finalRelevanceScore >= 7 ? "relevant" : finalRelevanceScore >= 4 ? "tangential" : "off-topic"),
775
+ valueScore: finalValueScore,
776
+ originalityScore: finalOriginalityScore,
777
+ relevanceScore: finalRelevanceScore,
587
778
  complianceScore,
588
779
  assessment: result.assessment ?? "",
589
- suggestions: result.suggestions ?? []
780
+ suggestions: result.suggestions ?? [],
781
+ inferredPageType
590
782
  };
591
783
  } catch (err) {
592
784
  return {
@@ -602,6 +794,62 @@ function clampScore(v) {
602
794
  if (isNaN(n)) return 5;
603
795
  return Math.max(0, Math.min(10, Math.round(n)));
604
796
  }
797
+ async function recheckCompliance(pages, langName, onProgress) {
798
+ const result = /* @__PURE__ */ new Map();
799
+ if (pages.length === 0) return result;
800
+ const progress = onProgress ?? (() => {
801
+ });
802
+ progress(`AI: re-checking ${pages.length} suspicious page(s) for compliance...`);
803
+ for (const page of pages) {
804
+ const content = page.text.slice(0, PAGE_CHARS);
805
+ const prompt = `You are a Google AdSense policy compliance expert. A previous analysis flagged this page as potentially non-compliant (score: ${page.firstComplianceScore}/10). Perform a careful second review.
806
+
807
+ Focus ONLY on compliance. Check for:
808
+ - Adult or sexually explicit content
809
+ - Gambling or casino promotion
810
+ - Illegal drugs or controlled substances
811
+ - Violence, gore, or hate speech
812
+ - Copyright infringement or pirated content
813
+ - Deceptive content, phishing, or scams
814
+ - Excessive profanity
815
+ - Misleading medical/financial claims
816
+ - Content that targets children inappropriately
817
+
818
+ Be fair \u2014 informational/educational content ABOUT sensitive topics (e.g., health articles, news reporting) is NOT a violation. Only flag actual policy violations.
819
+
820
+ Additional instructions:
821
+ - If the page text is very short (< 200 characters) and appears to be an error page, 404, or placeholder, do not flag any compliance violations. Score compliance as 10 and note "insufficient content".
822
+ - Context matters: words that match policy keywords but appear in news reporting, educational content, or informational discussion are NOT violations.
823
+
824
+ Page: ${page.url}
825
+
826
+ Content:
827
+ ${content}
828
+
829
+ Reply in ${langName} with JSON:
830
+ {
831
+ "compliance": <0-10>,
832
+ "verdict": "compliant|borderline|violation",
833
+ "assessment": "Brief explanation of your compliance determination"
834
+ }`;
835
+ try {
836
+ const text = await callAI(prompt, 1024);
837
+ const r = extractJson(text);
838
+ const newScore = clampScore(r.compliance);
839
+ const finalScore = Math.max(page.firstComplianceScore, newScore);
840
+ result.set(page.url, {
841
+ complianceScore: finalScore,
842
+ assessment: r.assessment ?? ""
843
+ });
844
+ } catch {
845
+ result.set(page.url, {
846
+ complianceScore: page.firstComplianceScore,
847
+ assessment: "Re-check failed, keeping original score"
848
+ });
849
+ }
850
+ }
851
+ return result;
852
+ }
605
853
  async function analyzeOverall(pageAnalyses, langName, date) {
606
854
  const summaries = pageAnalyses.map(
607
855
  (p, i) => `Page ${i + 1} (${p.url}): [${p.status}] value=${p.valueScore} originality=${p.originalityScore} relevance=${p.relevanceScore} compliance=${p.complianceScore} \u2014 ${p.assessment.slice(0, 150)}`
@@ -705,7 +953,7 @@ function extractJson2(text) {
705
953
  }
706
954
  throw new Error("No JSON found in response");
707
955
  }
708
- var VALID_TYPES = ["content", "tool", "game"];
956
+ var VALID_TYPES = ["content", "tool", "game", "video", "reference"];
709
957
  async function analyzeSiteTopic(homepage, lang = "en", apiKey) {
710
958
  const langName = lang === "zh" ? "\u4E2D\u6587" : "English";
711
959
  const content = homepage.text.slice(0, 2e3);
@@ -717,16 +965,18 @@ Homepage content (first 2000 chars):
717
965
  ${content}
718
966
 
719
967
  Classify this website into ONE of these types:
720
- - "content": informational site (news, blog, reference materials, educational content)
968
+ - "content": informational site (news, blog, educational articles, guides)
721
969
  - "tool": utility/tool site (calculator, converter, generator, online tool)
722
970
  - "game": online game site (playable games, game portal)
971
+ - "video": video site (video sharing, video blog, YouTube-style site with embedded videos)
972
+ - "reference": wiki/encyclopedia/reference site (structured knowledge base, searchable database, glossary, dictionary, encyclopedia-style content with interlinked articles, transcript archive)
723
973
  - "unsupported": e-commerce, SaaS product, social media, forum, portfolio, or anything not fitting above categories
724
974
 
725
975
  Reply language: ${langName}
726
976
 
727
977
  Reply in ${langName} with JSON:
728
978
  {
729
- "type": "content|tool|game|unsupported",
979
+ "type": "content|tool|game|video|reference|unsupported",
730
980
  "topic": "Main topic in 3-5 words (e.g. 'Excel translation reference')",
731
981
  "description": "One sentence describing what this site does",
732
982
  "confidence": "high|medium|low",
@@ -759,6 +1009,10 @@ var GAME_NAV_KEYWORDS = /\b(games?|play\b|arcade|puzzle|action)\b/i;
759
1009
  var GAME_NAV_KEYWORDS_ZH = /游戏|玩游戏/;
760
1010
  var TOOL_NAV_KEYWORDS = /\b(calculator|converter|generator|tool|translat|calculat|checker|analyzer|formatter|validator|encoder|decoder)\b/i;
761
1011
  var TOOL_NAV_KEYWORDS_ZH = /计算器|转换器|工具|翻译/;
1012
+ var REFERENCE_NAV_KEYWORDS = /\b(wiki|encyclopedia|reference|glossary|docs|documentation|knowledge\s*base|archive|database|transcript)\b/i;
1013
+ var REFERENCE_NAV_KEYWORDS_ZH = /百科|知识库|参考|词典|文档|数据库|档案/;
1014
+ var VIDEO_NAV_KEYWORDS = /\b(video|videos|watch|channel|channels|stream|vlog|clip|playlist|shorts|tv)\b/i;
1015
+ var VIDEO_NAV_KEYWORDS_ZH = /视频|频道|直播|短视频/;
762
1016
  var GAME_IFRAME_PATTERNS = [
763
1017
  /game/i,
764
1018
  /play/i,
@@ -773,9 +1027,27 @@ var GAME_IFRAME_PATTERNS = [
773
1027
  /friv/i,
774
1028
  /itch\.io/i,
775
1029
  /htmlgames/i,
776
- /gameflare/i,
777
- /embed/i
1030
+ /gameflare/i
1031
+ ];
1032
+ var VIDEO_IFRAME_PATTERNS = [
1033
+ /youtube\.com\/embed/i,
1034
+ /youtube-nocookie\.com/i,
1035
+ /youtu\.be/i,
1036
+ /player\.vimeo\.com/i,
1037
+ /player\.bilibili\.com/i,
1038
+ /dailymotion\.com\/embed/i,
1039
+ /embed\.twitch\.tv/i,
1040
+ /streamable\.com\/o/i,
1041
+ /wistia.*\.net\/medias/i,
1042
+ /vidyard\.com\/embed/i,
1043
+ /brightcove/i
778
1044
  ];
1045
+ function isVideoIframe(src) {
1046
+ return VIDEO_IFRAME_PATTERNS.some((p) => p.test(src));
1047
+ }
1048
+ function isGameIframe(src) {
1049
+ return GAME_IFRAME_PATTERNS.some((p) => p.test(src));
1050
+ }
779
1051
  function detectSiteType(pagesSignals, navText, manualType) {
780
1052
  if (manualType) {
781
1053
  return { type: manualType, confidence: "high", signals: { iframeRatio: 0, canvasRatio: 0, articleRatio: 0, navGameKeywords: false } };
@@ -788,28 +1060,55 @@ function detectSiteType(pagesSignals, navText, manualType) {
788
1060
  let pagesWithCanvas = 0;
789
1061
  let pagesWithArticle = 0;
790
1062
  let pagesWithGameIframe = 0;
1063
+ let pagesWithVideoIframe = 0;
1064
+ let pagesWithVideoElement = 0;
791
1065
  let firstPageIframes = 0;
792
1066
  let firstPageCanvas = 0;
1067
+ let firstPageVideoIframes = 0;
793
1068
  let totalGameLinks = 0;
794
1069
  for (let i = 0; i < pagesSignals.length; i++) {
795
1070
  const sig = pagesSignals[i];
796
1071
  if (sig.iframeCount > 0) pagesWithIframe++;
797
1072
  if (sig.canvasCount > 0) pagesWithCanvas++;
798
1073
  if (sig.articleCount > 0) pagesWithArticle++;
1074
+ if (sig.videoElementCount > 0) pagesWithVideoElement++;
799
1075
  totalGameLinks += sig.gameLinks || 0;
800
1076
  if (i === 0) {
801
1077
  firstPageIframes = sig.iframeCount;
802
1078
  firstPageCanvas = sig.canvasCount;
803
1079
  }
804
- const hasGameIframe = sig.iframeSrcs.some((src) => GAME_IFRAME_PATTERNS.some((p) => p.test(src)));
1080
+ const hasGameIframe = sig.iframeSrcs.some((s) => isGameIframe(s));
1081
+ const hasVideoIframe = sig.iframeSrcs.some((s) => isVideoIframe(s));
805
1082
  if (hasGameIframe) pagesWithGameIframe++;
1083
+ if (hasVideoIframe) {
1084
+ pagesWithVideoIframe++;
1085
+ if (i === 0) firstPageVideoIframes = sig.iframeSrcs.filter((s) => isVideoIframe(s)).length;
1086
+ }
806
1087
  }
807
1088
  const avgGameLinks = totalGameLinks / total;
808
1089
  const iframeRatio = pagesWithIframe / total;
809
1090
  const canvasRatio = pagesWithCanvas / total;
810
1091
  const articleRatio = pagesWithArticle / total;
811
1092
  const gameIframeRatio = pagesWithGameIframe / total;
1093
+ const videoIframeRatio = pagesWithVideoIframe / total;
1094
+ const videoElementRatio = pagesWithVideoElement / total;
812
1095
  const navGameKeywords = GAME_NAV_KEYWORDS.test(navText) || GAME_NAV_KEYWORDS_ZH.test(navText);
1096
+ const navVideoKeywords = VIDEO_NAV_KEYWORDS.test(navText) || VIDEO_NAV_KEYWORDS_ZH.test(navText);
1097
+ let videoScore = 0;
1098
+ if (videoIframeRatio >= 0.3) videoScore += 5;
1099
+ else if (videoIframeRatio >= 0.1) videoScore += 3;
1100
+ if (videoElementRatio >= 0.3) videoScore += 5;
1101
+ else if (videoElementRatio >= 0.1) videoScore += 3;
1102
+ if (navVideoKeywords) videoScore += 3;
1103
+ if (firstPageVideoIframes >= 3) videoScore += 3;
1104
+ else if (firstPageVideoIframes >= 1) videoScore += 1;
1105
+ if (videoScore >= 3) {
1106
+ return {
1107
+ type: "video",
1108
+ confidence: videoScore >= 6 ? "high" : "medium",
1109
+ signals: { iframeRatio, canvasRatio, articleRatio, navGameKeywords }
1110
+ };
1111
+ }
813
1112
  let gameScore = 0;
814
1113
  if (gameIframeRatio >= 0.3) gameScore += 5;
815
1114
  else if (gameIframeRatio >= 0.1) gameScore += 3;
@@ -823,23 +1122,31 @@ function detectSiteType(pagesSignals, navText, manualType) {
823
1122
  else if (avgGameLinks >= 2) gameScore += 2;
824
1123
  else if (totalGameLinks >= 3) gameScore += 1;
825
1124
  if (articleRatio >= 0.7 && gameScore < 3) gameScore -= 2;
826
- const isGame = gameScore >= 3;
827
- let type;
828
- let confidence;
829
- if (isGame) {
830
- type = "game";
831
- confidence = gameScore >= 6 ? "high" : "medium";
832
- } else {
833
- const navToolKeywords = TOOL_NAV_KEYWORDS.test(navText) || TOOL_NAV_KEYWORDS_ZH.test(navText);
834
- if (navToolKeywords) {
835
- type = "tool";
836
- confidence = "medium";
837
- } else {
838
- type = "content";
839
- confidence = "high";
840
- }
1125
+ if (gameScore >= 3) {
1126
+ return {
1127
+ type: "game",
1128
+ confidence: gameScore >= 6 ? "high" : "medium",
1129
+ signals: { iframeRatio, canvasRatio, articleRatio, navGameKeywords }
1130
+ };
1131
+ }
1132
+ const navToolKeywords = TOOL_NAV_KEYWORDS.test(navText) || TOOL_NAV_KEYWORDS_ZH.test(navText);
1133
+ if (navToolKeywords) {
1134
+ return { type: "tool", confidence: "medium", signals: { iframeRatio, canvasRatio, articleRatio, navGameKeywords } };
841
1135
  }
842
- return { type, confidence, signals: { iframeRatio, canvasRatio, articleRatio, navGameKeywords } };
1136
+ const navReferenceKeywords = REFERENCE_NAV_KEYWORDS.test(navText) || REFERENCE_NAV_KEYWORDS_ZH.test(navText);
1137
+ let referenceScore = 0;
1138
+ if (articleRatio >= 0.7) referenceScore += 3;
1139
+ else if (articleRatio >= 0.5) referenceScore += 1;
1140
+ if (navReferenceKeywords) referenceScore += 3;
1141
+ if (iframeRatio < 0.1) referenceScore += 1;
1142
+ if (referenceScore >= 3) {
1143
+ return {
1144
+ type: "reference",
1145
+ confidence: referenceScore >= 6 ? "high" : "medium",
1146
+ signals: { iframeRatio, canvasRatio, articleRatio, navGameKeywords }
1147
+ };
1148
+ }
1149
+ return { type: "content", confidence: "high", signals: { iframeRatio, canvasRatio, articleRatio, navGameKeywords } };
843
1150
  }
844
1151
 
845
1152
  // src/scorer.ts
@@ -870,11 +1177,29 @@ function scorePage(pageType, contentChars, contentRatio, issues, siteType, aiSta
870
1177
  checks.push({ label: "Content depth", status: contentChars >= 300 ? "pass" : contentChars >= 100 ? "warn" : "fail", weight: 3 });
871
1178
  }
872
1179
  checks.push({ label: "Content ratio", status: contentRatio >= 30 ? "pass" : contentRatio >= 15 ? "warn" : "fail", weight: 2 });
1180
+ } else if (pageType === "video_detail") {
1181
+ if (siteType === "video") {
1182
+ checks.push({ label: "Video description", status: contentChars >= 50 ? "pass" : "warn", weight: 3 });
1183
+ checks.push({ label: "Content ratio", status: contentRatio >= 15 ? "pass" : contentRatio >= 5 ? "warn" : "fail", weight: 2 });
1184
+ } else {
1185
+ checks.push({ label: "Content depth", status: contentChars >= 300 ? "pass" : contentChars >= 100 ? "warn" : "fail", weight: 3 });
1186
+ checks.push({ label: "Content ratio", status: contentRatio >= 30 ? "pass" : contentRatio >= 15 ? "warn" : "fail", weight: 2 });
1187
+ }
1188
+ } else if (pageType === "reference_detail") {
1189
+ if (siteType === "reference") {
1190
+ checks.push({ label: "Entry completeness", status: contentChars >= 100 ? "pass" : contentChars >= 50 ? "warn" : "fail", weight: 3 });
1191
+ checks.push({ label: "Content ratio", status: contentRatio >= 20 ? "pass" : contentRatio >= 5 ? "warn" : "fail", weight: 2 });
1192
+ } else {
1193
+ checks.push({ label: "Content depth", status: contentChars >= 300 ? "pass" : contentChars >= 100 ? "warn" : "fail", weight: 3 });
1194
+ checks.push({ label: "Content ratio", status: contentRatio >= 30 ? "pass" : contentRatio >= 15 ? "warn" : "fail", weight: 2 });
1195
+ }
1196
+ } else if (pageType === "reference_listing") {
1197
+ checks.push({ label: "Listing content", status: contentChars >= 200 ? "pass" : contentChars >= 50 ? "warn" : "fail", weight: 2 });
1198
+ } else if (pageType === "listing") {
1199
+ checks.push({ label: "Content", status: contentChars >= 200 ? "pass" : contentChars >= 50 ? "warn" : "fail", weight: 2 });
873
1200
  } else if (pageType === "required") {
874
1201
  checks.push({ label: "Exists", status: contentChars > 0 ? "pass" : "fail", weight: 3 });
875
1202
  checks.push({ label: "Content depth", status: contentChars >= 300 ? "pass" : contentChars >= 100 ? "warn" : "fail", weight: 2 });
876
- } else if (pageType === "listing") {
877
- checks.push({ label: "Content", status: contentChars >= 200 ? "pass" : contentChars >= 50 ? "warn" : "fail", weight: 2 });
878
1203
  } else if (pageType === "utility") {
879
1204
  checks.push({ label: "Functional", status: contentChars > 0 ? "pass" : "warn", weight: 1 });
880
1205
  } else {
@@ -889,8 +1214,11 @@ var AI_PAGE_TYPE_WEIGHTS = {
889
1214
  homepage: 1.5,
890
1215
  content: 1,
891
1216
  game_detail: 1,
1217
+ video_detail: 1,
1218
+ reference_detail: 1,
892
1219
  unknown: 0.5,
893
1220
  listing: 0.1,
1221
+ reference_listing: 0.1,
894
1222
  required: 0.2,
895
1223
  utility: 0.1
896
1224
  };
@@ -1174,12 +1502,90 @@ function checkGameSite(pages, pagesSignals, lang) {
1174
1502
  }
1175
1503
  return items;
1176
1504
  }
1505
/**
 * Returns a short label for a crawled page: the URL's pathname when the URL
 * parses, otherwise the raw string unchanged.
 *
 * @param {string} url - Page URL as stored on the crawled page record.
 * @returns {string} Pathname of `url`, or `url` itself if it is not parseable.
 */
function subpageDisplayPath(url) {
  try {
    return new URL(url).pathname;
  } catch {
    // Deliberate best-effort: an unparseable URL is still worth listing as-is.
    return url;
  }
}

/**
 * Lists the display paths of sub-pages whose signals report less than
 * `minChars` of text.
 *
 * `subpages[i]` is matched with `subpageSignals[i]` by index. A sub-page with
 * no signal entry (or without a numeric `textLength`) is NOT treated as thin,
 * but callers still count it in their totals.
 *
 * @param {Array<{url: string}>} subpages - Crawled sub-pages.
 * @param {Array<{textLength: number}|undefined>} subpageSignals - Signals, index-aligned with `subpages`.
 * @param {number} minChars - Text length below which a page counts as thin.
 * @returns {string[]} Display paths of the thin sub-pages, in crawl order.
 */
function collectThinSubpagePaths(subpages, subpageSignals, minChars) {
  const thinPaths = [];
  subpages.forEach((page, index) => {
    const sig = subpageSignals[index];
    if (sig && sig.textLength < minChars) {
      thinPaths.push(subpageDisplayPath(page.url));
    }
  });
  return thinPaths;
}

/**
 * Content-quality checks for sites detected as "video".
 *
 * The first entry of `pages` / `pagesSignals` is skipped (the lists appear to
 * be seeded with the homepage — confirm against the caller); every remaining
 * entry is a sub-page. Two report items may be produced:
 *   - video description: "warn" when more than half of the sub-pages have
 *     fewer than 50 characters of text, otherwise "pass";
 *   - video variety (only with 3+ sub-pages): "warn" when
 *     `detectTemplatePages` flags the sub-pages as templated.
 *
 * @param {Array<{url: string}>} pages - Crawled pages.
 * @param {Array<{textLength: number}>} pagesSignals - Per-page signals, index-aligned with `pages`.
 * @param {string} lang - Language code forwarded to `t()`.
 * @returns {Array<{name: string, status: string, message: string, detail?: string}>} Report items.
 */
function checkVideoSite(pages, pagesSignals, lang) {
  const THIN_TEXT_CHARS = 50;
  const THIN_RATIO_LIMIT = 0.5;
  const MAX_LISTED_PATHS = 5;
  const MIN_PAGES_FOR_VARIETY = 3;

  const items = [];
  const subpages = pages.slice(1);
  const subpageSignals = pagesSignals.slice(1);
  const total = subpages.length;

  if (total > 0) {
    const thinPaths = collectThinSubpagePaths(subpages, subpageSignals, THIN_TEXT_CHARS);
    const name = t("item.content.video_desc", lang);
    if (thinPaths.length / total > THIN_RATIO_LIMIT) {
      items.push({
        name,
        status: "warn",
        message: t("content.video_desc.warn", lang, { thin: thinPaths.length, total }),
        detail: thinPaths.slice(0, MAX_LISTED_PATHS).join(", ")
      });
    } else {
      items.push({
        name,
        status: "pass",
        message: t("content.video_desc.pass", lang, { total })
      });
    }
  }

  if (total >= MIN_PAGES_FOR_VARIETY) {
    const tpl = detectTemplatePages(subpages);
    items.push({
      name: t("item.content.video_variety", lang),
      status: tpl.isTemplate ? "warn" : "pass",
      message: t(tpl.isTemplate ? "content.video_variety.warn" : "content.video_variety.pass", lang, { pct: tpl.similarity })
    });
  }
  return items;
}

/**
 * Content-quality checks for sites detected as "reference".
 *
 * Mirrors `checkVideoSite` with different limits: an entry is thin below 100
 * characters, and the variety item compares `detectTemplatePages(...).similarity`
 * against a fixed threshold of 70 instead of using its `isTemplate` flag.
 *
 * @param {Array<{url: string}>} pages - Crawled pages; the first entry is skipped.
 * @param {Array<{textLength: number}>} pagesSignals - Per-page signals, index-aligned with `pages`.
 * @param {string} lang - Language code forwarded to `t()`.
 * @returns {Array<{name: string, status: string, message: string, detail?: string}>} Report items.
 */
function checkReferenceSite(pages, pagesSignals, lang) {
  const THIN_TEXT_CHARS = 100;
  const THIN_RATIO_LIMIT = 0.5;
  const MAX_LISTED_PATHS = 5;
  const MIN_PAGES_FOR_VARIETY = 3;
  const SIMILARITY_WARN_PCT = 70;

  const items = [];
  const subpages = pages.slice(1);
  const subpageSignals = pagesSignals.slice(1);
  const total = subpages.length;

  if (total > 0) {
    const thinPaths = collectThinSubpagePaths(subpages, subpageSignals, THIN_TEXT_CHARS);
    const name = t("item.content.reference_entry", lang);
    if (thinPaths.length / total > THIN_RATIO_LIMIT) {
      items.push({
        name,
        status: "warn",
        message: t("content.reference_entry.warn", lang, { thin: thinPaths.length, total }),
        detail: thinPaths.slice(0, MAX_LISTED_PATHS).join(", ")
      });
    } else {
      items.push({
        name,
        status: "pass",
        message: t("content.reference_entry.pass", lang)
      });
    }
  }

  if (total >= MIN_PAGES_FOR_VARIETY) {
    const tpl = detectTemplatePages(subpages);
    const tooSimilar = tpl.similarity > SIMILARITY_WARN_PCT;
    items.push({
      name: t("item.content.reference_variety", lang),
      status: tooSimilar ? "warn" : "pass",
      message: t(tooSimilar ? "content.reference_variety.warn" : "content.reference_variety.pass", lang, { pct: tpl.similarity })
    });
  }
  return items;
}
1177
1575
  function checkContentQuality(pages, sitePageCount, lang, siteType = "content", pagesSignals) {
1178
1576
  const items = [];
1179
1577
  if (siteType === "game") {
1180
1578
  if (pagesSignals) {
1181
1579
  items.push(...checkGameSite(pages, pagesSignals, lang));
1182
1580
  }
1581
+ } else if (siteType === "video") {
1582
+ if (pagesSignals) {
1583
+ items.push(...checkVideoSite(pages, pagesSignals, lang));
1584
+ }
1585
+ } else if (siteType === "reference") {
1586
+ if (pagesSignals) {
1587
+ items.push(...checkReferenceSite(pages, pagesSignals, lang));
1588
+ }
1183
1589
  } else {
1184
1590
  items.push(...checkContentSite(pages, lang));
1185
1591
  }
@@ -1322,20 +1728,31 @@ function checkPolicyCompliance(pages, lang) {
1322
1728
  for (const page of pages) {
1323
1729
  for (const p of BLACKLIST) {
1324
1730
  const m = page.text.match(p);
1325
- if (m) violations.push({ url: page.url, match: m[0] });
1731
+ if (m) {
1732
+ const hasSubstance = page.text.replace(/\s+/g, "").length > 200;
1733
+ violations.push({ url: page.url, match: m[0], hasSubstance });
1734
+ }
1326
1735
  }
1327
1736
  }
1328
- items.push(
1329
- violations.length > 0 ? { name: t("item.policy.keywords", lang), status: "fail", message: t("policy.keywords.fail", lang, { count: violations.length }), detail: violations.map((v) => `${v.url}: "${v.match}"`).join("; ") } : { name: t("item.policy.keywords", lang), status: "pass", message: t("policy.keywords.pass", lang) }
1330
- );
1737
+ const allHaveSubstance = violations.length > 0 && violations.every((v) => v.hasSubstance);
1738
+ const status = violations.length === 0 ? "pass" : allHaveSubstance ? "warn" : "fail";
1739
+ items.push({
1740
+ name: t("item.policy.keywords", lang),
1741
+ status,
1742
+ message: violations.length > 0 ? t("policy.keywords.fail", lang, { count: violations.length }) : t("policy.keywords.pass", lang),
1743
+ detail: violations.length > 0 ? violations.map((v) => `${new URL(v.url).pathname}: "${v.match}"`).join("; ") : void 0
1744
+ });
1331
1745
  return { name: t("cat.policy", lang), items };
1332
1746
  }
1333
1747
 
1334
1748
  // src/classifier.ts
1335
- var REQUIRED_PATTERNS = [/\/about/i, /\/privacy/i, /\/contact/i, /\/terms/i, /\/legal/i];
1336
- var CONTENT_PREFIXES = ["/blog/", "/news/", "/guides/", "/articles/", "/posts/", "/tutorials/", "/wiki/"];
1749
+ var REQUIRED_PATTERNS = [/\/about/i, /\/privacy/i, /\/contact/i, /\/terms/i, /\/legal/i, /\/editorial-policy/i, /\/imprint/i];
1750
+ var CONTENT_PREFIXES = ["/blog/", "/news/", "/guides/", "/articles/", "/posts/", "/tutorials/"];
1337
1751
  var GAME_PREFIXES = ["/games/", "/game/", "/play/", "/online-games/"];
1338
- var LISTING_PATHS = ["/blog", "/news", "/guides", "/articles", "/games", "/play", "/categories", "/tags", "/archive"];
1752
+ var VIDEO_PREFIXES = ["/videos/", "/video/", "/watch/", "/v/", "/shorts/", "/clip/", "/stream/"];
1753
+ var REFERENCE_PREFIXES = ["/wiki/", "/reference/", "/docs/", "/encyclopedia/", "/glossary/", "/knowledge/", "/archive/", "/database/", "/transcript/"];
1754
+ var REFERENCE_LISTING_PATHS = ["/wiki", "/reference", "/docs", "/encyclopedia", "/glossary", "/knowledge", "/archive", "/database", "/transcript"];
1755
+ var LISTING_PATHS = ["/blog", "/news", "/guides", "/articles", "/games", "/play", "/videos", "/watch", "/channels", "/categories", "/tags", "/archive"];
1339
1756
  var UTILITY_PATTERNS = [/\/download/i, /\/search/i, /\/login/i, /\/signup/i, /\/register/i, /\/sitemap/i, /\/404/i];
1340
1757
  function classifyPage(url) {
1341
1758
  let pathname;
@@ -1361,6 +1778,19 @@ function classifyPage(url) {
1361
1778
  if (suffix.length > 1) return "game_detail";
1362
1779
  }
1363
1780
  }
1781
+ for (const prefix of VIDEO_PREFIXES) {
1782
+ if (normalizedPath.startsWith(prefix.replace(/\/$/, "/"))) {
1783
+ const suffix = normalizedPath.slice(prefix.replace(/\/$/, "").length);
1784
+ if (suffix.length > 1) return "video_detail";
1785
+ }
1786
+ }
1787
+ for (const prefix of REFERENCE_PREFIXES) {
1788
+ if (normalizedPath.startsWith(prefix.replace(/\/$/, "/"))) {
1789
+ const suffix = normalizedPath.slice(prefix.replace(/\/$/, "").length);
1790
+ if (suffix.length > 1) return "reference_detail";
1791
+ }
1792
+ }
1793
+ if (REFERENCE_LISTING_PATHS.some((p) => normalizedPath === p || normalizedPath === p.replace(/\/$/, ""))) return "reference_listing";
1364
1794
  if (LISTING_PATHS.some((p) => normalizedPath === p || normalizedPath === p.replace(/\/$/, ""))) return "listing";
1365
1795
  const langPrefix = normalizedPath.match(/^\/[a-z]{2}(\/|$)/);
1366
1796
  if (langPrefix) {
@@ -1380,6 +1810,20 @@ function classifyPage(url) {
1380
1810
  return "listing";
1381
1811
  }
1382
1812
  }
1813
+ for (const prefix of VIDEO_PREFIXES) {
1814
+ if (rest.startsWith(prefix.replace(/\/$/, "/"))) {
1815
+ const suffix = rest.slice(prefix.replace(/\/$/, "").length);
1816
+ if (suffix.length > 1) return "video_detail";
1817
+ return "listing";
1818
+ }
1819
+ }
1820
+ for (const prefix of REFERENCE_PREFIXES) {
1821
+ if (rest.startsWith(prefix.replace(/\/$/, "/"))) {
1822
+ const suffix = rest.slice(prefix.replace(/\/$/, "").length);
1823
+ if (suffix.length > 1) return "reference_detail";
1824
+ return "reference_listing";
1825
+ }
1826
+ }
1383
1827
  if (REQUIRED_PATTERNS.some((p) => p.test(rest))) return "required";
1384
1828
  }
1385
1829
  return "unknown";
@@ -1408,7 +1852,12 @@ function buildPageDetails(pages, aiAnalyses, siteType) {
1408
1852
  const contentRatio = totalChars > 0 ? Math.round(contentChars / totalChars * 100) : 0;
1409
1853
  const issues = [];
1410
1854
  let contentStatus = "pass";
1411
- if (siteType === "content") {
1855
+ const ai = aiMap.get(page.url);
1856
+ const aiStatus = ai?.status;
1857
+ const relevance = ai?.relevance;
1858
+ const pageType = ai?.inferredPageType ?? classifyPage(page.url);
1859
+ const isFunctional = pageType === "required" || pageType === "utility";
1860
+ if (siteType === "content" && !isFunctional) {
1412
1861
  if (contentRatio < 30 && totalChars > 200) {
1413
1862
  issues.push(`Content ratio only ${contentRatio}%, mostly boilerplate`);
1414
1863
  contentStatus = "fail";
@@ -1418,10 +1867,6 @@ function buildPageDetails(pages, aiAnalyses, siteType) {
1418
1867
  contentStatus = contentStatus === "fail" ? "fail" : "warn";
1419
1868
  }
1420
1869
  }
1421
- const pageType = classifyPage(page.url);
1422
- const ai = aiMap.get(page.url);
1423
- const aiStatus = ai?.status;
1424
- const relevance = ai?.relevance;
1425
1870
  const { score } = scorePage(pageType, contentChars, contentRatio, issues, siteType, aiStatus);
1426
1871
  const detail = { url: page.url, title: page.title, pageType, totalChars, contentChars, contentRatio, contentStatus, issues, score };
1427
1872
  if (relevance) detail.relevance = relevance;
@@ -1518,7 +1963,11 @@ async function check(options) {
1518
1963
  const allSignals = [homeData.signals];
1519
1964
  const internalLinks = homeData.links.filter((l) => {
1520
1965
  try {
1521
- return new URL(l).origin === origin && isContentUrl(l);
1966
+ const u = new URL(l);
1967
+ if (u.origin !== origin) return false;
1968
+ if (!isContentUrl(l)) return false;
1969
+ if (u.pathname === "/" && u.search.length > 0) return false;
1970
+ return true;
1522
1971
  } catch {
1523
1972
  return false;
1524
1973
  }
@@ -1533,18 +1982,24 @@ async function check(options) {
1533
1982
  const allInternal = [.../* @__PURE__ */ new Set([...internalLinks, ...sitemapInternal])];
1534
1983
  const uniqueLinks = allInternal.slice(0, phase1Limit);
1535
1984
  const deadLinks = [];
1536
- const crawledUrls = /* @__PURE__ */ new Set([url.replace(/\/+$/, "")]);
1985
+ const crawledUrls = /* @__PURE__ */ new Set([homeData.url.replace(/\/+$/, "")]);
1537
1986
  async function crawlPage(link) {
1538
- const norm = link.replace(/\/+$/, "");
1987
+ const norm = link.replace(/\/+$/, "").split("#")[0];
1539
1988
  if (crawledUrls.has(norm)) return;
1540
1989
  crawledUrls.add(norm);
1541
1990
  try {
1542
1991
  const pg = await browser.newPage();
1543
1992
  const data = await fetchPage(pg, link, timeout);
1993
+ const postNorm = data.url.replace(/\/+$/, "").split("#")[0];
1994
+ if (crawledUrls.has(postNorm) && postNorm !== norm) {
1995
+ await pg.close();
1996
+ return;
1997
+ }
1998
+ crawledUrls.add(postNorm);
1544
1999
  if (data.status >= 400) {
1545
2000
  deadLinks.push(`${link} (${data.status})`);
1546
2001
  } else {
1547
- pages.push({ url: link, text: data.text, title: data.title, links: data.links });
2002
+ pages.push({ url: data.url, text: data.text, title: data.title, links: data.links });
1548
2003
  allSignals.push(data.signals);
1549
2004
  }
1550
2005
  await pg.close();
@@ -1555,14 +2010,14 @@ async function check(options) {
1555
2010
  progress(`Phase 1: Crawling ${uniqueLinks.length} pages...`);
1556
2011
  for (let i = 0; i < uniqueLinks.length; i++) {
1557
2012
  const link = uniqueLinks[i];
1558
- progress(`Phase 1: [${i + 1}/${uniqueLinks.length}] ${new URL(link).pathname}`);
2013
+ progress(`Phase 1: [${i + 1}/${uniqueLinks.length}] ${new URL(link).pathname}${new URL(link).search}`);
1559
2014
  await crawlPage(link);
1560
2015
  }
1561
2016
  const CHILDREN_PER_LISTING = 10;
1562
2017
  const MAX_DISCOVERY_DEPTH = 3;
1563
2018
  const discoveredContent = /* @__PURE__ */ new Set();
1564
2019
  const discoveryQueue = pages.map((p) => ({ url: p.url, links: p.links, depth: 0 }));
1565
- const seenInDiscovery = new Set([...crawledUrls].map((u) => u.replace(/\/+$/, "")));
2020
+ const seenInDiscovery = new Set([...crawledUrls].map((u) => u.replace(/\/+$/, "").split("#")[0]));
1566
2021
  while (discoveryQueue.length > 0) {
1567
2022
  const current = discoveryQueue.shift();
1568
2023
  if (current.depth > MAX_DISCOVERY_DEPTH) continue;
@@ -1596,7 +2051,7 @@ async function check(options) {
1596
2051
  if (toCrawl.length > 0) progress(`Phase 2: Crawling ${toCrawl.length} content pages (from ${discoveredContent.size} discovered)...`);
1597
2052
  for (let i = 0; i < toCrawl.length; i++) {
1598
2053
  const link = toCrawl[i];
1599
- progress(`Phase 2: [${i + 1}/${toCrawl.length}] ${new URL(link).pathname}`);
2054
+ progress(`Phase 2: [${i + 1}/${toCrawl.length}] ${new URL(link).pathname}${new URL(link).search}`);
1600
2055
  await crawlPage(link);
1601
2056
  const crawledPage = pages[pages.length - 1];
1602
2057
  if (crawledPage && crawledPage.url === link) {
@@ -1674,11 +2129,55 @@ async function check(options) {
1674
2129
  aiItems.push({ name: t("item.ai.suggestions", lang), status: "warn", message: t("ai.suggestion_count", lang, { count: aiResult.suggestions.length }), detail: aiResult.suggestions.join("; ") });
1675
2130
  }
1676
2131
  allCategories.push({ name: t("group.ai_value", lang), items: aiItems, group: "soft" });
1677
- const seriousViolations = pageAnalyses.filter((a) => (a.complianceScore ?? 5) <= 2);
1678
- const suspiciousPages = pageAnalyses.filter((a) => {
2132
+ let suspiciousPages = pageAnalyses.filter((a) => {
1679
2133
  const c = a.complianceScore ?? 5;
1680
2134
  return c > 2 && c <= 5;
1681
2135
  });
2136
+ const shortTextPages = pageAnalyses.filter((a) => {
2137
+ const c = a.complianceScore ?? 5;
2138
+ const text = uniquePages.find((up) => up.url === a.url)?.text ?? "";
2139
+ return c <= 2 && text.replace(/\s+/g, "").length < 200;
2140
+ });
2141
+ const recheckUrls = new Set(suspiciousPages.map((p) => p.url));
2142
+ for (const p of shortTextPages) {
2143
+ if (!recheckUrls.has(p.url)) {
2144
+ suspiciousPages.push(p);
2145
+ recheckUrls.add(p.url);
2146
+ }
2147
+ }
2148
+ if (suspiciousPages.length > 0) {
2149
+ const apiKeyResolved2 = apiKey || process.env.AI_API_KEY;
2150
+ if (apiKeyResolved2) {
2151
+ const recheckResults = await recheckCompliance(
2152
+ suspiciousPages.map((p) => ({
2153
+ url: p.url,
2154
+ text: uniquePages.find((up) => up.url === p.url)?.text ?? "",
2155
+ firstComplianceScore: p.complianceScore ?? 5
2156
+ })),
2157
+ lang,
2158
+ progress
2159
+ );
2160
+ for (const analysis of pageAnalyses) {
2161
+ const recheck = recheckResults.get(analysis.url);
2162
+ if (recheck) {
2163
+ analysis.complianceScore = recheck.complianceScore;
2164
+ }
2165
+ }
2166
+ for (const analysis of pageAnalyses) {
2167
+ const v = analysis.valueScore ?? 5;
2168
+ const o = analysis.originalityScore ?? 5;
2169
+ const r = analysis.relevanceScore ?? 5;
2170
+ const c = analysis.complianceScore ?? 5;
2171
+ const geoMean = Math.pow(v * o * r * c, 0.25);
2172
+ analysis.status = geoMean >= 7 ? "pass" : geoMean >= 4 ? "warn" : "fail";
2173
+ }
2174
+ suspiciousPages = pageAnalyses.filter((a) => {
2175
+ const c = a.complianceScore ?? 5;
2176
+ return c > 2 && c <= 5;
2177
+ });
2178
+ }
2179
+ }
2180
+ const seriousViolations = pageAnalyses.filter((a) => (a.complianceScore ?? 5) <= 2);
1682
2181
  const complianceItems = [];
1683
2182
  if (seriousViolations.length > 0) {
1684
2183
  complianceItems.push({
@@ -1701,6 +2200,16 @@ async function check(options) {
1701
2200
  });
1702
2201
  }
1703
2202
  allCategories.push({ name: t("group.policy_compliance", lang), items: complianceItems, group: "hard" });
2203
+ const avgCompliance = pageAnalyses.length > 0 ? pageAnalyses.reduce((s, a) => s + (a.complianceScore ?? 5), 0) / pageAnalyses.length : 5;
2204
+ if (avgCompliance >= 7) {
2205
+ const policyCat2 = allCategories.find((c) => c.name === t("cat.policy", lang));
2206
+ if (policyCat2) {
2207
+ const keywordItem = policyCat2.items.find((i) => i.name === t("item.policy.keywords", lang));
2208
+ if (keywordItem && keywordItem.status === "fail") {
2209
+ keywordItem.status = "warn";
2210
+ }
2211
+ }
2212
+ }
1704
2213
  } catch (err) {
1705
2214
  allCategories.push({ name: t("group.ai_value", lang), items: [{ name: "AI", status: "skip", message: t("ai.fail", lang, { error: err instanceof Error ? err.message : String(err) }) }], group: "soft" });
1706
2215
  }