npm - @f-o-t/content-analysis - Versions diffs - 1.0.2 → 1.0.6 - Mend

@f-o-t/content-analysis 1.0.2 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/LICENSE +21 -0
package/dist/bad-patterns.d.ts +10 -0
package/dist/bad-patterns.d.ts.map +1 -0
package/dist/index-9t11m1re.js +3 -0
package/dist/index-9t11m1re.js.map +9 -0
package/dist/index.d.ts +45 -240
package/dist/index.d.ts.map +1 -0
package/dist/index.js +175 -28
package/dist/index.js.map +17 -0
package/dist/keywords.d.ts +10 -0
package/dist/keywords.d.ts.map +1 -0
package/dist/markdown.d.ts +20 -0
package/dist/markdown.d.ts.map +1 -0
package/dist/plugins/types/index.d.ts +154 -0
package/dist/plugins/types/index.d.ts.map +1 -0
package/dist/plugins/types/index.js +4 -0
package/dist/plugins/types/index.js.map +9 -0
package/dist/readability.d.ts +10 -0
package/dist/readability.d.ts.map +1 -0
package/dist/seo.d.ts +10 -0
package/dist/seo.d.ts.map +1 -0
package/dist/structure.d.ts +10 -0
package/dist/structure.d.ts.map +1 -0
package/dist/utils.d.ts +59 -0
package/dist/utils.d.ts.map +1 -0
package/package.json +23 -51

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 FOT (F-O-T)
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/dist/bad-patterns.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+/**
+ * Bad Pattern Detection Module
+ * Detects problematic content patterns that hurt quality and SEO
+ */
+import type { BadPatternResult } from "./plugins/types/index";
+/**
+ * Analyze content for bad patterns
+ */
+export declare function analyzeBadPatterns(content: string, title?: string): BadPatternResult;
+//# sourceMappingURL=bad-patterns.d.ts.map

package/dist/bad-patterns.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"bad-patterns.d.ts","sourceRoot":"","sources":["../src/bad-patterns.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAc,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAG1E;;GAEG;AACH,wBAAgB,kBAAkB,CAC/B,OAAO,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,MAAM,GACd,gBAAgB,CAsOlB"}

package/dist/index-9t11m1re.js ADDED Viewed

@@ -0,0 +1,3 @@
+// @bun
+//# debugId=C976E1F5BBFB4D3F64756E2164756E21

package/dist/index-9t11m1re.js.map ADDED Viewed

@@ -0,0 +1,9 @@
+{
+  "version": 3,
+  "sources": [],
+  "sourcesContent": [
+  ],
+  "mappings": "",
+  "debugId": "C976E1F5BBFB4D3F64756E2164756E21",
+  "names": []
+}

package/dist/index.d.ts CHANGED Viewed

@@ -1,241 +1,46 @@
 /**
-* Content Analysis Types
-* All type definitions for SEO, readability, structure, and pattern analysis
-*/
-type SeoIssueType = "title" | "meta_description" | "headings" | "keyword_density" | "content_length" | "readability" | "links" | "images" | "quick_answer" | "first_paragraph" | "heading_keywords" | "structure";
-type Severity = "error" | "warning" | "info";
-type SeoIssue = {
-	type: SeoIssueType;
-	severity: Severity;
-	message: string;
-	suggestion: string;
-};
-type SeoMetrics = {
-	wordCount: number;
-	headingCount: number;
-	paragraphCount: number;
-	linkCount: number;
-	imageCount: number;
-	hasQuickAnswer: boolean;
-	keywordInFirstParagraph: boolean;
-	keywordDensity?: Record<string, number>;
-};
-type SeoResult = {
-	score: number;
-	issues: SeoIssue[];
-	recommendations: string[];
-	metrics: SeoMetrics;
-};
-type SeoInput = {
-	content: string;
-	title?: string;
-	metaDescription?: string;
-	targetKeywords?: string[];
-};
-type TargetAudience = "general" | "technical" | "academic" | "casual";
-type ReadabilityMetrics = {
-	sentenceCount: number;
-	wordCount: number;
-	avgWordsPerSentence: number;
-	avgSyllablesPerWord: number;
-	complexWordCount: number;
-	complexWordPercentage: number;
-};
-type TargetScore = {
-	min: number;
-	max: number;
-	description: string;
-};
-type ReadabilityResult = {
-	fleschKincaidReadingEase: number;
-	fleschKincaidGradeLevel: number;
-	readabilityLevel: string;
-	targetScore: TargetScore;
-	isOnTarget: boolean;
-	suggestions: string[];
-	metrics: ReadabilityMetrics;
-};
-type ContentType = "how-to" | "comparison" | "explainer" | "listicle" | "general";
-type StructureIssue = {
-	type: string;
-	severity: Severity;
-	message: string;
-	suggestion: string;
-};
-type ContentStructure = {
-	hasQuickAnswer: boolean;
-	headingHierarchyValid: boolean;
-	avgParagraphLength: number;
-	hasTableOfContents: boolean;
-	hasTables: boolean;
-	hasConclusion: boolean;
-	headingCount: number;
-	wordCount: number;
-};
-type StructureResult = {
-	score: number;
-	issues: StructureIssue[];
-	structure: ContentStructure;
-};
-type BadPatternType = "word_count_mention" | "word_count_in_title" | "meta_commentary" | "engagement_begging" | "endless_introduction" | "vague_instructions" | "clickbait_markers" | "filler_phrases" | "over_formatting" | "wall_of_text" | "keyword_stuffing";
-type BadPattern = {
-	pattern: string;
-	severity: "error" | "warning";
-	locations: string[];
-	suggestion: string;
-};
-type BadPatternResult = {
-	hasIssues: boolean;
-	issueCount: number;
-	patterns: BadPattern[];
-};
-type KeywordLocationType = "title" | "heading" | "paragraph" | "first100words" | "last100words";
-type KeywordStatus = "optimal" | "low" | "high" | "missing";
-type KeywordLocation = {
-	type: KeywordLocationType;
-	index?: number;
-};
-type KeywordAnalysisItem = {
-	keyword: string;
-	count: number;
-	density: number;
-	locations: KeywordLocation[];
-	status: KeywordStatus;
-	suggestion?: string;
-};
-type TopKeyword = {
-	keyword: string;
-	count: number;
-	density: number;
-};
-type KeywordMetrics = {
-	totalWordCount: number;
-	uniqueWordCount: number;
-	avgKeywordDensity: number;
-};
-type KeywordAnalysisResult = {
-	analysis: KeywordAnalysisItem[];
-	overallScore: number;
-	topKeywords: TopKeyword[];
-	recommendations: string[];
-	metrics: KeywordMetrics;
-};
-type KeywordInput = {
-	content: string;
-	title?: string;
-	targetKeywords: string[];
-};
-type ContentAnalysisResult = {
-	seo: SeoResult;
-	readability: ReadabilityResult;
-	structure: StructureResult;
-	badPatterns: BadPatternResult;
-	keywords: KeywordAnalysisResult | null;
-	analyzedAt: string;
-};
-type AnalysisInput = {
-	content: string;
-	title?: string;
-	description?: string;
-	targetKeywords?: string[];
-};
-/**
-* Analyze content for bad patterns
-*/
-declare function analyzeBadPatterns(content: string, title?: string): BadPatternResult;
-/**
-* Analyze keyword usage in content
-*/
-declare function analyzeKeywords(input: KeywordInput): KeywordAnalysisResult;
-/**
-* Analyze content readability
-*/
-declare function analyzeReadability(content: string, targetAudience?: TargetAudience): ReadabilityResult;
-/**
-* Analyze content for SEO optimization
-*/
-declare function analyzeSeo(input: SeoInput): SeoResult;
-/**
-* Analyze content structure
-*/
-declare function analyzeStructure(content: string, contentType?: ContentType): StructureResult;
-/**
-* Shared utility functions for content analysis
-*/
-/**
-* Count syllables in a word using a simplified vowel group algorithm
-*/
-declare function countSyllables(word: string): number;
-/**
-* Calculate Flesch-Kincaid readability metrics
-*/
-declare function calculateFleschKincaid(text: string): {
-	readingEase: number;
-	gradeLevel: number;
-};
-/**
-* Convert reading ease score to human-readable level
-*/
-declare function getReadabilityLevel(score: number): string;
-/**
-* Find all occurrences of a regex pattern with surrounding context
-*/
-declare function findOccurrences(regex: RegExp, text: string): string[];
-/**
-* Extract words from content
-*/
-declare function extractWords(content: string): string[];
-/**
-* Extract paragraphs from content
-*/
-declare function extractParagraphs(content: string): string[];
-/**
-* Extract headings from markdown content
-*/
-declare function extractHeadings(content: string): Array<{
-	level: number;
-	text: string;
-	index: number;
-}>;
-/**
-* Clamp score between 0 and 100
-*/
-declare function clampScore(score: number): number;
-/**
-* Check if content has a quick answer pattern in the first portion
-*/
-declare function hasQuickAnswerPattern(text: string): boolean;
-/**
-* Check if content has a conclusion section
-*/
-declare function hasConclusionSection(content: string): boolean;
-/**
-* Perform a comprehensive content analysis
-*
-* This function runs all available analyzers and returns a combined result:
-* - SEO analysis (title, meta, keywords, structure)
-* - Readability analysis (Flesch-Kincaid scores)
-* - Structure analysis (headings, paragraphs, quick answers)
-* - Bad pattern detection (filler phrases, clickbait, etc.)
-* - Keyword analysis (density, placement, recommendations)
-*
-* @param input - The content and metadata to analyze
-* @returns Combined analysis results from all analyzers
-*
-* @example
-* ```typescript
-* import { analyzeContent } from '@f-o-t/content-analysis';
-*
-* const result = analyzeContent({
-*   content: '## Introduction\n\nThis is my blog post...',
-*   title: 'My Blog Post Title',
-*   description: 'A short description for SEO',
-*   targetKeywords: ['blog', 'tutorial'],
-* });
-*
-* console.log(result.seo.score); // 85
-* console.log(result.readability.fleschKincaidReadingEase); // 65.2
-* ```
-*/
-declare function analyzeContent(input: AnalysisInput): ContentAnalysisResult;
-export { hasQuickAnswerPattern, hasConclusionSection, getReadabilityLevel, findOccurrences, extractWords, extractParagraphs, extractHeadings, countSyllables, clampScore, calculateFleschKincaid, analyzeStructure, analyzeSeo, analyzeReadability, analyzeKeywords, analyzeContent, analyzeBadPatterns, TopKeyword, TargetScore, TargetAudience, StructureResult, StructureIssue, Severity, SeoResult, SeoMetrics, SeoIssueType, SeoIssue, SeoInput, ReadabilityResult, ReadabilityMetrics, KeywordStatus, KeywordMetrics, KeywordLocationType, KeywordLocation, KeywordInput, KeywordAnalysisResult, KeywordAnalysisItem, ContentType, ContentStructure, ContentAnalysisResult, BadPatternType, BadPatternResult, BadPattern, AnalysisInput };
+ * Content Analysis Library
+ *
+ * A comprehensive library for analyzing content quality, SEO optimization,
+ * readability, structure, and detecting problematic patterns.
+ *
+ * @packageDocumentation
+ */
+export { analyzeBadPatterns } from "./bad-patterns";
+export { analyzeKeywords } from "./keywords";
+export { analyzeReadability } from "./readability";
+export { analyzeSeo } from "./seo";
+export { analyzeStructure } from "./structure";
+export * from "./plugins/types/index";
+export { calculateFleschKincaid, clampScore, countSyllables, extractHeadings, extractParagraphs, extractWords, findOccurrences, getReadabilityLevel, hasConclusionSection, hasQuickAnswerPattern, } from "./utils";
+import type { AnalysisInput, ContentAnalysisResult } from "./plugins/types/index";
+/**
+ * Perform a comprehensive content analysis
+ *
+ * This function runs all available analyzers and returns a combined result:
+ * - SEO analysis (title, meta, keywords, structure)
+ * - Readability analysis (Flesch-Kincaid scores)
+ * - Structure analysis (headings, paragraphs, quick answers)
+ * - Bad pattern detection (filler phrases, clickbait, etc.)
+ * - Keyword analysis (density, placement, recommendations)
+ *
+ * @param input - The content and metadata to analyze
+ * @returns Combined analysis results from all analyzers
+ *
+ * @example
+ * ```typescript
+ * import { analyzeContent } from '@f-o-t/content-analysis';
+ *
+ * const result = analyzeContent({
+ *   content: '## Introduction\n\nThis is my blog post...',
+ *   title: 'My Blog Post Title',
+ *   description: 'A short description for SEO',
+ *   targetKeywords: ['blog', 'tutorial'],
+ * });
+ *
+ * console.log(result.seo.score); // 85
+ * console.log(result.readability.fleschKincaidReadingEase); // 65.2
+ * ```
+ */
+export declare function analyzeContent(input: AnalysisInput): ContentAnalysisResult;
+//# sourceMappingURL=index.d.ts.map

package/dist/index.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACpD,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AAEnD,OAAO,EAAE,UAAU,EAAE,MAAM,OAAO,CAAC;AACnC,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAG/C,cAAc,uBAAuB,CAAC;AAGtC,OAAO,EACJ,sBAAsB,EACtB,UAAU,EACV,cAAc,EACd,eAAe,EACf,iBAAiB,EACjB,YAAY,EACZ,eAAe,EACf,mBAAmB,EACnB,oBAAoB,EACpB,qBAAqB,GACvB,MAAM,SAAS,CAAC;AAQjB,OAAO,KAAK,EAAE,aAAa,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAElF;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,aAAa,GAAG,qBAAqB,CA2B1E"}

package/dist/index.js CHANGED Viewed

@@ -1,3 +1,93 @@
+// @bun
+import"./index-9t11m1re.js";
+// src/markdown.ts
+import { parseToAst } from "@f-o-t/markdown";
/**
 * Parse markdown and gather the pieces the analyzers consume.
 *
 * Walks the tree returned by `parseToAst` depth-first and records headings,
 * links, images, non-empty paragraphs and a running table count. Nodes of
 * type `codeBlock` and `codeSpan` are skipped and never descended into.
 *
 * @param content - Markdown source handed to `parseToAst`.
 * @returns An object with `text` (heading and paragraph text joined by
 *   newlines), `headings`, `links`, `images`, `tables` and `paragraphs`.
 */
function extractFromMarkdown(content) {
  const tree = parseToAst(content);
  const headingList = [];
  const linkList = [];
  const imageList = [];
  const paragraphList = [];
  const textChunks = [];
  let tableTotal = 0;
  // Position of the next heading among all headings seen so far.
  let headingPosition = 0;

  function visit(node) {
    const kind = node.type;

    // Code is not treated as prose: stop here without visiting children.
    if (kind === "codeBlock" || kind === "codeSpan") {
      return;
    }

    if (kind === "heading") {
      const headingText = collectText(node);
      headingList.push({ level: node.level, text: headingText, index: headingPosition });
      textChunks.push(headingText);
      headingPosition += 1;
    } else if (kind === "paragraph") {
      const paragraphText = collectText(node);
      // Whitespace-only paragraphs are dropped from both lists.
      if (paragraphText.trim().length > 0) {
        paragraphList.push(paragraphText);
        textChunks.push(paragraphText);
      }
    } else if (kind === "link") {
      const linkText = collectText(node);
      linkList.push({ href: node.url, text: linkText });
    } else if (kind === "image") {
      imageList.push({ alt: node.alt ?? "", src: node.url });
    } else if (kind === "table") {
      tableTotal += 1;
    }

    // Every non-code node is descended into, so links and images nested in
    // paragraphs or headings are still recorded.
    if ("children" in node && Array.isArray(node.children)) {
      for (const child of node.children) {
        visit(child);
      }
    }
  }

  for (const topLevelNode of tree.children) {
    visit(topLevelNode);
  }

  return {
    text: textChunks.join("\n"),
    headings: headingList,
    links: linkList,
    images: imageList,
    tables: tableTotal,
    paragraphs: paragraphList
  };
}
/**
 * Concatenate the `value` of every `text` node under `node`, in document
 * order.
 *
 * Nodes of type `codeSpan` are not descended into, so anything nested inside
 * one is left out of the result.
 *
 * @param node - Root of the subtree to read.
 * @returns The joined text, or an empty string when no text node is found.
 */
function collectText(node) {
  const pieces = [];
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (and therefore visited) left to right.
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.type === "text") {
      pieces.push(current.value);
    }
    if (current.type === "codeSpan") {
      continue;
    }
    if ("children" in current && Array.isArray(current.children)) {
      const kids = current.children;
      for (let position = kids.length - 1; position >= 0; position -= 1) {
        pending.push(kids[position]);
      }
    }
  }
  return pieces.join("");
}
 // src/utils.ts
 function countSyllables(word) {
   const w = word.toLowerCase();
@@ -45,18 +135,25 @@ function findOccurrences(regex, text) {
   const matches = [];
   const flags = regex.flags.includes("g") ? regex.flags : `${regex.flags}g`;
   const globalRegex = new RegExp(regex.source, flags);
-  let match;
-  while ((match = globalRegex.exec(text)) !== null) {
+  let match = globalRegex.exec(text);
+  while (match) {
     const start = Math.max(0, match.index - 20);
     const end = Math.min(text.length, match.index + match[0].length + 20);
     const context = text.slice(start, end);
     matches.push(`...${context}...`);
+    match = globalRegex.exec(text);
   }
   return matches;
 }
 /**
  * Split content on runs of whitespace and discard empty fragments.
  *
  * @param content - Text to split.
  * @returns The whitespace-separated fragments, in order.
  */
 function extractWords(content) {
   const fragments = content.split(/\s+/);
   // Leading or trailing whitespace produces empty fragments; drop them.
   return fragments.filter((fragment) => fragment.length > 0);
 }
/**
 * Split content into lowercase word tokens.
 *
 * Lowercases the input, replaces every character that is not a letter,
 * combining mark, digit or whitespace with a space, collapses whitespace
 * runs, and splits on single spaces.
 *
 * Letters are matched with Unicode property escapes rather than `[a-z]`, so
 * accented words such as "conclusão" stay whole instead of being cut apart
 * at the accented character. The input is normalised to NFC first so that
 * precomposed and decomposed spellings of the same word yield one token.
 * Pure-ASCII input tokenizes exactly as before.
 *
 * @param content - Text to tokenize.
 * @returns Tokens in order of appearance; an empty array when the input
 *   contains no letters or digits.
 */
function tokenize(content) {
  const normalized = content
    .normalize("NFC")
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, " ")
    .replace(/\s+/g, " ")
    .trim();
  if (!normalized) {
    return [];
  }
  // Whitespace is already collapsed and trimmed, so a plain split cannot
  // produce empty tokens.
  return normalized.split(" ");
}
 /**
  * Split content into paragraphs at blank lines.
  *
  * Two or more consecutive newline characters act as one separator; empty
  * chunks are discarded.
  *
  * @param content - Text to split.
  * @returns The paragraph strings, in order.
  */
 function extractParagraphs(content) {
   const chunks = content.split(/\n\n+/);
   // A separator at the very start or end leaves an empty chunk; drop it.
   return chunks.filter((chunk) => chunk.length > 0);
 }
@@ -80,7 +177,7 @@ function clampScore(score) {
   return Math.max(0, Math.min(100, score));
 }
 function hasQuickAnswerPattern(text) {
-  return /\*\*quick\s*answer\*\*|>.*quick.*answer|tl;?dr|em\s+resumo|resumindo/i.test(text) || /^.*?\*\*[^*]+\*\*\s+(?:é|is|are|was|were|significa)\s/im.test(text) || /^\|.*\|.*\|$/m.test(text);
+  return /\*\*quick\s*answer\*\*|>.*quick.*answer|tl;?dr|em\s+resumo|resumindo/i.test(text) || /^.*?\*\*[^*]+\*\*\s+(?:\u00E9|is|are|was|were|significa)\s/im.test(text) || /^\|.*\|.*\|$/m.test(text);
 }
 function hasConclusionSection(content) {
   return /##\s*(?:conclus|conclusion|resumo|takeaway|key\s*takeaway|final|wrapping\s*up)/i.test(content);
@@ -129,8 +226,8 @@ function analyzeBadPatterns(content, title) {
     }
   }
   const engagementPatterns = [
-    /\b(?:não\s+esqueça\s+de|don'?t\s+forget\s+to)\s+(?:curtir|like|subscribe|seguir|compartilhar|share)/gi,
-    /\b(?:deixe\s+(?:um\s+)?comentário|leave\s+a\s+comment|comment\s+below)/gi,
+    /\b(?:n\u00E3o\s+esque\u00E7a\s+de|don'?t\s+forget\s+to)\s+(?:curtir|like|subscribe|seguir|compartilhar|share)/gi,
+    /\b(?:deixe\s+(?:um\s+)?coment\u00E1rio|leave\s+a\s+comment|comment\s+below)/gi,
     /\b(?:inscreva-se|subscribe|sign\s+up)\s+(?:para|to|for)\s+(?:nossa|my|our|the)\s+(?:newsletter|canal|channel)/gi,
     /\b(?:compartilhe\s+com|share\s+(?:this|with))\s+(?:seus\s+amigos|your\s+friends)/gi,
     /\bsmash\s+(?:that\s+)?(?:like|subscribe)\s+button\b/gi
@@ -177,7 +274,7 @@ function analyzeBadPatterns(content, title) {
     }
   }
   const clickbaitPatterns = [
-    /\b(?:you\s+won'?t\s+believe|você\s+não\s+vai\s+acreditar)\b/gi,
+    /\b(?:you\s+won'?t\s+believe|voc\u00EA\s+n\u00E3o\s+vai\s+acreditar)\b/gi,
     /\b(?:this\s+one\s+(?:trick|tip|secret))\b/gi,
     /\b(?:AMAZING|INCREDIBLE|MIND-?BLOWING)\b/g,
     /!!+|\?!+|!{3,}/g
@@ -199,8 +296,8 @@ function analyzeBadPatterns(content, title) {
     /\b(?:at\s+the\s+end\s+of\s+the\s+day|no\s+final\s+das\s+contas)\b/gi,
     /\b(?:in\s+today'?s\s+(?:digital\s+)?(?:landscape|world|age))\b/gi,
     /\b(?:(?:as\s+)?a\s+matter\s+of\s+fact)\b/gi,
-    /\b(?:needless\s+to\s+say|escusado\s+será\s+dizer)\b/gi,
-    /\b(?:in\s+(?:conclusion|summary)|em\s+(?:conclusão|resumo))(?:\s*[,:])\b/gi
+    /\b(?:needless\s+to\s+say|escusado\s+ser\u00E1\s+dizer)\b/gi,
+    /\b(?:in\s+(?:conclusion|summary)|em\s+(?:conclus\u00E3o|resumo))(?:\s*[,:])\b/gi
   ];
   for (const pattern of fillerPatterns) {
     const matches = findOccurrences(pattern, content);
@@ -245,7 +342,7 @@ function analyzeBadPatterns(content, title) {
   const wordsLower = content.toLowerCase();
   const totalWords = extractWords(content).length;
   const phraseCount = {};
-  const tokens = wordsLower.match(/\b[a-záàâãéèêíïóôõöúç]{3,}\b/g) || [];
+  const tokens = wordsLower.match(/\b[a-z\u00E1\u00E0\u00E2\u00E3\u00E9\u00E8\u00EA\u00ED\u00EF\u00F3\u00F4\u00F5\u00F6\u00FA\u00E7]{3,}\b/g) || [];
   for (let i = 0;i < tokens.length - 1; i++) {
     const bigram = `${tokens[i]} ${tokens[i + 1]}`;
     phraseCount[bigram] = (phraseCount[bigram] || 0) + 1;
@@ -276,13 +373,13 @@ function analyzeKeywords(input) {
   const { content, title, targetKeywords } = input;
   const analysis = [];
   const recommendations = [];
-  const words = extractWords(content);
+  const extracted = extractFromMarkdown(content);
+  const words = extractWords(extracted.text);
   const totalWordCount = words.length;
   const uniqueWords = new Set(words.map((w) => w.toLowerCase()));
-  const contentLower = content.toLowerCase();
+  const contentLower = extracted.text.toLowerCase();
   const titleLower = title?.toLowerCase() || "";
-  const headings = content.match(/^#{2,6}\s+(.+)$/gm) || [];
-  const headingsText = headings.join(" ").toLowerCase();
+  const headingsText = extracted.headings.map((heading) => heading.text).join(" ").toLowerCase();
   const first100Words = words.slice(0, 100).join(" ").toLowerCase();
   const last100Words = words.slice(-100).join(" ").toLowerCase();
   let totalDensity = 0;
@@ -351,9 +448,9 @@ function analyzeKeywords(input) {
   if (highKeywords.length > 0) {
     recommendations.push(`Reduce overused keywords: ${highKeywords.map((k) => k.keyword).join(", ")}`);
   }
+  const tokenList = tokenize(extracted.text);
   const phraseCount = {};
-  const tokens = contentLower.match(/\b[a-záàâãéèêíïóôõöúç]{3,}\b/g) || [];
-  for (const token of tokens) {
+  for (const token of tokenList) {
     phraseCount[token] = (phraseCount[token] || 0) + 1;
   }
   const topKeywords = Object.entries(phraseCount).filter(([word]) => word.length > 4 && !["that", "this", "with", "from", "have", "been"].includes(word)).sort(([, a], [, b]) => b - a).slice(0, 10).map(([keyword, count]) => ({
@@ -361,6 +458,52 @@ function analyzeKeywords(input) {
     count,
     density: Math.round(count / totalWordCount * 1e4) / 100
   }));
+  const stopwords = new Set([
+    "the",
+    "and",
+    "for",
+    "with",
+    "that",
+    "this",
+    "from",
+    "have",
+    "been",
+    "your",
+    "you",
+    "are",
+    "was",
+    "were",
+    "not",
+    "can",
+    "will",
+    "its",
+    "their",
+    "about",
+    "into",
+    "more",
+    "than",
+    "when",
+    "what",
+    "which",
+    "who",
+    "how",
+    "why"
+  ]);
+  const topTerms = Object.entries(phraseCount).filter(([term]) => term.length > 3 && !stopwords.has(term)).sort(([, a], [, b]) => b - a).slice(0, 10).map(([term, count]) => ({
+    term,
+    count,
+    density: Math.round(count / totalWordCount * 1e4) / 100
+  }));
+  const bigramCount = {};
+  for (let index = 0;index < tokenList.length - 1; index += 1) {
+    const phrase = `${tokenList[index]} ${tokenList[index + 1]}`;
+    bigramCount[phrase] = (bigramCount[phrase] || 0) + 1;
+  }
+  const topPhrases = Object.entries(bigramCount).filter(([phrase]) => phrase.length > 5).sort(([, a], [, b]) => b - a).slice(0, 10).map(([phrase, count]) => ({
+    phrase,
+    count,
+    density: Math.round(count / totalWordCount * 1e4) / 100
+  }));
   const metrics = {
     totalWordCount,
     uniqueWordCount: uniqueWords.size,
@@ -370,6 +513,8 @@ function analyzeKeywords(input) {
     analysis,
     overallScore,
     topKeywords,
+    topTerms,
+    topPhrases,
     recommendations,
     metrics
   };
@@ -449,15 +594,15 @@ function analyzeSeo(input) {
   const { content, title, metaDescription, targetKeywords } = input;
   const issues = [];
   const recommendations = [];
-  const words = extractWords(content);
+  const extracted = extractFromMarkdown(content);
+  const words = extractWords(extracted.text);
   const wordCount = words.length;
-  const paragraphs = extractParagraphs(content);
-  const headings = content.match(/^#{1,6}\s.+$/gm) || [];
-  const h2Headings = content.match(/^##\s.+$/gm) || [];
-  const links = content.match(/\[.+?\]\(.+?\)/g) || [];
-  const images = content.match(/!\[.+?\]\(.+?\)/g) || [];
-  const firstH2Index = content.search(/^##\s/m);
-  const firstParagraphText = firstH2Index > 0 ? content.slice(0, firstH2Index) : words.slice(0, 100).join(" ");
+  const paragraphs = extracted.paragraphs;
+  const headings = extracted.headings;
+  const h2Headings = headings.filter((heading) => heading.level === 2);
+  const links = extracted.links;
+  const images = extracted.images;
+  const firstParagraphText = paragraphs.length > 0 ? paragraphs[0] ?? "" : words.slice(0, 100).join(" ");
   let score = 100;
   if (!title) {
     issues.push({
@@ -539,7 +684,7 @@ function analyzeSeo(input) {
     });
     score -= 5;
   }
-  const h1Headings = content.match(/^#\s.+$/gm) || [];
+  const h1Headings = headings.filter((heading) => heading.level === 1);
   if (h1Headings.length > 0) {
     issues.push({
       type: "headings",
@@ -550,8 +695,8 @@ function analyzeSeo(input) {
     score -= 10;
   }
   if (targetKeywords && targetKeywords.length > 0 && h2Headings.length > 0) {
-    const h2Text = h2Headings.join(" ").toLowerCase();
-    const hasKeywordInH2 = targetKeywords.some((kw) => h2Text.includes(kw.toLowerCase()));
+    const h2Text = h2Headings.map((heading) => heading.text).join(" ");
+    const hasKeywordInH2 = targetKeywords.some((kw) => h2Text.toLowerCase().includes(kw.toLowerCase()));
     if (!hasKeywordInH2) {
       issues.push({
         type: "heading_keywords",
@@ -631,7 +776,7 @@ function analyzeSeo(input) {
   }
   const keywordDensity = {};
   if (targetKeywords && targetKeywords.length > 0) {
-    const contentLower = content.toLowerCase();
+    const contentLower = extracted.text.toLowerCase();
     for (const keyword of targetKeywords) {
       const regex = new RegExp(keyword.toLowerCase(), "gi");
       const matches = contentLower.match(regex) || [];
@@ -777,7 +922,7 @@ function analyzeStructure(content, contentType) {
     });
     score -= 5;
   }
-  const hasTableOfContents = /##\s*(?:table of contents|sumário|índice|contents)/i.test(content) || /\[.*\]\(#.*\)/.test(content.slice(0, 500));
+  const hasTableOfContents = /##\s*(?:table of contents|sum\u00E1rio|\u00EDndice|contents)/i.test(content) || /\[.*\]\(#.*\)/.test(content.slice(0, 500));
   if (wordCount > 1500 && !hasTableOfContents) {
     issues.push({
       type: "table_of_contents",
@@ -889,3 +1034,5 @@ export {
   analyzeContent,
   analyzeBadPatterns
 };
+//# debugId=803CE7EA001F6D4064756E2164756E21