@f-o-t/content-analysis 1.0.2 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 FOT (F-O-T)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Bad Pattern Detection Module
3
+ * Detects problematic content patterns that hurt quality and SEO
4
+ */
5
+ import type { BadPatternResult } from "./plugins/types/index";
6
+ /**
7
+ * Analyze content for bad patterns
8
+ */
9
+ export declare function analyzeBadPatterns(content: string, title?: string): BadPatternResult;
10
+ //# sourceMappingURL=bad-patterns.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bad-patterns.d.ts","sourceRoot":"","sources":["../src/bad-patterns.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAc,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAG1E;;GAEG;AACH,wBAAgB,kBAAkB,CAC/B,OAAO,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,MAAM,GACd,gBAAgB,CAsOlB"}
@@ -0,0 +1,3 @@
1
+ // @bun
2
+
3
+ //# debugId=C976E1F5BBFB4D3F64756E2164756E21
@@ -0,0 +1,9 @@
1
+ {
2
+ "version": 3,
3
+ "sources": [],
4
+ "sourcesContent": [
5
+ ],
6
+ "mappings": "",
7
+ "debugId": "C976E1F5BBFB4D3F64756E2164756E21",
8
+ "names": []
9
+ }
package/dist/index.d.ts CHANGED
@@ -1,241 +1,46 @@
1
1
  /**
2
- * Content Analysis Types
3
- * All type definitions for SEO, readability, structure, and pattern analysis
4
- */
5
- type SeoIssueType = "title" | "meta_description" | "headings" | "keyword_density" | "content_length" | "readability" | "links" | "images" | "quick_answer" | "first_paragraph" | "heading_keywords" | "structure";
6
- type Severity = "error" | "warning" | "info";
7
- type SeoIssue = {
8
- type: SeoIssueType;
9
- severity: Severity;
10
- message: string;
11
- suggestion: string;
12
- };
13
- type SeoMetrics = {
14
- wordCount: number;
15
- headingCount: number;
16
- paragraphCount: number;
17
- linkCount: number;
18
- imageCount: number;
19
- hasQuickAnswer: boolean;
20
- keywordInFirstParagraph: boolean;
21
- keywordDensity?: Record<string, number>;
22
- };
23
- type SeoResult = {
24
- score: number;
25
- issues: SeoIssue[];
26
- recommendations: string[];
27
- metrics: SeoMetrics;
28
- };
29
- type SeoInput = {
30
- content: string;
31
- title?: string;
32
- metaDescription?: string;
33
- targetKeywords?: string[];
34
- };
35
- type TargetAudience = "general" | "technical" | "academic" | "casual";
36
- type ReadabilityMetrics = {
37
- sentenceCount: number;
38
- wordCount: number;
39
- avgWordsPerSentence: number;
40
- avgSyllablesPerWord: number;
41
- complexWordCount: number;
42
- complexWordPercentage: number;
43
- };
44
- type TargetScore = {
45
- min: number;
46
- max: number;
47
- description: string;
48
- };
49
- type ReadabilityResult = {
50
- fleschKincaidReadingEase: number;
51
- fleschKincaidGradeLevel: number;
52
- readabilityLevel: string;
53
- targetScore: TargetScore;
54
- isOnTarget: boolean;
55
- suggestions: string[];
56
- metrics: ReadabilityMetrics;
57
- };
58
- type ContentType = "how-to" | "comparison" | "explainer" | "listicle" | "general";
59
- type StructureIssue = {
60
- type: string;
61
- severity: Severity;
62
- message: string;
63
- suggestion: string;
64
- };
65
- type ContentStructure = {
66
- hasQuickAnswer: boolean;
67
- headingHierarchyValid: boolean;
68
- avgParagraphLength: number;
69
- hasTableOfContents: boolean;
70
- hasTables: boolean;
71
- hasConclusion: boolean;
72
- headingCount: number;
73
- wordCount: number;
74
- };
75
- type StructureResult = {
76
- score: number;
77
- issues: StructureIssue[];
78
- structure: ContentStructure;
79
- };
80
- type BadPatternType = "word_count_mention" | "word_count_in_title" | "meta_commentary" | "engagement_begging" | "endless_introduction" | "vague_instructions" | "clickbait_markers" | "filler_phrases" | "over_formatting" | "wall_of_text" | "keyword_stuffing";
81
- type BadPattern = {
82
- pattern: string;
83
- severity: "error" | "warning";
84
- locations: string[];
85
- suggestion: string;
86
- };
87
- type BadPatternResult = {
88
- hasIssues: boolean;
89
- issueCount: number;
90
- patterns: BadPattern[];
91
- };
92
- type KeywordLocationType = "title" | "heading" | "paragraph" | "first100words" | "last100words";
93
- type KeywordStatus = "optimal" | "low" | "high" | "missing";
94
- type KeywordLocation = {
95
- type: KeywordLocationType;
96
- index?: number;
97
- };
98
- type KeywordAnalysisItem = {
99
- keyword: string;
100
- count: number;
101
- density: number;
102
- locations: KeywordLocation[];
103
- status: KeywordStatus;
104
- suggestion?: string;
105
- };
106
- type TopKeyword = {
107
- keyword: string;
108
- count: number;
109
- density: number;
110
- };
111
- type KeywordMetrics = {
112
- totalWordCount: number;
113
- uniqueWordCount: number;
114
- avgKeywordDensity: number;
115
- };
116
- type KeywordAnalysisResult = {
117
- analysis: KeywordAnalysisItem[];
118
- overallScore: number;
119
- topKeywords: TopKeyword[];
120
- recommendations: string[];
121
- metrics: KeywordMetrics;
122
- };
123
- type KeywordInput = {
124
- content: string;
125
- title?: string;
126
- targetKeywords: string[];
127
- };
128
- type ContentAnalysisResult = {
129
- seo: SeoResult;
130
- readability: ReadabilityResult;
131
- structure: StructureResult;
132
- badPatterns: BadPatternResult;
133
- keywords: KeywordAnalysisResult | null;
134
- analyzedAt: string;
135
- };
136
- type AnalysisInput = {
137
- content: string;
138
- title?: string;
139
- description?: string;
140
- targetKeywords?: string[];
141
- };
142
- /**
143
- * Analyze content for bad patterns
144
- */
145
- declare function analyzeBadPatterns(content: string, title?: string): BadPatternResult;
146
- /**
147
- * Analyze keyword usage in content
148
- */
149
- declare function analyzeKeywords(input: KeywordInput): KeywordAnalysisResult;
150
- /**
151
- * Analyze content readability
152
- */
153
- declare function analyzeReadability(content: string, targetAudience?: TargetAudience): ReadabilityResult;
154
- /**
155
- * Analyze content for SEO optimization
156
- */
157
- declare function analyzeSeo(input: SeoInput): SeoResult;
158
- /**
159
- * Analyze content structure
160
- */
161
- declare function analyzeStructure(content: string, contentType?: ContentType): StructureResult;
162
- /**
163
- * Shared utility functions for content analysis
164
- */
165
- /**
166
- * Count syllables in a word using a simplified vowel group algorithm
167
- */
168
- declare function countSyllables(word: string): number;
169
- /**
170
- * Calculate Flesch-Kincaid readability metrics
171
- */
172
- declare function calculateFleschKincaid(text: string): {
173
- readingEase: number;
174
- gradeLevel: number;
175
- };
176
- /**
177
- * Convert reading ease score to human-readable level
178
- */
179
- declare function getReadabilityLevel(score: number): string;
180
- /**
181
- * Find all occurrences of a regex pattern with surrounding context
182
- */
183
- declare function findOccurrences(regex: RegExp, text: string): string[];
184
- /**
185
- * Extract words from content
186
- */
187
- declare function extractWords(content: string): string[];
188
- /**
189
- * Extract paragraphs from content
190
- */
191
- declare function extractParagraphs(content: string): string[];
192
- /**
193
- * Extract headings from markdown content
194
- */
195
- declare function extractHeadings(content: string): Array<{
196
- level: number;
197
- text: string;
198
- index: number;
199
- }>;
200
- /**
201
- * Clamp score between 0 and 100
202
- */
203
- declare function clampScore(score: number): number;
204
- /**
205
- * Check if content has a quick answer pattern in the first portion
206
- */
207
- declare function hasQuickAnswerPattern(text: string): boolean;
208
- /**
209
- * Check if content has a conclusion section
210
- */
211
- declare function hasConclusionSection(content: string): boolean;
212
- /**
213
- * Perform a comprehensive content analysis
214
- *
215
- * This function runs all available analyzers and returns a combined result:
216
- * - SEO analysis (title, meta, keywords, structure)
217
- * - Readability analysis (Flesch-Kincaid scores)
218
- * - Structure analysis (headings, paragraphs, quick answers)
219
- * - Bad pattern detection (filler phrases, clickbait, etc.)
220
- * - Keyword analysis (density, placement, recommendations)
221
- *
222
- * @param input - The content and metadata to analyze
223
- * @returns Combined analysis results from all analyzers
224
- *
225
- * @example
226
- * ```typescript
227
- * import { analyzeContent } from '@f-o-t/content-analysis';
228
- *
229
- * const result = analyzeContent({
230
- * content: '## Introduction\n\nThis is my blog post...',
231
- * title: 'My Blog Post Title',
232
- * description: 'A short description for SEO',
233
- * targetKeywords: ['blog', 'tutorial'],
234
- * });
235
- *
236
- * console.log(result.seo.score); // 85
237
- * console.log(result.readability.fleschKincaidReadingEase); // 65.2
238
- * ```
239
- */
240
- declare function analyzeContent(input: AnalysisInput): ContentAnalysisResult;
241
- export { hasQuickAnswerPattern, hasConclusionSection, getReadabilityLevel, findOccurrences, extractWords, extractParagraphs, extractHeadings, countSyllables, clampScore, calculateFleschKincaid, analyzeStructure, analyzeSeo, analyzeReadability, analyzeKeywords, analyzeContent, analyzeBadPatterns, TopKeyword, TargetScore, TargetAudience, StructureResult, StructureIssue, Severity, SeoResult, SeoMetrics, SeoIssueType, SeoIssue, SeoInput, ReadabilityResult, ReadabilityMetrics, KeywordStatus, KeywordMetrics, KeywordLocationType, KeywordLocation, KeywordInput, KeywordAnalysisResult, KeywordAnalysisItem, ContentType, ContentStructure, ContentAnalysisResult, BadPatternType, BadPatternResult, BadPattern, AnalysisInput };
2
+ * Content Analysis Library
3
+ *
4
+ * A comprehensive library for analyzing content quality, SEO optimization,
5
+ * readability, structure, and detecting problematic patterns.
6
+ *
7
+ * @packageDocumentation
8
+ */
9
+ export { analyzeBadPatterns } from "./bad-patterns";
10
+ export { analyzeKeywords } from "./keywords";
11
+ export { analyzeReadability } from "./readability";
12
+ export { analyzeSeo } from "./seo";
13
+ export { analyzeStructure } from "./structure";
14
+ export * from "./plugins/types/index";
15
+ export { calculateFleschKincaid, clampScore, countSyllables, extractHeadings, extractParagraphs, extractWords, findOccurrences, getReadabilityLevel, hasConclusionSection, hasQuickAnswerPattern, } from "./utils";
16
+ import type { AnalysisInput, ContentAnalysisResult } from "./plugins/types/index";
17
+ /**
18
+ * Perform a comprehensive content analysis
19
+ *
20
+ * This function runs all available analyzers and returns a combined result:
21
+ * - SEO analysis (title, meta, keywords, structure)
22
+ * - Readability analysis (Flesch-Kincaid scores)
23
+ * - Structure analysis (headings, paragraphs, quick answers)
24
+ * - Bad pattern detection (filler phrases, clickbait, etc.)
25
+ * - Keyword analysis (density, placement, recommendations)
26
+ *
27
+ * @param input - The content and metadata to analyze
28
+ * @returns Combined analysis results from all analyzers
29
+ *
30
+ * @example
31
+ * ```typescript
32
+ * import { analyzeContent } from '@f-o-t/content-analysis';
33
+ *
34
+ * const result = analyzeContent({
35
+ * content: '## Introduction\n\nThis is my blog post...',
36
+ * title: 'My Blog Post Title',
37
+ * description: 'A short description for SEO',
38
+ * targetKeywords: ['blog', 'tutorial'],
39
+ * });
40
+ *
41
+ * console.log(result.seo.score); // 85
42
+ * console.log(result.readability.fleschKincaidReadingEase); // 65.2
43
+ * ```
44
+ */
45
+ export declare function analyzeContent(input: AnalysisInput): ContentAnalysisResult;
46
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACpD,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AAEnD,OAAO,EAAE,UAAU,EAAE,MAAM,OAAO,CAAC;AACnC,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAG/C,cAAc,uBAAuB,CAAC;AAGtC,OAAO,EACJ,sBAAsB,EACtB,UAAU,EACV,cAAc,EACd,eAAe,EACf,iBAAiB,EACjB,YAAY,EACZ,eAAe,EACf,mBAAmB,EACnB,oBAAoB,EACpB,qBAAqB,GACvB,MAAM,SAAS,CAAC;AAQjB,OAAO,KAAK,EAAE,aAAa,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAElF;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,aAAa,GAAG,qBAAqB,CA2B1E"}
package/dist/index.js CHANGED
@@ -1,3 +1,93 @@
1
+ // @bun
2
+ import"./index-9t11m1re.js";
3
+
4
+ // src/markdown.ts
5
+ import { parseToAst } from "@f-o-t/markdown";
6
/**
 * Parses markdown with `parseToAst` and gathers the pieces the analyzers need.
 *
 * The AST is visited depth-first in document order:
 * - `heading`   -> recorded with its level, flattened text and a zero-based
 *                  ordinal counted among headings only; its text also joins
 *                  the plain-text output.
 * - `paragraph` -> recorded (and added to the plain-text output) only when its
 *                  flattened text is not blank.
 * - `link`      -> recorded as `{ href, text }`.
 * - `image`     -> recorded as `{ alt, src }`, with a missing alt stored as "".
 * - `table`     -> counted.
 * - `codeBlock` / `codeSpan` -> ignored, and their children are never visited.
 * Every other node type is only descended into.
 *
 * @param content - Markdown source text.
 * @returns `{ text, headings, links, images, tables, paragraphs }`, where
 *          `text` is the heading and paragraph text joined by blank lines.
 */
function extractFromMarkdown(content) {
  const tree = parseToAst(content);
  const headings = [];
  const paragraphs = [];
  const links = [];
  const images = [];
  const plainText = [];
  let tableCount = 0;
  let headingOrdinal = 0;

  function visit(node) {
    const kind = node.type;

    // Code is opaque to the analysis: stop here without descending.
    if (kind === "codeBlock" || kind === "codeSpan") {
      return;
    }

    if (kind === "heading") {
      const label = collectText(node);
      headings.push({ level: node.level, text: label, index: headingOrdinal });
      plainText.push(label);
      headingOrdinal += 1;
    } else if (kind === "paragraph") {
      const body = collectText(node);
      if (body.trim().length > 0) {
        paragraphs.push(body);
        plainText.push(body);
      }
    } else if (kind === "link") {
      links.push({ href: node.url, text: collectText(node) });
    } else if (kind === "image") {
      images.push({ alt: node.alt ?? "", src: node.url });
    } else if (kind === "table") {
      tableCount += 1;
    }

    // Inline links/images live inside headings and paragraphs, so always descend.
    if ("children" in node && Array.isArray(node.children)) {
      for (const child of node.children) {
        visit(child);
      }
    }
  }

  for (const topLevel of tree.children) {
    visit(topLevel);
  }

  return {
    text: plainText.join("\n\n"),
    headings,
    links,
    images,
    tables: tableCount,
    paragraphs
  };
}
73
/**
 * Flattens a markdown AST node into the plain text it contains.
 *
 * Visits `node` and its descendants in document order and concatenates the
 * `value` of every `text` node. A `codeSpan` node is skipped together with
 * anything beneath it.
 *
 * Line-break nodes contribute a single space. Without it, the last word of
 * one source line and the first word of the next are glued together
 * ("first line" + "second" -> "first linesecond"), which skews word counts
 * and hides keyword matches.
 *
 * @param node - AST node; only `type`, `value` and `children` are read.
 * @returns The concatenated text, or "" when the subtree has no text nodes.
 */
function collectText(node) {
  const parts = [];
  const walk = (current) => {
    switch (current.type) {
      case "codeSpan":
        // Inline code is excluded; do not descend either.
        return;
      case "text":
        parts.push(current.value);
        break;
      case "softBreak":
      case "hardBreak":
        // NOTE(review): assumes the parser names its line-break nodes
        // "softBreak"/"hardBreak" — confirm against the @f-o-t/markdown AST.
        parts.push(" ");
        break;
      default:
        break;
    }
    if ("children" in current && Array.isArray(current.children)) {
      for (const child of current.children) {
        walk(child);
      }
    }
  };
  walk(node);
  return parts.join("");
}
90
+
1
91
  // src/utils.ts
2
92
  function countSyllables(word) {
3
93
  const w = word.toLowerCase();
@@ -45,18 +135,25 @@ function findOccurrences(regex, text) {
45
135
  const matches = [];
46
136
  const flags = regex.flags.includes("g") ? regex.flags : `${regex.flags}g`;
47
137
  const globalRegex = new RegExp(regex.source, flags);
48
- let match;
49
- while ((match = globalRegex.exec(text)) !== null) {
138
+ let match = globalRegex.exec(text);
139
+ while (match) {
50
140
  const start = Math.max(0, match.index - 20);
51
141
  const end = Math.min(text.length, match.index + match[0].length + 20);
52
142
  const context = text.slice(start, end);
53
143
  matches.push(`...${context}...`);
144
+ match = globalRegex.exec(text);
54
145
  }
55
146
  return matches;
56
147
  }
57
148
  function extractWords(content) {
58
149
  return content.split(/\s+/).filter(Boolean);
59
150
  }
151
/**
 * Lower-cases content and splits it into alphanumeric tokens.
 *
 * Anything that is not a letter, a digit or whitespace is treated as a
 * separator. Letters and digits are matched with Unicode property escapes so
 * accented words survive intact; an ASCII-only class would split "não" into
 * "n" and "o". The input is normalized to NFC first so that letters written
 * with combining marks are matched as single letters.
 *
 * @param content - Text to tokenize.
 * @returns Lower-case tokens in order of appearance; [] when none remain.
 */
function tokenize(content) {
  const normalized = content
    .normalize("NFC")
    .toLowerCase()
    .replace(/[^\p{L}\p{N}\s]/gu, " ")
    .trim();
  if (!normalized) {
    return [];
  }
  return normalized.split(/\s+/);
}
60
157
  function extractParagraphs(content) {
61
158
  return content.split(/\n\n+/).filter(Boolean);
62
159
  }
@@ -80,7 +177,7 @@ function clampScore(score) {
80
177
  return Math.max(0, Math.min(100, score));
81
178
  }
82
179
  /**
   * Reports whether `text` contains any of three "quick answer" signals:
   *  1. an explicit marker, matched case-insensitively: `**quick answer**`,
   *     a `>` followed later by "quick" and then "answer", `tl;dr` / `tldr`,
   *     "em resumo" or "resumindo";
   *  2. a line holding a bold `**term**` followed by whitespace and one of
   *     é / is / are / was / were / significa, then whitespace;
   *  3. a line that starts and ends with `|` and has another `|` in between.
   *
   * The two `return` lines below are the removed and the added side of the
   * same statement; they differ only in spelling "é" as the `\u00E9` escape.
   *
   * @param text - Content to test.
   * @returns true when at least one of the three patterns matches.
   */
  function hasQuickAnswerPattern(text) {
83
- return /\*\*quick\s*answer\*\*|>.*quick.*answer|tl;?dr|em\s+resumo|resumindo/i.test(text) || /^.*?\*\*[^*]+\*\*\s+(?:é|is|are|was|were|significa)\s/im.test(text) || /^\|.*\|.*\|$/m.test(text);
180
+ return /\*\*quick\s*answer\*\*|>.*quick.*answer|tl;?dr|em\s+resumo|resumindo/i.test(text) || /^.*?\*\*[^*]+\*\*\s+(?:\u00E9|is|are|was|were|significa)\s/im.test(text) || /^\|.*\|.*\|$/m.test(text);
84
181
  }
85
182
  function hasConclusionSection(content) {
86
183
  return /##\s*(?:conclus|conclusion|resumo|takeaway|key\s*takeaway|final|wrapping\s*up)/i.test(content);
@@ -129,8 +226,8 @@ function analyzeBadPatterns(content, title) {
129
226
  }
130
227
  }
131
228
  const engagementPatterns = [
132
- /\b(?:não\s+esqueça\s+de|don'?t\s+forget\s+to)\s+(?:curtir|like|subscribe|seguir|compartilhar|share)/gi,
133
- /\b(?:deixe\s+(?:um\s+)?comentário|leave\s+a\s+comment|comment\s+below)/gi,
229
+ /\b(?:n\u00E3o\s+esque\u00E7a\s+de|don'?t\s+forget\s+to)\s+(?:curtir|like|subscribe|seguir|compartilhar|share)/gi,
230
+ /\b(?:deixe\s+(?:um\s+)?coment\u00E1rio|leave\s+a\s+comment|comment\s+below)/gi,
134
231
  /\b(?:inscreva-se|subscribe|sign\s+up)\s+(?:para|to|for)\s+(?:nossa|my|our|the)\s+(?:newsletter|canal|channel)/gi,
135
232
  /\b(?:compartilhe\s+com|share\s+(?:this|with))\s+(?:seus\s+amigos|your\s+friends)/gi,
136
233
  /\bsmash\s+(?:that\s+)?(?:like|subscribe)\s+button\b/gi
@@ -177,7 +274,7 @@ function analyzeBadPatterns(content, title) {
177
274
  }
178
275
  }
179
276
  const clickbaitPatterns = [
180
- /\b(?:you\s+won'?t\s+believe|você\s+não\s+vai\s+acreditar)\b/gi,
277
+ /\b(?:you\s+won'?t\s+believe|voc\u00EA\s+n\u00E3o\s+vai\s+acreditar)\b/gi,
181
278
  /\b(?:this\s+one\s+(?:trick|tip|secret))\b/gi,
182
279
  /\b(?:AMAZING|INCREDIBLE|MIND-?BLOWING)\b/g,
183
280
  /!!+|\?!+|!{3,}/g
@@ -199,8 +296,8 @@ function analyzeBadPatterns(content, title) {
199
296
  /\b(?:at\s+the\s+end\s+of\s+the\s+day|no\s+final\s+das\s+contas)\b/gi,
200
297
  /\b(?:in\s+today'?s\s+(?:digital\s+)?(?:landscape|world|age))\b/gi,
201
298
  /\b(?:(?:as\s+)?a\s+matter\s+of\s+fact)\b/gi,
202
- /\b(?:needless\s+to\s+say|escusado\s+será\s+dizer)\b/gi,
203
- /\b(?:in\s+(?:conclusion|summary)|em\s+(?:conclusão|resumo))(?:\s*[,:])\b/gi
299
+ /\b(?:needless\s+to\s+say|escusado\s+ser\u00E1\s+dizer)\b/gi,
300
+ /\b(?:in\s+(?:conclusion|summary)|em\s+(?:conclus\u00E3o|resumo))(?:\s*[,:])\b/gi
204
301
  ];
205
302
  for (const pattern of fillerPatterns) {
206
303
  const matches = findOccurrences(pattern, content);
@@ -245,7 +342,7 @@ function analyzeBadPatterns(content, title) {
245
342
  const wordsLower = content.toLowerCase();
246
343
  const totalWords = extractWords(content).length;
247
344
  const phraseCount = {};
248
- const tokens = wordsLower.match(/\b[a-záàâãéèêíïóôõöúç]{3,}\b/g) || [];
345
+ const tokens = wordsLower.match(/\b[a-z\u00E1\u00E0\u00E2\u00E3\u00E9\u00E8\u00EA\u00ED\u00EF\u00F3\u00F4\u00F5\u00F6\u00FA\u00E7]{3,}\b/g) || [];
249
346
  for (let i = 0;i < tokens.length - 1; i++) {
250
347
  const bigram = `${tokens[i]} ${tokens[i + 1]}`;
251
348
  phraseCount[bigram] = (phraseCount[bigram] || 0) + 1;
@@ -276,13 +373,13 @@ function analyzeKeywords(input) {
276
373
  const { content, title, targetKeywords } = input;
277
374
  const analysis = [];
278
375
  const recommendations = [];
279
- const words = extractWords(content);
376
+ const extracted = extractFromMarkdown(content);
377
+ const words = extractWords(extracted.text);
280
378
  const totalWordCount = words.length;
281
379
  const uniqueWords = new Set(words.map((w) => w.toLowerCase()));
282
- const contentLower = content.toLowerCase();
380
+ const contentLower = extracted.text.toLowerCase();
283
381
  const titleLower = title?.toLowerCase() || "";
284
- const headings = content.match(/^#{2,6}\s+(.+)$/gm) || [];
285
- const headingsText = headings.join(" ").toLowerCase();
382
+ const headingsText = extracted.headings.map((heading) => heading.text).join(" ").toLowerCase();
286
383
  const first100Words = words.slice(0, 100).join(" ").toLowerCase();
287
384
  const last100Words = words.slice(-100).join(" ").toLowerCase();
288
385
  let totalDensity = 0;
@@ -351,9 +448,9 @@ function analyzeKeywords(input) {
351
448
  if (highKeywords.length > 0) {
352
449
  recommendations.push(`Reduce overused keywords: ${highKeywords.map((k) => k.keyword).join(", ")}`);
353
450
  }
451
+ const tokenList = tokenize(extracted.text);
354
452
  const phraseCount = {};
355
- const tokens = contentLower.match(/\b[a-záàâãéèêíïóôõöúç]{3,}\b/g) || [];
356
- for (const token of tokens) {
453
+ for (const token of tokenList) {
357
454
  phraseCount[token] = (phraseCount[token] || 0) + 1;
358
455
  }
359
456
  const topKeywords = Object.entries(phraseCount).filter(([word]) => word.length > 4 && !["that", "this", "with", "from", "have", "been"].includes(word)).sort(([, a], [, b]) => b - a).slice(0, 10).map(([keyword, count]) => ({
@@ -361,6 +458,52 @@ function analyzeKeywords(input) {
361
458
  count,
362
459
  density: Math.round(count / totalWordCount * 1e4) / 100
363
460
  }));
461
+ const stopwords = new Set([
462
+ "the",
463
+ "and",
464
+ "for",
465
+ "with",
466
+ "that",
467
+ "this",
468
+ "from",
469
+ "have",
470
+ "been",
471
+ "your",
472
+ "you",
473
+ "are",
474
+ "was",
475
+ "were",
476
+ "not",
477
+ "can",
478
+ "will",
479
+ "its",
480
+ "their",
481
+ "about",
482
+ "into",
483
+ "more",
484
+ "than",
485
+ "when",
486
+ "what",
487
+ "which",
488
+ "who",
489
+ "how",
490
+ "why"
491
+ ]);
492
+ const topTerms = Object.entries(phraseCount).filter(([term]) => term.length > 3 && !stopwords.has(term)).sort(([, a], [, b]) => b - a).slice(0, 10).map(([term, count]) => ({
493
+ term,
494
+ count,
495
+ density: Math.round(count / totalWordCount * 1e4) / 100
496
+ }));
497
+ const bigramCount = {};
498
+ for (let index = 0;index < tokenList.length - 1; index += 1) {
499
+ const phrase = `${tokenList[index]} ${tokenList[index + 1]}`;
500
+ bigramCount[phrase] = (bigramCount[phrase] || 0) + 1;
501
+ }
502
+ const topPhrases = Object.entries(bigramCount).filter(([phrase]) => phrase.length > 5).sort(([, a], [, b]) => b - a).slice(0, 10).map(([phrase, count]) => ({
503
+ phrase,
504
+ count,
505
+ density: Math.round(count / totalWordCount * 1e4) / 100
506
+ }));
364
507
  const metrics = {
365
508
  totalWordCount,
366
509
  uniqueWordCount: uniqueWords.size,
@@ -370,6 +513,8 @@ function analyzeKeywords(input) {
370
513
  analysis,
371
514
  overallScore,
372
515
  topKeywords,
516
+ topTerms,
517
+ topPhrases,
373
518
  recommendations,
374
519
  metrics
375
520
  };
@@ -449,15 +594,15 @@ function analyzeSeo(input) {
449
594
  const { content, title, metaDescription, targetKeywords } = input;
450
595
  const issues = [];
451
596
  const recommendations = [];
452
- const words = extractWords(content);
597
+ const extracted = extractFromMarkdown(content);
598
+ const words = extractWords(extracted.text);
453
599
  const wordCount = words.length;
454
- const paragraphs = extractParagraphs(content);
455
- const headings = content.match(/^#{1,6}\s.+$/gm) || [];
456
- const h2Headings = content.match(/^##\s.+$/gm) || [];
457
- const links = content.match(/\[.+?\]\(.+?\)/g) || [];
458
- const images = content.match(/!\[.+?\]\(.+?\)/g) || [];
459
- const firstH2Index = content.search(/^##\s/m);
460
- const firstParagraphText = firstH2Index > 0 ? content.slice(0, firstH2Index) : words.slice(0, 100).join(" ");
600
+ const paragraphs = extracted.paragraphs;
601
+ const headings = extracted.headings;
602
+ const h2Headings = headings.filter((heading) => heading.level === 2);
603
+ const links = extracted.links;
604
+ const images = extracted.images;
605
+ const firstParagraphText = paragraphs.length > 0 ? paragraphs[0] ?? "" : words.slice(0, 100).join(" ");
461
606
  let score = 100;
462
607
  if (!title) {
463
608
  issues.push({
@@ -539,7 +684,7 @@ function analyzeSeo(input) {
539
684
  });
540
685
  score -= 5;
541
686
  }
542
- const h1Headings = content.match(/^#\s.+$/gm) || [];
687
+ const h1Headings = headings.filter((heading) => heading.level === 1);
543
688
  if (h1Headings.length > 0) {
544
689
  issues.push({
545
690
  type: "headings",
@@ -550,8 +695,8 @@ function analyzeSeo(input) {
550
695
  score -= 10;
551
696
  }
552
697
  if (targetKeywords && targetKeywords.length > 0 && h2Headings.length > 0) {
553
- const h2Text = h2Headings.join(" ").toLowerCase();
554
- const hasKeywordInH2 = targetKeywords.some((kw) => h2Text.includes(kw.toLowerCase()));
698
+ const h2Text = h2Headings.map((heading) => heading.text).join(" ");
699
+ const hasKeywordInH2 = targetKeywords.some((kw) => h2Text.toLowerCase().includes(kw.toLowerCase()));
555
700
  if (!hasKeywordInH2) {
556
701
  issues.push({
557
702
  type: "heading_keywords",
@@ -631,7 +776,7 @@ function analyzeSeo(input) {
631
776
  }
632
777
  const keywordDensity = {};
633
778
  if (targetKeywords && targetKeywords.length > 0) {
634
- const contentLower = content.toLowerCase();
779
+ const contentLower = extracted.text.toLowerCase();
635
780
  for (const keyword of targetKeywords) {
636
781
  const regex = new RegExp(keyword.toLowerCase(), "gi");
637
782
  const matches = contentLower.match(regex) || [];
@@ -777,7 +922,7 @@ function analyzeStructure(content, contentType) {
777
922
  });
778
923
  score -= 5;
779
924
  }
780
- const hasTableOfContents = /##\s*(?:table of contents|sumário|índice|contents)/i.test(content) || /\[.*\]\(#.*\)/.test(content.slice(0, 500));
925
+ const hasTableOfContents = /##\s*(?:table of contents|sum\u00E1rio|\u00EDndice|contents)/i.test(content) || /\[.*\]\(#.*\)/.test(content.slice(0, 500));
781
926
  if (wordCount > 1500 && !hasTableOfContents) {
782
927
  issues.push({
783
928
  type: "table_of_contents",
@@ -889,3 +1034,5 @@ export {
889
1034
  analyzeContent,
890
1035
  analyzeBadPatterns
891
1036
  };
1037
+
1038
+ //# debugId=803CE7EA001F6D4064756E2164756E21