@f-o-t/content-analysis 1.0.2 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/bad-patterns.d.ts +10 -0
- package/dist/bad-patterns.d.ts.map +1 -0
- package/dist/index-9t11m1re.js +3 -0
- package/dist/index-9t11m1re.js.map +9 -0
- package/dist/index.d.ts +45 -240
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +175 -28
- package/dist/index.js.map +17 -0
- package/dist/keywords.d.ts +10 -0
- package/dist/keywords.d.ts.map +1 -0
- package/dist/markdown.d.ts +20 -0
- package/dist/markdown.d.ts.map +1 -0
- package/dist/plugins/types/index.d.ts +154 -0
- package/dist/plugins/types/index.d.ts.map +1 -0
- package/dist/plugins/types/index.js +4 -0
- package/dist/plugins/types/index.js.map +9 -0
- package/dist/readability.d.ts +10 -0
- package/dist/readability.d.ts.map +1 -0
- package/dist/seo.d.ts +10 -0
- package/dist/seo.d.ts.map +1 -0
- package/dist/structure.d.ts +10 -0
- package/dist/structure.d.ts.map +1 -0
- package/dist/utils.d.ts +59 -0
- package/dist/utils.d.ts.map +1 -0
- package/package.json +23 -51
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 FOT (F-O-T)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bad Pattern Detection Module
|
|
3
|
+
* Detects problematic content patterns that hurt quality and SEO
|
|
4
|
+
*/
|
|
5
|
+
import type { BadPatternResult } from "./plugins/types/index";
|
|
6
|
+
/**
|
|
7
|
+
* Analyze content for bad patterns
|
|
8
|
+
*/
|
|
9
|
+
export declare function analyzeBadPatterns(content: string, title?: string): BadPatternResult;
|
|
10
|
+
//# sourceMappingURL=bad-patterns.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bad-patterns.d.ts","sourceRoot":"","sources":["../src/bad-patterns.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAc,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAG1E;;GAEG;AACH,wBAAgB,kBAAkB,CAC/B,OAAO,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,MAAM,GACd,gBAAgB,CAsOlB"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,241 +1,46 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Content Analysis
|
|
3
|
-
*
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
};
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
description: string;
|
|
48
|
-
};
|
|
49
|
-
type ReadabilityResult = {
|
|
50
|
-
fleschKincaidReadingEase: number;
|
|
51
|
-
fleschKincaidGradeLevel: number;
|
|
52
|
-
readabilityLevel: string;
|
|
53
|
-
targetScore: TargetScore;
|
|
54
|
-
isOnTarget: boolean;
|
|
55
|
-
suggestions: string[];
|
|
56
|
-
metrics: ReadabilityMetrics;
|
|
57
|
-
};
|
|
58
|
-
type ContentType = "how-to" | "comparison" | "explainer" | "listicle" | "general";
|
|
59
|
-
type StructureIssue = {
|
|
60
|
-
type: string;
|
|
61
|
-
severity: Severity;
|
|
62
|
-
message: string;
|
|
63
|
-
suggestion: string;
|
|
64
|
-
};
|
|
65
|
-
type ContentStructure = {
|
|
66
|
-
hasQuickAnswer: boolean;
|
|
67
|
-
headingHierarchyValid: boolean;
|
|
68
|
-
avgParagraphLength: number;
|
|
69
|
-
hasTableOfContents: boolean;
|
|
70
|
-
hasTables: boolean;
|
|
71
|
-
hasConclusion: boolean;
|
|
72
|
-
headingCount: number;
|
|
73
|
-
wordCount: number;
|
|
74
|
-
};
|
|
75
|
-
type StructureResult = {
|
|
76
|
-
score: number;
|
|
77
|
-
issues: StructureIssue[];
|
|
78
|
-
structure: ContentStructure;
|
|
79
|
-
};
|
|
80
|
-
type BadPatternType = "word_count_mention" | "word_count_in_title" | "meta_commentary" | "engagement_begging" | "endless_introduction" | "vague_instructions" | "clickbait_markers" | "filler_phrases" | "over_formatting" | "wall_of_text" | "keyword_stuffing";
|
|
81
|
-
type BadPattern = {
|
|
82
|
-
pattern: string;
|
|
83
|
-
severity: "error" | "warning";
|
|
84
|
-
locations: string[];
|
|
85
|
-
suggestion: string;
|
|
86
|
-
};
|
|
87
|
-
type BadPatternResult = {
|
|
88
|
-
hasIssues: boolean;
|
|
89
|
-
issueCount: number;
|
|
90
|
-
patterns: BadPattern[];
|
|
91
|
-
};
|
|
92
|
-
type KeywordLocationType = "title" | "heading" | "paragraph" | "first100words" | "last100words";
|
|
93
|
-
type KeywordStatus = "optimal" | "low" | "high" | "missing";
|
|
94
|
-
type KeywordLocation = {
|
|
95
|
-
type: KeywordLocationType;
|
|
96
|
-
index?: number;
|
|
97
|
-
};
|
|
98
|
-
type KeywordAnalysisItem = {
|
|
99
|
-
keyword: string;
|
|
100
|
-
count: number;
|
|
101
|
-
density: number;
|
|
102
|
-
locations: KeywordLocation[];
|
|
103
|
-
status: KeywordStatus;
|
|
104
|
-
suggestion?: string;
|
|
105
|
-
};
|
|
106
|
-
type TopKeyword = {
|
|
107
|
-
keyword: string;
|
|
108
|
-
count: number;
|
|
109
|
-
density: number;
|
|
110
|
-
};
|
|
111
|
-
type KeywordMetrics = {
|
|
112
|
-
totalWordCount: number;
|
|
113
|
-
uniqueWordCount: number;
|
|
114
|
-
avgKeywordDensity: number;
|
|
115
|
-
};
|
|
116
|
-
type KeywordAnalysisResult = {
|
|
117
|
-
analysis: KeywordAnalysisItem[];
|
|
118
|
-
overallScore: number;
|
|
119
|
-
topKeywords: TopKeyword[];
|
|
120
|
-
recommendations: string[];
|
|
121
|
-
metrics: KeywordMetrics;
|
|
122
|
-
};
|
|
123
|
-
type KeywordInput = {
|
|
124
|
-
content: string;
|
|
125
|
-
title?: string;
|
|
126
|
-
targetKeywords: string[];
|
|
127
|
-
};
|
|
128
|
-
type ContentAnalysisResult = {
|
|
129
|
-
seo: SeoResult;
|
|
130
|
-
readability: ReadabilityResult;
|
|
131
|
-
structure: StructureResult;
|
|
132
|
-
badPatterns: BadPatternResult;
|
|
133
|
-
keywords: KeywordAnalysisResult | null;
|
|
134
|
-
analyzedAt: string;
|
|
135
|
-
};
|
|
136
|
-
type AnalysisInput = {
|
|
137
|
-
content: string;
|
|
138
|
-
title?: string;
|
|
139
|
-
description?: string;
|
|
140
|
-
targetKeywords?: string[];
|
|
141
|
-
};
|
|
142
|
-
/**
|
|
143
|
-
* Analyze content for bad patterns
|
|
144
|
-
*/
|
|
145
|
-
declare function analyzeBadPatterns(content: string, title?: string): BadPatternResult;
|
|
146
|
-
/**
|
|
147
|
-
* Analyze keyword usage in content
|
|
148
|
-
*/
|
|
149
|
-
declare function analyzeKeywords(input: KeywordInput): KeywordAnalysisResult;
|
|
150
|
-
/**
|
|
151
|
-
* Analyze content readability
|
|
152
|
-
*/
|
|
153
|
-
declare function analyzeReadability(content: string, targetAudience?: TargetAudience): ReadabilityResult;
|
|
154
|
-
/**
|
|
155
|
-
* Analyze content for SEO optimization
|
|
156
|
-
*/
|
|
157
|
-
declare function analyzeSeo(input: SeoInput): SeoResult;
|
|
158
|
-
/**
|
|
159
|
-
* Analyze content structure
|
|
160
|
-
*/
|
|
161
|
-
declare function analyzeStructure(content: string, contentType?: ContentType): StructureResult;
|
|
162
|
-
/**
|
|
163
|
-
* Shared utility functions for content analysis
|
|
164
|
-
*/
|
|
165
|
-
/**
|
|
166
|
-
* Count syllables in a word using a simplified vowel group algorithm
|
|
167
|
-
*/
|
|
168
|
-
declare function countSyllables(word: string): number;
|
|
169
|
-
/**
|
|
170
|
-
* Calculate Flesch-Kincaid readability metrics
|
|
171
|
-
*/
|
|
172
|
-
declare function calculateFleschKincaid(text: string): {
|
|
173
|
-
readingEase: number;
|
|
174
|
-
gradeLevel: number;
|
|
175
|
-
};
|
|
176
|
-
/**
|
|
177
|
-
* Convert reading ease score to human-readable level
|
|
178
|
-
*/
|
|
179
|
-
declare function getReadabilityLevel(score: number): string;
|
|
180
|
-
/**
|
|
181
|
-
* Find all occurrences of a regex pattern with surrounding context
|
|
182
|
-
*/
|
|
183
|
-
declare function findOccurrences(regex: RegExp, text: string): string[];
|
|
184
|
-
/**
|
|
185
|
-
* Extract words from content
|
|
186
|
-
*/
|
|
187
|
-
declare function extractWords(content: string): string[];
|
|
188
|
-
/**
|
|
189
|
-
* Extract paragraphs from content
|
|
190
|
-
*/
|
|
191
|
-
declare function extractParagraphs(content: string): string[];
|
|
192
|
-
/**
|
|
193
|
-
* Extract headings from markdown content
|
|
194
|
-
*/
|
|
195
|
-
declare function extractHeadings(content: string): Array<{
|
|
196
|
-
level: number;
|
|
197
|
-
text: string;
|
|
198
|
-
index: number;
|
|
199
|
-
}>;
|
|
200
|
-
/**
|
|
201
|
-
* Clamp score between 0 and 100
|
|
202
|
-
*/
|
|
203
|
-
declare function clampScore(score: number): number;
|
|
204
|
-
/**
|
|
205
|
-
* Check if content has a quick answer pattern in the first portion
|
|
206
|
-
*/
|
|
207
|
-
declare function hasQuickAnswerPattern(text: string): boolean;
|
|
208
|
-
/**
|
|
209
|
-
* Check if content has a conclusion section
|
|
210
|
-
*/
|
|
211
|
-
declare function hasConclusionSection(content: string): boolean;
|
|
212
|
-
/**
|
|
213
|
-
* Perform a comprehensive content analysis
|
|
214
|
-
*
|
|
215
|
-
* This function runs all available analyzers and returns a combined result:
|
|
216
|
-
* - SEO analysis (title, meta, keywords, structure)
|
|
217
|
-
* - Readability analysis (Flesch-Kincaid scores)
|
|
218
|
-
* - Structure analysis (headings, paragraphs, quick answers)
|
|
219
|
-
* - Bad pattern detection (filler phrases, clickbait, etc.)
|
|
220
|
-
* - Keyword analysis (density, placement, recommendations)
|
|
221
|
-
*
|
|
222
|
-
* @param input - The content and metadata to analyze
|
|
223
|
-
* @returns Combined analysis results from all analyzers
|
|
224
|
-
*
|
|
225
|
-
* @example
|
|
226
|
-
* ```typescript
|
|
227
|
-
* import { analyzeContent } from '@f-o-t/content-analysis';
|
|
228
|
-
*
|
|
229
|
-
* const result = analyzeContent({
|
|
230
|
-
* content: '## Introduction\n\nThis is my blog post...',
|
|
231
|
-
* title: 'My Blog Post Title',
|
|
232
|
-
* description: 'A short description for SEO',
|
|
233
|
-
* targetKeywords: ['blog', 'tutorial'],
|
|
234
|
-
* });
|
|
235
|
-
*
|
|
236
|
-
* console.log(result.seo.score); // 85
|
|
237
|
-
* console.log(result.readability.fleschKincaidReadingEase); // 65.2
|
|
238
|
-
* ```
|
|
239
|
-
*/
|
|
240
|
-
declare function analyzeContent(input: AnalysisInput): ContentAnalysisResult;
|
|
241
|
-
export { hasQuickAnswerPattern, hasConclusionSection, getReadabilityLevel, findOccurrences, extractWords, extractParagraphs, extractHeadings, countSyllables, clampScore, calculateFleschKincaid, analyzeStructure, analyzeSeo, analyzeReadability, analyzeKeywords, analyzeContent, analyzeBadPatterns, TopKeyword, TargetScore, TargetAudience, StructureResult, StructureIssue, Severity, SeoResult, SeoMetrics, SeoIssueType, SeoIssue, SeoInput, ReadabilityResult, ReadabilityMetrics, KeywordStatus, KeywordMetrics, KeywordLocationType, KeywordLocation, KeywordInput, KeywordAnalysisResult, KeywordAnalysisItem, ContentType, ContentStructure, ContentAnalysisResult, BadPatternType, BadPatternResult, BadPattern, AnalysisInput };
|
|
2
|
+
* Content Analysis Library
|
|
3
|
+
*
|
|
4
|
+
* A comprehensive library for analyzing content quality, SEO optimization,
|
|
5
|
+
* readability, structure, and detecting problematic patterns.
|
|
6
|
+
*
|
|
7
|
+
* @packageDocumentation
|
|
8
|
+
*/
|
|
9
|
+
export { analyzeBadPatterns } from "./bad-patterns";
|
|
10
|
+
export { analyzeKeywords } from "./keywords";
|
|
11
|
+
export { analyzeReadability } from "./readability";
|
|
12
|
+
export { analyzeSeo } from "./seo";
|
|
13
|
+
export { analyzeStructure } from "./structure";
|
|
14
|
+
export * from "./plugins/types/index";
|
|
15
|
+
export { calculateFleschKincaid, clampScore, countSyllables, extractHeadings, extractParagraphs, extractWords, findOccurrences, getReadabilityLevel, hasConclusionSection, hasQuickAnswerPattern, } from "./utils";
|
|
16
|
+
import type { AnalysisInput, ContentAnalysisResult } from "./plugins/types/index";
|
|
17
|
+
/**
|
|
18
|
+
* Perform a comprehensive content analysis
|
|
19
|
+
*
|
|
20
|
+
* This function runs all available analyzers and returns a combined result:
|
|
21
|
+
* - SEO analysis (title, meta, keywords, structure)
|
|
22
|
+
* - Readability analysis (Flesch-Kincaid scores)
|
|
23
|
+
* - Structure analysis (headings, paragraphs, quick answers)
|
|
24
|
+
* - Bad pattern detection (filler phrases, clickbait, etc.)
|
|
25
|
+
* - Keyword analysis (density, placement, recommendations)
|
|
26
|
+
*
|
|
27
|
+
* @param input - The content and metadata to analyze
|
|
28
|
+
* @returns Combined analysis results from all analyzers
|
|
29
|
+
*
|
|
30
|
+
* @example
|
|
31
|
+
* ```typescript
|
|
32
|
+
* import { analyzeContent } from '@f-o-t/content-analysis';
|
|
33
|
+
*
|
|
34
|
+
* const result = analyzeContent({
|
|
35
|
+
* content: '## Introduction\n\nThis is my blog post...',
|
|
36
|
+
* title: 'My Blog Post Title',
|
|
37
|
+
* description: 'A short description for SEO',
|
|
38
|
+
* targetKeywords: ['blog', 'tutorial'],
|
|
39
|
+
* });
|
|
40
|
+
*
|
|
41
|
+
* console.log(result.seo.score); // 85
|
|
42
|
+
* console.log(result.readability.fleschKincaidReadingEase); // 65.2
|
|
43
|
+
* ```
|
|
44
|
+
*/
|
|
45
|
+
export declare function analyzeContent(input: AnalysisInput): ContentAnalysisResult;
|
|
46
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACpD,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AAEnD,OAAO,EAAE,UAAU,EAAE,MAAM,OAAO,CAAC;AACnC,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAG/C,cAAc,uBAAuB,CAAC;AAGtC,OAAO,EACJ,sBAAsB,EACtB,UAAU,EACV,cAAc,EACd,eAAe,EACf,iBAAiB,EACjB,YAAY,EACZ,eAAe,EACf,mBAAmB,EACnB,oBAAoB,EACpB,qBAAqB,GACvB,MAAM,SAAS,CAAC;AAQjB,OAAO,KAAK,EAAE,aAAa,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAElF;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,aAAa,GAAG,qBAAqB,CA2B1E"}
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,93 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
import"./index-9t11m1re.js";
|
|
3
|
+
|
|
4
|
+
// src/markdown.ts
|
|
5
|
+
import { parseToAst } from "@f-o-t/markdown";
|
|
6
|
+
/**
 * Parse markdown with `parseToAst` and gather the pieces the analyzers use.
 *
 * The tree is visited depth-first. Code blocks and code spans are skipped
 * together with everything beneath them; every other node is inspected and
 * then its children (if any) are visited, so links or images nested inside a
 * heading or paragraph are still recorded.
 *
 * @param content - Markdown source handed to `parseToAst`.
 * @returns An object with:
 *   - `text`: heading and non-blank paragraph texts joined by a blank line,
 *   - `headings`: `{ level, text, index }` where `index` is the 0-based
 *     ordinal of the heading among all headings found,
 *   - `links`: `{ href, text }` for each link node,
 *   - `images`: `{ alt, src }` for each image node (`alt` defaults to ""),
 *   - `tables`: number of table nodes,
 *   - `paragraphs`: text of each paragraph that is not whitespace-only.
 */
function extractFromMarkdown(content) {
  const headings = [];
  const links = [];
  const images = [];
  const paragraphs = [];
  const textParts = [];
  let tables = 0;

  const visit = (node) => {
    const kind = node.type;

    // Code is never analyzed: do not record it and do not descend into it.
    if (kind === "codeBlock" || kind === "codeSpan") {
      return;
    }

    if (kind === "heading") {
      const headingText = collectText(node);
      // headings.length is read before the push, so it is this heading's ordinal.
      headings.push({ level: node.level, text: headingText, index: headings.length });
      textParts.push(headingText);
    } else if (kind === "paragraph") {
      const paragraphText = collectText(node);
      if (paragraphText.trim().length > 0) {
        paragraphs.push(paragraphText);
        textParts.push(paragraphText);
      }
    } else if (kind === "link") {
      links.push({ href: node.url, text: collectText(node) });
    } else if (kind === "image") {
      images.push({ alt: node.alt ?? "", src: node.url });
    } else if (kind === "table") {
      tables += 1;
    }

    if ("children" in node && Array.isArray(node.children)) {
      for (const child of node.children) {
        visit(child);
      }
    }
  };

  const ast = parseToAst(content);
  for (const topLevel of ast.children) {
    visit(topLevel);
  }

  return {
    text: textParts.join("\n\n"),
    headings,
    links,
    images,
    tables,
    paragraphs
  };
}
|
|
73
|
+
/**
 * Concatenate the `value` of every "text" node found at or beneath `node`.
 *
 * Nodes are visited in document (pre-)order. A "codeSpan" node contributes
 * nothing and its children are not visited.
 *
 * @param node - AST node to read text from.
 * @returns The text values joined with no separator.
 */
function collectText(node) {
  const fragments = [];
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (and therefore visited) left to right.
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.type === "codeSpan") {
      continue;
    }
    if (current.type === "text") {
      fragments.push(current.value);
    }
    if ("children" in current && Array.isArray(current.children)) {
      for (let i = current.children.length - 1; i >= 0; i -= 1) {
        pending.push(current.children[i]);
      }
    }
  }
  return fragments.join("");
}
|
|
90
|
+
|
|
1
91
|
// src/utils.ts
|
|
2
92
|
function countSyllables(word) {
|
|
3
93
|
const w = word.toLowerCase();
|
|
@@ -45,18 +135,25 @@ function findOccurrences(regex, text) {
|
|
|
45
135
|
const matches = [];
|
|
46
136
|
const flags = regex.flags.includes("g") ? regex.flags : `${regex.flags}g`;
|
|
47
137
|
const globalRegex = new RegExp(regex.source, flags);
|
|
48
|
-
let match;
|
|
49
|
-
while (
|
|
138
|
+
let match = globalRegex.exec(text);
|
|
139
|
+
while (match) {
|
|
50
140
|
const start = Math.max(0, match.index - 20);
|
|
51
141
|
const end = Math.min(text.length, match.index + match[0].length + 20);
|
|
52
142
|
const context = text.slice(start, end);
|
|
53
143
|
matches.push(`...${context}...`);
|
|
144
|
+
match = globalRegex.exec(text);
|
|
54
145
|
}
|
|
55
146
|
return matches;
|
|
56
147
|
}
|
|
57
148
|
/**
 * Split content on runs of whitespace and return the non-empty pieces.
 *
 * @param content - Text to split.
 * @returns The whitespace-separated words, in order, with case and
 *   punctuation left untouched.
 */
function extractWords(content) {
  const words = [];
  for (const piece of content.split(/\s+/)) {
    // Leading/trailing whitespace produces empty pieces; drop them.
    if (piece) {
      words.push(piece);
    }
  }
  return words;
}
|
|
151
|
+
/**
 * Split content into lowercase word tokens.
 *
 * The text is NFC-normalized and lowercased, then every character that is
 * not a Unicode letter, combining mark, number or whitespace is replaced by
 * a space before splitting on whitespace.
 *
 * Letters are matched with Unicode property escapes rather than `a-z` so
 * that accented words (e.g. Portuguese "conclusão") stay in one piece; an
 * ASCII-only class would turn the accented letter into a separator and split
 * the word in two. NFC normalization makes composed and decomposed spellings
 * of the same word produce the same token. For ASCII input the result is the
 * same as with an `a-z0-9` filter.
 *
 * @param content - Text to tokenize.
 * @returns Lowercase tokens in order of appearance; an empty array when the
 *   content has no letters or digits.
 */
function tokenize(content) {
  const normalized = content
    .normalize("NFC")
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, " ")
    .trim();
  if (!normalized) {
    return [];
  }
  // After trim() the string neither starts nor ends with whitespace, so
  // splitting on whitespace runs cannot yield empty tokens.
  return normalized.split(/\s+/);
}
|
|
60
157
|
/**
 * Split content into paragraphs at blank lines.
 *
 * A separator is two or more consecutive newline characters. Empty pieces
 * (from separators at the very start or end) are dropped; pieces containing
 * only spaces are kept as-is.
 *
 * @param content - Text to split.
 * @returns The paragraphs in order.
 */
function extractParagraphs(content) {
  const chunks = content.split(/\n\n+/);
  return chunks.filter((chunk) => chunk !== "");
}
|
|
@@ -80,7 +177,7 @@ function clampScore(score) {
|
|
|
80
177
|
return Math.max(0, Math.min(100, score));
|
|
81
178
|
}
|
|
82
179
|
/**
 * Report whether the given text contains a "quick answer" style marker.
 *
 * Returns true as soon as any one of these matches:
 *   1. an explicit marker: bold "quick answer", a ">" line mentioning
 *      quick ... answer, "tl;dr"/"tldr", "em resumo" or "resumindo"
 *      (case-insensitive);
 *   2. a line with a bold term followed by a defining verb
 *      ("é", "is", "are", "was", "were", "significa");
 *   3. a line that starts and ends with "|" and has another "|" between
 *      (a markdown table row).
 *
 * @param text - Text to inspect.
 * @returns `true` if any pattern matches, otherwise `false`.
 */
function hasQuickAnswerPattern(text) {
  const detectors = [
    /\*\*quick\s*answer\*\*|>.*quick.*answer|tl;?dr|em\s+resumo|resumindo/i,
    /^.*?\*\*[^*]+\*\*\s+(?:\u00E9|is|are|was|were|significa)\s/im,
    /^\|.*\|.*\|$/m
  ];
  return detectors.some((detector) => detector.test(text));
}
|
|
85
182
|
function hasConclusionSection(content) {
|
|
86
183
|
return /##\s*(?:conclus|conclusion|resumo|takeaway|key\s*takeaway|final|wrapping\s*up)/i.test(content);
|
|
@@ -129,8 +226,8 @@ function analyzeBadPatterns(content, title) {
|
|
|
129
226
|
}
|
|
130
227
|
}
|
|
131
228
|
const engagementPatterns = [
|
|
132
|
-
/\b(?:
|
|
133
|
-
/\b(?:deixe\s+(?:um\s+)?
|
|
229
|
+
/\b(?:n\u00E3o\s+esque\u00E7a\s+de|don'?t\s+forget\s+to)\s+(?:curtir|like|subscribe|seguir|compartilhar|share)/gi,
|
|
230
|
+
/\b(?:deixe\s+(?:um\s+)?coment\u00E1rio|leave\s+a\s+comment|comment\s+below)/gi,
|
|
134
231
|
/\b(?:inscreva-se|subscribe|sign\s+up)\s+(?:para|to|for)\s+(?:nossa|my|our|the)\s+(?:newsletter|canal|channel)/gi,
|
|
135
232
|
/\b(?:compartilhe\s+com|share\s+(?:this|with))\s+(?:seus\s+amigos|your\s+friends)/gi,
|
|
136
233
|
/\bsmash\s+(?:that\s+)?(?:like|subscribe)\s+button\b/gi
|
|
@@ -177,7 +274,7 @@ function analyzeBadPatterns(content, title) {
|
|
|
177
274
|
}
|
|
178
275
|
}
|
|
179
276
|
const clickbaitPatterns = [
|
|
180
|
-
/\b(?:you\s+won'?t\s+believe|voc
|
|
277
|
+
/\b(?:you\s+won'?t\s+believe|voc\u00EA\s+n\u00E3o\s+vai\s+acreditar)\b/gi,
|
|
181
278
|
/\b(?:this\s+one\s+(?:trick|tip|secret))\b/gi,
|
|
182
279
|
/\b(?:AMAZING|INCREDIBLE|MIND-?BLOWING)\b/g,
|
|
183
280
|
/!!+|\?!+|!{3,}/g
|
|
@@ -199,8 +296,8 @@ function analyzeBadPatterns(content, title) {
|
|
|
199
296
|
/\b(?:at\s+the\s+end\s+of\s+the\s+day|no\s+final\s+das\s+contas)\b/gi,
|
|
200
297
|
/\b(?:in\s+today'?s\s+(?:digital\s+)?(?:landscape|world|age))\b/gi,
|
|
201
298
|
/\b(?:(?:as\s+)?a\s+matter\s+of\s+fact)\b/gi,
|
|
202
|
-
/\b(?:needless\s+to\s+say|escusado\s+ser
|
|
203
|
-
/\b(?:in\s+(?:conclusion|summary)|em\s+(?:
|
|
299
|
+
/\b(?:needless\s+to\s+say|escusado\s+ser\u00E1\s+dizer)\b/gi,
|
|
300
|
+
/\b(?:in\s+(?:conclusion|summary)|em\s+(?:conclus\u00E3o|resumo))(?:\s*[,:])\b/gi
|
|
204
301
|
];
|
|
205
302
|
for (const pattern of fillerPatterns) {
|
|
206
303
|
const matches = findOccurrences(pattern, content);
|
|
@@ -245,7 +342,7 @@ function analyzeBadPatterns(content, title) {
|
|
|
245
342
|
const wordsLower = content.toLowerCase();
|
|
246
343
|
const totalWords = extractWords(content).length;
|
|
247
344
|
const phraseCount = {};
|
|
248
|
-
const tokens = wordsLower.match(/\b[a-
|
|
345
|
+
const tokens = wordsLower.match(/\b[a-z\u00E1\u00E0\u00E2\u00E3\u00E9\u00E8\u00EA\u00ED\u00EF\u00F3\u00F4\u00F5\u00F6\u00FA\u00E7]{3,}\b/g) || [];
|
|
249
346
|
for (let i = 0;i < tokens.length - 1; i++) {
|
|
250
347
|
const bigram = `${tokens[i]} ${tokens[i + 1]}`;
|
|
251
348
|
phraseCount[bigram] = (phraseCount[bigram] || 0) + 1;
|
|
@@ -276,13 +373,13 @@ function analyzeKeywords(input) {
|
|
|
276
373
|
const { content, title, targetKeywords } = input;
|
|
277
374
|
const analysis = [];
|
|
278
375
|
const recommendations = [];
|
|
279
|
-
const
|
|
376
|
+
const extracted = extractFromMarkdown(content);
|
|
377
|
+
const words = extractWords(extracted.text);
|
|
280
378
|
const totalWordCount = words.length;
|
|
281
379
|
const uniqueWords = new Set(words.map((w) => w.toLowerCase()));
|
|
282
|
-
const contentLower =
|
|
380
|
+
const contentLower = extracted.text.toLowerCase();
|
|
283
381
|
const titleLower = title?.toLowerCase() || "";
|
|
284
|
-
const
|
|
285
|
-
const headingsText = headings.join(" ").toLowerCase();
|
|
382
|
+
const headingsText = extracted.headings.map((heading) => heading.text).join(" ").toLowerCase();
|
|
286
383
|
const first100Words = words.slice(0, 100).join(" ").toLowerCase();
|
|
287
384
|
const last100Words = words.slice(-100).join(" ").toLowerCase();
|
|
288
385
|
let totalDensity = 0;
|
|
@@ -351,9 +448,9 @@ function analyzeKeywords(input) {
|
|
|
351
448
|
if (highKeywords.length > 0) {
|
|
352
449
|
recommendations.push(`Reduce overused keywords: ${highKeywords.map((k) => k.keyword).join(", ")}`);
|
|
353
450
|
}
|
|
451
|
+
const tokenList = tokenize(extracted.text);
|
|
354
452
|
const phraseCount = {};
|
|
355
|
-
const
|
|
356
|
-
for (const token of tokens) {
|
|
453
|
+
for (const token of tokenList) {
|
|
357
454
|
phraseCount[token] = (phraseCount[token] || 0) + 1;
|
|
358
455
|
}
|
|
359
456
|
const topKeywords = Object.entries(phraseCount).filter(([word]) => word.length > 4 && !["that", "this", "with", "from", "have", "been"].includes(word)).sort(([, a], [, b]) => b - a).slice(0, 10).map(([keyword, count]) => ({
|
|
@@ -361,6 +458,52 @@ function analyzeKeywords(input) {
|
|
|
361
458
|
count,
|
|
362
459
|
density: Math.round(count / totalWordCount * 1e4) / 100
|
|
363
460
|
}));
|
|
461
|
+
const stopwords = new Set([
|
|
462
|
+
"the",
|
|
463
|
+
"and",
|
|
464
|
+
"for",
|
|
465
|
+
"with",
|
|
466
|
+
"that",
|
|
467
|
+
"this",
|
|
468
|
+
"from",
|
|
469
|
+
"have",
|
|
470
|
+
"been",
|
|
471
|
+
"your",
|
|
472
|
+
"you",
|
|
473
|
+
"are",
|
|
474
|
+
"was",
|
|
475
|
+
"were",
|
|
476
|
+
"not",
|
|
477
|
+
"can",
|
|
478
|
+
"will",
|
|
479
|
+
"its",
|
|
480
|
+
"their",
|
|
481
|
+
"about",
|
|
482
|
+
"into",
|
|
483
|
+
"more",
|
|
484
|
+
"than",
|
|
485
|
+
"when",
|
|
486
|
+
"what",
|
|
487
|
+
"which",
|
|
488
|
+
"who",
|
|
489
|
+
"how",
|
|
490
|
+
"why"
|
|
491
|
+
]);
|
|
492
|
+
const topTerms = Object.entries(phraseCount).filter(([term]) => term.length > 3 && !stopwords.has(term)).sort(([, a], [, b]) => b - a).slice(0, 10).map(([term, count]) => ({
|
|
493
|
+
term,
|
|
494
|
+
count,
|
|
495
|
+
density: Math.round(count / totalWordCount * 1e4) / 100
|
|
496
|
+
}));
|
|
497
|
+
const bigramCount = {};
|
|
498
|
+
for (let index = 0;index < tokenList.length - 1; index += 1) {
|
|
499
|
+
const phrase = `${tokenList[index]} ${tokenList[index + 1]}`;
|
|
500
|
+
bigramCount[phrase] = (bigramCount[phrase] || 0) + 1;
|
|
501
|
+
}
|
|
502
|
+
const topPhrases = Object.entries(bigramCount).filter(([phrase]) => phrase.length > 5).sort(([, a], [, b]) => b - a).slice(0, 10).map(([phrase, count]) => ({
|
|
503
|
+
phrase,
|
|
504
|
+
count,
|
|
505
|
+
density: Math.round(count / totalWordCount * 1e4) / 100
|
|
506
|
+
}));
|
|
364
507
|
const metrics = {
|
|
365
508
|
totalWordCount,
|
|
366
509
|
uniqueWordCount: uniqueWords.size,
|
|
@@ -370,6 +513,8 @@ function analyzeKeywords(input) {
|
|
|
370
513
|
analysis,
|
|
371
514
|
overallScore,
|
|
372
515
|
topKeywords,
|
|
516
|
+
topTerms,
|
|
517
|
+
topPhrases,
|
|
373
518
|
recommendations,
|
|
374
519
|
metrics
|
|
375
520
|
};
|
|
@@ -449,15 +594,15 @@ function analyzeSeo(input) {
|
|
|
449
594
|
const { content, title, metaDescription, targetKeywords } = input;
|
|
450
595
|
const issues = [];
|
|
451
596
|
const recommendations = [];
|
|
452
|
-
const
|
|
597
|
+
const extracted = extractFromMarkdown(content);
|
|
598
|
+
const words = extractWords(extracted.text);
|
|
453
599
|
const wordCount = words.length;
|
|
454
|
-
const paragraphs =
|
|
455
|
-
const headings =
|
|
456
|
-
const h2Headings =
|
|
457
|
-
const links =
|
|
458
|
-
const images =
|
|
459
|
-
const
|
|
460
|
-
const firstParagraphText = firstH2Index > 0 ? content.slice(0, firstH2Index) : words.slice(0, 100).join(" ");
|
|
600
|
+
const paragraphs = extracted.paragraphs;
|
|
601
|
+
const headings = extracted.headings;
|
|
602
|
+
const h2Headings = headings.filter((heading) => heading.level === 2);
|
|
603
|
+
const links = extracted.links;
|
|
604
|
+
const images = extracted.images;
|
|
605
|
+
const firstParagraphText = paragraphs.length > 0 ? paragraphs[0] ?? "" : words.slice(0, 100).join(" ");
|
|
461
606
|
let score = 100;
|
|
462
607
|
if (!title) {
|
|
463
608
|
issues.push({
|
|
@@ -539,7 +684,7 @@ function analyzeSeo(input) {
|
|
|
539
684
|
});
|
|
540
685
|
score -= 5;
|
|
541
686
|
}
|
|
542
|
-
const h1Headings =
|
|
687
|
+
const h1Headings = headings.filter((heading) => heading.level === 1);
|
|
543
688
|
if (h1Headings.length > 0) {
|
|
544
689
|
issues.push({
|
|
545
690
|
type: "headings",
|
|
@@ -550,8 +695,8 @@ function analyzeSeo(input) {
|
|
|
550
695
|
score -= 10;
|
|
551
696
|
}
|
|
552
697
|
if (targetKeywords && targetKeywords.length > 0 && h2Headings.length > 0) {
|
|
553
|
-
const h2Text = h2Headings.join(" ")
|
|
554
|
-
const hasKeywordInH2 = targetKeywords.some((kw) => h2Text.includes(kw.toLowerCase()));
|
|
698
|
+
const h2Text = h2Headings.map((heading) => heading.text).join(" ");
|
|
699
|
+
const hasKeywordInH2 = targetKeywords.some((kw) => h2Text.toLowerCase().includes(kw.toLowerCase()));
|
|
555
700
|
if (!hasKeywordInH2) {
|
|
556
701
|
issues.push({
|
|
557
702
|
type: "heading_keywords",
|
|
@@ -631,7 +776,7 @@ function analyzeSeo(input) {
|
|
|
631
776
|
}
|
|
632
777
|
const keywordDensity = {};
|
|
633
778
|
if (targetKeywords && targetKeywords.length > 0) {
|
|
634
|
-
const contentLower =
|
|
779
|
+
const contentLower = extracted.text.toLowerCase();
|
|
635
780
|
for (const keyword of targetKeywords) {
|
|
636
781
|
const regex = new RegExp(keyword.toLowerCase(), "gi");
|
|
637
782
|
const matches = contentLower.match(regex) || [];
|
|
@@ -777,7 +922,7 @@ function analyzeStructure(content, contentType) {
|
|
|
777
922
|
});
|
|
778
923
|
score -= 5;
|
|
779
924
|
}
|
|
780
|
-
const hasTableOfContents = /##\s*(?:table of contents|
|
|
925
|
+
const hasTableOfContents = /##\s*(?:table of contents|sum\u00E1rio|\u00EDndice|contents)/i.test(content) || /\[.*\]\(#.*\)/.test(content.slice(0, 500));
|
|
781
926
|
if (wordCount > 1500 && !hasTableOfContents) {
|
|
782
927
|
issues.push({
|
|
783
928
|
type: "table_of_contents",
|
|
@@ -889,3 +1034,5 @@ export {
|
|
|
889
1034
|
analyzeContent,
|
|
890
1035
|
analyzeBadPatterns
|
|
891
1036
|
};
|
|
1037
|
+
|
|
1038
|
+
//# debugId=803CE7EA001F6D4064756E2164756E21
|