@f-o-t/content-analysis 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,371 @@
1
+ # @f-o-t/content-analysis
2
+
3
+ A comprehensive content analysis library for SEO optimization, readability scoring, structure validation, and problematic pattern detection.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ # npm
9
+ npm install @f-o-t/content-analysis
10
+
11
+ # pnpm
12
+ pnpm add @f-o-t/content-analysis
13
+
14
+ # bun
15
+ bun add @f-o-t/content-analysis
16
+ ```
17
+
18
+ ## Features
19
+
20
+ - **SEO Analysis** - Title, meta description, headings, keywords, content length, links, images
21
+ - **Readability Scoring** - Flesch-Kincaid Reading Ease and Grade Level with audience targeting
22
+ - **Structure Validation** - Heading hierarchy, paragraph lengths, table of contents, conclusions
23
+ - **Bad Pattern Detection** - Clickbait, filler phrases, keyword stuffing, engagement begging
24
+ - **Keyword Analysis** - Density, placement, and optimization recommendations
25
+ - **Zero dependencies** - Lightweight and fast
26
+ - **Full TypeScript support** - All types exported
27
+
28
+ ## Quick Start
29
+
30
+ ### Combined Analysis
31
+
32
+ Run all analyzers at once with `analyzeContent()`:
33
+
34
+ ```typescript
35
+ import { analyzeContent } from "@f-o-t/content-analysis";
36
+
37
+ const result = analyzeContent({
38
+ content: "## Introduction\n\nThis is my blog post about TypeScript...",
39
+ title: "Complete Guide to TypeScript in 2024",
40
+ description: "Learn TypeScript from scratch with practical examples",
41
+ targetKeywords: ["typescript", "tutorial"],
42
+ });
43
+
44
+ console.log(result.seo.score); // 85
45
+ console.log(result.readability.fleschKincaidReadingEase); // 65.2
46
+ console.log(result.structure.structure.hasQuickAnswer); // false
47
+ console.log(result.badPatterns.hasIssues); // true
48
+ console.log(result.keywords?.overallScore); // 90
49
+ ```
50
+
51
+ ### Individual Analyzers
52
+
53
+ Use specific analyzers when you only need certain metrics:
54
+
55
+ ```typescript
56
+ import {
57
+ analyzeSeo,
58
+ analyzeReadability,
59
+ analyzeStructure,
60
+ analyzeBadPatterns,
61
+ analyzeKeywords,
62
+ } from "@f-o-t/content-analysis";
63
+
64
+ // SEO analysis
65
+ const seo = analyzeSeo({
66
+ content: "## Getting Started\n\nLearn how to...",
67
+ title: "TypeScript Tutorial",
68
+ metaDescription: "A comprehensive guide to TypeScript",
69
+ targetKeywords: ["typescript", "tutorial"],
70
+ });
71
+
72
+ // Readability analysis (`content` and `title` below are your own string variables)
73
+ const readability = analyzeReadability(content, "general");
74
+
75
+ // Structure analysis
76
+ const structure = analyzeStructure(content, "how-to");
77
+
78
+ // Bad pattern detection
79
+ const badPatterns = analyzeBadPatterns(content, title);
80
+
81
+ // Keyword analysis
82
+ const keywords = analyzeKeywords({
83
+ content,
84
+ title,
85
+ targetKeywords: ["typescript", "tutorial"],
86
+ });
87
+ ```
88
+
89
+ ## API Reference
90
+
91
+ ### `analyzeContent(input)`
92
+
93
+ Performs comprehensive content analysis using all available analyzers.
94
+
95
+ ```typescript
96
+ type AnalysisInput = {
97
+ content: string; // Markdown content to analyze
98
+ title?: string; // Page title
99
+ description?: string; // Meta description
100
+ targetKeywords?: string[]; // Keywords to track
101
+ };
102
+
103
+ type ContentAnalysisResult = {
104
+ seo: SeoResult;
105
+ readability: ReadabilityResult;
106
+ structure: StructureResult;
107
+ badPatterns: BadPatternResult;
108
+ keywords: KeywordAnalysisResult | null;
109
+ analyzedAt: string; // ISO timestamp
110
+ };
111
+ ```
112
+
113
+ ---
114
+
115
+ ### `analyzeSeo(input)`
116
+
117
+ Analyzes content for search engine optimization factors.
118
+
119
+ **Checks performed:**
120
+ - Title length (optimal: 50-60 characters)
121
+ - Title contains target keyword
122
+ - Meta description length (optimal: 150-160 characters)
123
+ - Heading structure (H1 should not be in content)
124
+ - H2 heading frequency (1 per 200-300 words)
125
+ - Keywords in H2 headings
126
+ - Content length (minimum: 600-1000 words)
127
+ - Internal/external links
128
+ - Images with alt text
129
+ - Quick answer in first 100 words
130
+ - Keyword in first paragraph
131
+ - Keyword density (optimal: 1-2%)
132
+ - Conclusion section
133
+
134
+ ```typescript
135
+ type SeoInput = {
136
+ content: string;
137
+ title?: string;
138
+ metaDescription?: string;
139
+ targetKeywords?: string[];
140
+ };
141
+
142
+ type SeoResult = {
143
+ score: number; // 0-100
144
+ issues: SeoIssue[]; // Problems found
145
+ recommendations: string[]; // Action items
146
+ metrics: SeoMetrics; // Counts and flags
147
+ };
148
+ ```
149
+
150
+ ---
151
+
152
+ ### `analyzeReadability(content, targetAudience?)`
153
+
154
+ Analyzes content readability using Flesch-Kincaid algorithms.
155
+
156
+ **Target audiences:**
157
+ | Audience | Reading Ease Range | Description |
158
+ |----------|-------------------|-------------|
159
+ | `general` | 60-70 | Easy to read for a general audience |
160
+ | `technical` | 40-60 | Technical but accessible |
161
+ | `academic` | 30-50 | Academic/professional level |
162
+ | `casual` | 70-80 | Very easy, conversational |
163
+
164
+ ```typescript
165
+ type ReadabilityResult = {
166
+ fleschKincaidReadingEase: number; // 0-100 (higher = easier)
167
+ fleschKincaidGradeLevel: number; // US grade level
168
+ readabilityLevel: string; // Human-readable description
169
+ targetScore: TargetScore; // Target range for audience
170
+ isOnTarget: boolean; // Within target range
171
+ suggestions: string[]; // Improvement suggestions
172
+ metrics: ReadabilityMetrics; // Detailed metrics
173
+ };
174
+ ```
175
+
176
+ **Reading Ease Scale:**
177
+ | Score | Level |
178
+ |-------|-------|
179
+ | 90-100 | Very Easy (5th grade) |
180
+ | 80-89 | Easy (6th grade) |
181
+ | 70-79 | Fairly Easy (7th grade) |
182
+ | 60-69 | Standard (8th-9th grade) |
183
+ | 50-59 | Fairly Difficult (10th-12th grade) |
184
+ | 30-49 | Difficult (College) |
185
+ | 0-29 | Very Difficult (College Graduate) |
186
+
187
+ ---
188
+
189
+ ### `analyzeStructure(content, contentType?)`
190
+
191
+ Analyzes content structure for SEO and readability best practices.
192
+
193
+ **Content types:**
194
+ - `how-to` - Expects numbered steps
195
+ - `comparison` - Expects comparison tables
196
+ - `listicle` - Expects multiple list items
197
+ - `explainer` - General explanatory content
198
+ - `general` - Default
199
+
200
+ **Checks performed:**
201
+ - H1 heading not in content body
202
+ - Heading hierarchy (no skipped levels)
203
+ - Quick answer in first 100 words
204
+ - Paragraph lengths (max 4 sentences recommended)
205
+ - H2 frequency (1 per 250 words)
206
+ - Table of contents for long content (1500+ words)
207
+ - Conclusion section
208
+
209
+ ```typescript
210
+ type StructureResult = {
211
+ score: number; // 0-100
212
+ issues: StructureIssue[]; // Problems found
213
+ structure: ContentStructure; // Structure metrics
214
+ };
215
+
216
+ type ContentStructure = {
217
+ hasQuickAnswer: boolean;
218
+ headingHierarchyValid: boolean;
219
+ avgParagraphLength: number;
220
+ hasTableOfContents: boolean;
221
+ hasTables: boolean;
222
+ hasConclusion: boolean;
223
+ headingCount: number;
224
+ wordCount: number;
225
+ };
226
+ ```
227
+
228
+ ---
229
+
230
+ ### `analyzeBadPatterns(content, title?)`
231
+
232
+ Detects problematic content patterns that hurt quality and SEO.
233
+
234
+ **Patterns detected:**
235
+
236
+ | Pattern | Description |
237
+ |---------|-------------|
238
+ | `word_count_mention` | References to word count in content |
239
+ | `word_count_in_title` | Word count claims in title |
240
+ | `meta_commentary` | "In this article...", "As mentioned above..." |
241
+ | `engagement_begging` | "Don't forget to like and subscribe" |
242
+ | `endless_introduction` | Introduction over 150 words |
243
+ | `vague_instructions` | "Configure appropriately", "Set up as needed" |
244
+ | `clickbait_markers` | "You won't believe...", excessive punctuation |
245
+ | `filler_phrases` | "Without further ado", "At the end of the day" |
246
+ | `over_formatting` | Excessive consecutive bold/italic |
247
+ | `wall_of_text` | Paragraphs over 100 words |
248
+ | `keyword_stuffing` | Phrase density over 3% |
249
+
250
+ ```typescript
251
+ type BadPatternResult = {
252
+ hasIssues: boolean;
253
+ issueCount: number;
254
+ patterns: BadPattern[];
255
+ };
256
+
257
+ type BadPattern = {
258
+ pattern: string; // Pattern type identifier
259
+ severity: "error" | "warning";
260
+ locations: string[]; // Context snippets
261
+ suggestion: string; // How to fix
262
+ };
263
+ ```
264
+
265
+ ---
266
+
267
+ ### `analyzeKeywords(input)`
268
+
269
+ Analyzes keyword usage, density, and placement.
270
+
271
+ **Checks performed:**
272
+ - Keyword count and density (optimal: 0.5-3%)
273
+ - Keyword locations (title, headings, first/last 100 words)
274
+ - Missing keywords
275
+ - Overused keywords
276
+ - Top 10 most frequent words in content
277
+
278
+ ```typescript
279
+ type KeywordInput = {
280
+ content: string;
281
+ title?: string;
282
+ targetKeywords: string[];
283
+ };
284
+
285
+ type KeywordAnalysisResult = {
286
+ analysis: KeywordAnalysisItem[]; // Per-keyword analysis
287
+ overallScore: number; // 0-100
288
+ topKeywords: TopKeyword[]; // Most frequent words
289
+ recommendations: string[]; // Action items
290
+ metrics: KeywordMetrics; // Word counts
291
+ };
292
+
293
+ type KeywordAnalysisItem = {
294
+ keyword: string;
295
+ count: number;
296
+ density: number; // Percentage
297
+ locations: KeywordLocation[]; // Where found
298
+ status: "optimal" | "low" | "high" | "missing";
299
+ suggestion?: string;
300
+ };
301
+ ```
302
+
303
+ ---
304
+
305
+ ### Utility Functions
306
+
307
+ Low-level utilities exported for advanced usage:
308
+
309
+ ```typescript
310
+ import {
311
+ calculateFleschKincaid, // Get reading ease and grade level
312
+ countSyllables, // Count syllables in a word
313
+ getReadabilityLevel, // Convert score to description
314
+ extractWords, // Split content into words
315
+ extractParagraphs, // Split content into paragraphs
316
+ extractHeadings, // Extract headings with levels
317
+ findOccurrences, // Find regex matches with context
318
+ hasQuickAnswerPattern, // Check for quick answer patterns
319
+ hasConclusionSection, // Check for conclusion heading
320
+ clampScore, // Clamp value to 0-100
321
+ } from "@f-o-t/content-analysis";
322
+
323
+ // Examples
324
+ const { readingEase, gradeLevel } = calculateFleschKincaid(text);
325
+ const syllables = countSyllables("comprehensive"); // 4
326
+ const level = getReadabilityLevel(65); // "Standard (8th-9th grade)"
327
+ const headings = extractHeadings(content);
328
+ // [{ level: 2, text: "Introduction", index: 0 }, ...]
329
+ ```
330
+
331
+ ## Types
332
+
333
+ All types are exported from the main package and from `@f-o-t/content-analysis/types`:
334
+
335
+ ```typescript
336
+ import type {
337
+ // Input types
338
+ AnalysisInput,
339
+ SeoInput,
340
+ KeywordInput,
341
+
342
+ // Result types
343
+ ContentAnalysisResult,
344
+ SeoResult,
345
+ ReadabilityResult,
346
+ StructureResult,
347
+ BadPatternResult,
348
+ KeywordAnalysisResult,
349
+
350
+ // Detail types
351
+ SeoIssue,
352
+ SeoMetrics,
353
+ ReadabilityMetrics,
354
+ StructureIssue,
355
+ ContentStructure,
356
+ BadPattern,
357
+ KeywordAnalysisItem,
358
+ TopKeyword,
359
+
360
+ // Enums/unions
361
+ Severity,
362
+ TargetAudience,
363
+ ContentType,
364
+ BadPatternType,
365
+ KeywordStatus,
366
+ } from "@f-o-t/content-analysis";
367
+ ```
368
+
369
+ ## License
370
+
371
+ MIT
@@ -0,0 +1,241 @@
1
+ /**
2
+ * Content Analysis Types
3
+ * All type definitions for SEO, readability, structure, and pattern analysis
4
+ */
5
+ type SeoIssueType = "title" | "meta_description" | "headings" | "keyword_density" | "content_length" | "readability" | "links" | "images" | "quick_answer" | "first_paragraph" | "heading_keywords" | "structure";
6
+ type Severity = "error" | "warning" | "info";
7
+ type SeoIssue = {
8
+ type: SeoIssueType;
9
+ severity: Severity;
10
+ message: string;
11
+ suggestion: string;
12
+ };
13
+ type SeoMetrics = {
14
+ wordCount: number;
15
+ headingCount: number;
16
+ paragraphCount: number;
17
+ linkCount: number;
18
+ imageCount: number;
19
+ hasQuickAnswer: boolean;
20
+ keywordInFirstParagraph: boolean;
21
+ keywordDensity?: Record<string, number>;
22
+ };
23
+ type SeoResult = {
24
+ score: number;
25
+ issues: SeoIssue[];
26
+ recommendations: string[];
27
+ metrics: SeoMetrics;
28
+ };
29
+ type SeoInput = {
30
+ content: string;
31
+ title?: string;
32
+ metaDescription?: string;
33
+ targetKeywords?: string[];
34
+ };
35
+ type TargetAudience = "general" | "technical" | "academic" | "casual";
36
+ type ReadabilityMetrics = {
37
+ sentenceCount: number;
38
+ wordCount: number;
39
+ avgWordsPerSentence: number;
40
+ avgSyllablesPerWord: number;
41
+ complexWordCount: number;
42
+ complexWordPercentage: number;
43
+ };
44
+ type TargetScore = {
45
+ min: number;
46
+ max: number;
47
+ description: string;
48
+ };
49
+ type ReadabilityResult = {
50
+ fleschKincaidReadingEase: number;
51
+ fleschKincaidGradeLevel: number;
52
+ readabilityLevel: string;
53
+ targetScore: TargetScore;
54
+ isOnTarget: boolean;
55
+ suggestions: string[];
56
+ metrics: ReadabilityMetrics;
57
+ };
58
+ type ContentType = "how-to" | "comparison" | "explainer" | "listicle" | "general";
59
+ type StructureIssue = {
60
+ type: string;
61
+ severity: Severity;
62
+ message: string;
63
+ suggestion: string;
64
+ };
65
+ type ContentStructure = {
66
+ hasQuickAnswer: boolean;
67
+ headingHierarchyValid: boolean;
68
+ avgParagraphLength: number;
69
+ hasTableOfContents: boolean;
70
+ hasTables: boolean;
71
+ hasConclusion: boolean;
72
+ headingCount: number;
73
+ wordCount: number;
74
+ };
75
+ type StructureResult = {
76
+ score: number;
77
+ issues: StructureIssue[];
78
+ structure: ContentStructure;
79
+ };
80
+ type BadPatternType = "word_count_mention" | "word_count_in_title" | "meta_commentary" | "engagement_begging" | "endless_introduction" | "vague_instructions" | "clickbait_markers" | "filler_phrases" | "over_formatting" | "wall_of_text" | "keyword_stuffing";
81
+ type BadPattern = {
82
+ pattern: string;
83
+ severity: "error" | "warning";
84
+ locations: string[];
85
+ suggestion: string;
86
+ };
87
+ type BadPatternResult = {
88
+ hasIssues: boolean;
89
+ issueCount: number;
90
+ patterns: BadPattern[];
91
+ };
92
+ type KeywordLocationType = "title" | "heading" | "paragraph" | "first100words" | "last100words";
93
+ type KeywordStatus = "optimal" | "low" | "high" | "missing";
94
+ type KeywordLocation = {
95
+ type: KeywordLocationType;
96
+ index?: number;
97
+ };
98
+ type KeywordAnalysisItem = {
99
+ keyword: string;
100
+ count: number;
101
+ density: number;
102
+ locations: KeywordLocation[];
103
+ status: KeywordStatus;
104
+ suggestion?: string;
105
+ };
106
+ type TopKeyword = {
107
+ keyword: string;
108
+ count: number;
109
+ density: number;
110
+ };
111
+ type KeywordMetrics = {
112
+ totalWordCount: number;
113
+ uniqueWordCount: number;
114
+ avgKeywordDensity: number;
115
+ };
116
+ type KeywordAnalysisResult = {
117
+ analysis: KeywordAnalysisItem[];
118
+ overallScore: number;
119
+ topKeywords: TopKeyword[];
120
+ recommendations: string[];
121
+ metrics: KeywordMetrics;
122
+ };
123
+ type KeywordInput = {
124
+ content: string;
125
+ title?: string;
126
+ targetKeywords: string[];
127
+ };
128
+ type ContentAnalysisResult = {
129
+ seo: SeoResult;
130
+ readability: ReadabilityResult;
131
+ structure: StructureResult;
132
+ badPatterns: BadPatternResult;
133
+ keywords: KeywordAnalysisResult | null;
134
+ analyzedAt: string;
135
+ };
136
+ type AnalysisInput = {
137
+ content: string;
138
+ title?: string;
139
+ description?: string;
140
+ targetKeywords?: string[];
141
+ };
142
+ /**
143
+ * Analyze content for bad patterns
144
+ */
145
+ declare function analyzeBadPatterns(content: string, title?: string): BadPatternResult;
146
+ /**
147
+ * Analyze keyword usage in content
148
+ */
149
+ declare function analyzeKeywords(input: KeywordInput): KeywordAnalysisResult;
150
+ /**
151
+ * Analyze content readability
152
+ */
153
+ declare function analyzeReadability(content: string, targetAudience?: TargetAudience): ReadabilityResult;
154
+ /**
155
+ * Analyze content for SEO optimization
156
+ */
157
+ declare function analyzeSeo(input: SeoInput): SeoResult;
158
+ /**
159
+ * Analyze content structure
160
+ */
161
+ declare function analyzeStructure(content: string, contentType?: ContentType): StructureResult;
162
+ /**
163
+ * Shared utility functions for content analysis
164
+ */
165
+ /**
166
+ * Count syllables in a word using a simplified vowel group algorithm
167
+ */
168
+ declare function countSyllables(word: string): number;
169
+ /**
170
+ * Calculate Flesch-Kincaid readability metrics
171
+ */
172
+ declare function calculateFleschKincaid(text: string): {
173
+ readingEase: number;
174
+ gradeLevel: number;
175
+ };
176
+ /**
177
+ * Convert reading ease score to human-readable level
178
+ */
179
+ declare function getReadabilityLevel(score: number): string;
180
+ /**
181
+ * Find all occurrences of a regex pattern with surrounding context
182
+ */
183
+ declare function findOccurrences(regex: RegExp, text: string): string[];
184
+ /**
185
+ * Extract words from content
186
+ */
187
+ declare function extractWords(content: string): string[];
188
+ /**
189
+ * Extract paragraphs from content
190
+ */
191
+ declare function extractParagraphs(content: string): string[];
192
+ /**
193
+ * Extract headings from markdown content
194
+ */
195
+ declare function extractHeadings(content: string): Array<{
196
+ level: number;
197
+ text: string;
198
+ index: number;
199
+ }>;
200
+ /**
201
+ * Clamp score between 0 and 100
202
+ */
203
+ declare function clampScore(score: number): number;
204
+ /**
205
+ * Check if content has a quick answer pattern in the first portion
206
+ */
207
+ declare function hasQuickAnswerPattern(text: string): boolean;
208
+ /**
209
+ * Check if content has a conclusion section
210
+ */
211
+ declare function hasConclusionSection(content: string): boolean;
212
+ /**
213
+ * Perform a comprehensive content analysis
214
+ *
215
+ * This function runs all available analyzers and returns a combined result:
216
+ * - SEO analysis (title, meta, keywords, structure)
217
+ * - Readability analysis (Flesch-Kincaid scores)
218
+ * - Structure analysis (headings, paragraphs, quick answers)
219
+ * - Bad pattern detection (filler phrases, clickbait, etc.)
220
+ * - Keyword analysis (density, placement, recommendations)
221
+ *
222
+ * @param input - The content and metadata to analyze
223
+ * @returns Combined analysis results from all analyzers
224
+ *
225
+ * @example
226
+ * ```typescript
227
+ * import { analyzeContent } from '@f-o-t/content-analysis';
228
+ *
229
+ * const result = analyzeContent({
230
+ * content: '## Introduction\n\nThis is my blog post...',
231
+ * title: 'My Blog Post Title',
232
+ * description: 'A short description for SEO',
233
+ * targetKeywords: ['blog', 'tutorial'],
234
+ * });
235
+ *
236
+ * console.log(result.seo.score); // 85
237
+ * console.log(result.readability.fleschKincaidReadingEase); // 65.2
238
+ * ```
239
+ */
240
+ declare function analyzeContent(input: AnalysisInput): ContentAnalysisResult;
241
+ export { hasQuickAnswerPattern, hasConclusionSection, getReadabilityLevel, findOccurrences, extractWords, extractParagraphs, extractHeadings, countSyllables, clampScore, calculateFleschKincaid, analyzeStructure, analyzeSeo, analyzeReadability, analyzeKeywords, analyzeContent, analyzeBadPatterns, TopKeyword, TargetScore, TargetAudience, StructureResult, StructureIssue, Severity, SeoResult, SeoMetrics, SeoIssueType, SeoIssue, SeoInput, ReadabilityResult, ReadabilityMetrics, KeywordStatus, KeywordMetrics, KeywordLocationType, KeywordLocation, KeywordInput, KeywordAnalysisResult, KeywordAnalysisItem, ContentType, ContentStructure, ContentAnalysisResult, BadPatternType, BadPatternResult, BadPattern, AnalysisInput };