nca-ai-cms-astro-plugin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/.claude/settings.local.json +9 -0
  2. package/README.md +87 -0
  3. package/package.json +53 -0
  4. package/src/api/_utils.ts +20 -0
  5. package/src/api/articles/[id]/apply.ts +89 -0
  6. package/src/api/articles/[id]/regenerate-image.ts +49 -0
  7. package/src/api/articles/[id]/regenerate-text.ts +57 -0
  8. package/src/api/articles/[id].ts +53 -0
  9. package/src/api/auth/check.ts +6 -0
  10. package/src/api/auth/login.ts +43 -0
  11. package/src/api/auth/logout.ts +6 -0
  12. package/src/api/generate-content.ts +43 -0
  13. package/src/api/generate-image.ts +33 -0
  14. package/src/api/prompts.ts +45 -0
  15. package/src/api/save-image.ts +38 -0
  16. package/src/api/save.ts +49 -0
  17. package/src/api/scheduler/[id].ts +31 -0
  18. package/src/api/scheduler/generate.ts +94 -0
  19. package/src/api/scheduler/publish.ts +96 -0
  20. package/src/api/scheduler.ts +51 -0
  21. package/src/components/Editor.tsx +115 -0
  22. package/src/components/editor/GenerateTab.tsx +384 -0
  23. package/src/components/editor/PlannerTab.tsx +345 -0
  24. package/src/components/editor/SettingsTab.tsx +185 -0
  25. package/src/components/editor/styles.ts +597 -0
  26. package/src/components/editor/types.ts +49 -0
  27. package/src/components/editor/useTabNavigation.ts +69 -0
  28. package/src/config.d.ts +4 -0
  29. package/src/db/tables.ts +39 -0
  30. package/src/domain/entities/Article.test.ts +138 -0
  31. package/src/domain/entities/Article.ts +90 -0
  32. package/src/domain/entities/ScheduledPost.test.ts +228 -0
  33. package/src/domain/entities/ScheduledPost.ts +152 -0
  34. package/src/domain/entities/Source.test.ts +57 -0
  35. package/src/domain/entities/Source.ts +43 -0
  36. package/src/domain/entities/index.ts +9 -0
  37. package/src/domain/index.ts +16 -0
  38. package/src/domain/value-objects/ArticleFinder.test.ts +104 -0
  39. package/src/domain/value-objects/ArticleFinder.ts +61 -0
  40. package/src/domain/value-objects/SEOMetadata.test.ts +48 -0
  41. package/src/domain/value-objects/SEOMetadata.ts +19 -0
  42. package/src/domain/value-objects/Slug.test.ts +51 -0
  43. package/src/domain/value-objects/Slug.ts +33 -0
  44. package/src/domain/value-objects/index.ts +4 -0
  45. package/src/index.ts +146 -0
  46. package/src/middleware.ts +30 -0
  47. package/src/pages/editor.astro +22 -0
  48. package/src/pages/login.astro +117 -0
  49. package/src/services/ArticleService.test.ts +148 -0
  50. package/src/services/ArticleService.ts +150 -0
  51. package/src/services/AutoPublisher.ts +122 -0
  52. package/src/services/ContentFetcher.ts +89 -0
  53. package/src/services/ContentGenerator.ts +320 -0
  54. package/src/services/FileWriter.test.ts +80 -0
  55. package/src/services/FileWriter.ts +59 -0
  56. package/src/services/ImageConverter.ts +15 -0
  57. package/src/services/ImageGenerator.ts +108 -0
  58. package/src/services/PromptService.ts +84 -0
  59. package/src/services/SchedulerDBAdapter.ts +75 -0
  60. package/src/services/SchedulerService.test.ts +286 -0
  61. package/src/services/SchedulerService.ts +149 -0
  62. package/src/services/index.ts +27 -0
  63. package/src/utils/authUtils.test.ts +60 -0
  64. package/src/utils/authUtils.ts +25 -0
  65. package/src/utils/envUtils.test.ts +40 -0
  66. package/src/utils/envUtils.ts +26 -0
  67. package/src/utils/index.ts +7 -0
  68. package/src/utils/markdown.test.ts +65 -0
  69. package/src/utils/markdown.ts +13 -0
  70. package/src/utils/sanitize.test.ts +180 -0
  71. package/src/utils/sanitize.ts +98 -0
  72. package/tsconfig.json +22 -0
  73. package/vitest.config.ts +14 -0
@@ -0,0 +1,117 @@
1
+ ---
2
// Visitors who already carry a non-empty `editor-auth` cookie are sent straight
// to the editor. Only the presence of a value is checked here.
const hasAuthCookie = Boolean(Astro.cookies.get('editor-auth')?.value);
if (hasAuthCookie) {
  return Astro.redirect('/editor', 302);
}
6
+ ---
7
+ <html lang="de">
8
+ <head>
9
+ <meta charset="UTF-8" />
10
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
11
+ <title>Login - Content Editor</title>
12
+ <style>
13
+ * { box-sizing: border-box; margin: 0; padding: 0; }
14
+ body {
15
+ background: #0a0a0b;
16
+ color: #faf9f7;
17
+ font-family: system-ui, sans-serif;
18
+ display: flex;
19
+ align-items: center;
20
+ justify-content: center;
21
+ min-height: 100vh;
22
+ }
23
+ .login-card {
24
+ background: #1a1a1d;
25
+ border: 1px solid #2a2a2d;
26
+ border-radius: 12px;
27
+ padding: 2.5rem;
28
+ width: 100%;
29
+ max-width: 400px;
30
+ }
31
+ h1 { font-size: 1.5rem; margin-bottom: 1.5rem; text-align: center; }
32
+ label { display: block; margin-bottom: 0.5rem; font-size: 0.875rem; color: #aaa; }
33
+ input {
34
+ width: 100%;
35
+ padding: 0.75rem;
36
+ margin-bottom: 1rem;
37
+ background: #0a0a0b;
38
+ border: 1px solid #333;
39
+ border-radius: 6px;
40
+ color: #faf9f7;
41
+ font-size: 1rem;
42
+ }
43
+ input:focus { outline: none; border-color: #666; }
44
+ button {
45
+ width: 100%;
46
+ padding: 0.75rem;
47
+ background: #faf9f7;
48
+ color: #0a0a0b;
49
+ border: none;
50
+ border-radius: 6px;
51
+ font-size: 1rem;
52
+ font-weight: 600;
53
+ cursor: pointer;
54
+ }
55
+ button:hover { background: #e0e0e0; }
56
+ button:disabled { opacity: 0.5; cursor: not-allowed; }
57
+ .error {
58
+ background: #3a1a1a;
59
+ border: 1px solid #662222;
60
+ color: #ff6b6b;
61
+ padding: 0.75rem;
62
+ border-radius: 6px;
63
+ margin-bottom: 1rem;
64
+ font-size: 0.875rem;
65
+ display: none;
66
+ }
67
+ </style>
68
+ </head>
69
+ <body>
70
+ <div class="login-card">
71
+ <h1>Content Editor</h1>
72
+ <div id="error" class="error"></div>
73
+ <form id="login-form">
74
+ <label for="username">Benutzername</label>
75
+ <input type="text" id="username" name="username" required autocomplete="username" />
76
+ <label for="password">Passwort</label>
77
+ <input type="password" id="password" name="password" required autocomplete="current-password" />
78
+ <button type="submit">Anmelden</button>
79
+ </form>
80
+ </div>
81
+ <script>
82
const form = document.getElementById('login-form') as HTMLFormElement;
const errorEl = document.getElementById('error') as HTMLDivElement;

/** Puts `message` into the error banner and makes the banner visible. */
function showError(message: string): void {
  errorEl.textContent = message;
  errorEl.style.display = 'block';
}

/**
 * Reads the `error` string from a non-OK login response.
 *
 * Falls back to a generic message when the body is not valid JSON or carries
 * no non-empty string `error` field, so a server-side failure is never
 * reported to the user as a network error.
 */
async function readLoginError(res: Response): Promise<string> {
  const fallback = 'Anmeldung fehlgeschlagen';
  try {
    const data: unknown = await res.json();
    if (typeof data === 'object' && data !== null && 'error' in data) {
      const { error } = data as { error?: unknown };
      if (typeof error === 'string' && error) return error;
    }
  } catch {
    // Body was empty or not JSON (e.g. an HTML error page): use the fallback.
  }
  return fallback;
}

// Submit the credentials as JSON; on success navigate to the editor,
// otherwise show the reason in the error banner.
form.addEventListener('submit', async (e) => {
  e.preventDefault();
  errorEl.style.display = 'none';

  const username = (document.getElementById('username') as HTMLInputElement).value;
  const password = (document.getElementById('password') as HTMLInputElement).value;
  const button = form.querySelector('button') as HTMLButtonElement;
  // Block double submits while the request is in flight.
  button.disabled = true;

  try {
    const res = await fetch('/api/auth/login', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ username, password }),
    });

    if (res.ok) {
      window.location.href = '/editor';
    } else {
      showError(await readLoginError(res));
    }
  } catch {
    // Only reached when the request itself failed.
    showError('Netzwerkfehler');
  } finally {
    button.disabled = false;
  }
});
115
+ </script>
116
+ </body>
117
+ </html>
@@ -0,0 +1,148 @@
1
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest';
2
+ import { ArticleService, ArticleNotFoundError } from './ArticleService';
3
+ import * as fs from 'fs/promises';
4
+ import * as path from 'path';
5
+ import * as os from 'os';
6
+
7
/**
 * Exercises ArticleService against a throw-away directory that is created
 * before and removed after every test.
 */
describe('ArticleService', () => {
  let tempDir: string;
  let service: ArticleService;

  /** Resolves to `true` when `target` is accessible, `false` otherwise. */
  const pathExists = async (target: string): Promise<boolean> => {
    try {
      await fs.access(target);
      return true;
    } catch {
      return false;
    }
  };

  /**
   * Writes `<tempDir>/<year>/<month>/<slug>/index.md` plus a dummy
   * `hero.webp` and returns the article folder.
   */
  const createTestArticle = async (
    slug: string,
    year = '2026',
    month = '01'
  ): Promise<string> => {
    const articleDir = path.join(tempDir, year, month, slug);
    const markdown = `---
title: "Test Article"
description: "Test description"
date: "${year}-${month}-15"
tags: ["Test"]
image: "./hero.webp"
imageAlt: "Test image"
---

# Test Article

Content here.
`;

    await fs.mkdir(articleDir, { recursive: true });
    await fs.writeFile(path.join(articleDir, 'index.md'), markdown);
    // Create a dummy image file
    await fs.writeFile(path.join(articleDir, 'hero.webp'), 'fake-image-data');
    return articleDir;
  };

  beforeEach(async () => {
    tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'article-service-test-'));
    service = new ArticleService(tempDir);
  });

  afterEach(async () => {
    await fs.rm(tempDir, { recursive: true, force: true });
  });

  describe('delete', () => {
    it('removes article folder recursively', async () => {
      const articleDir = await createTestArticle('test-article');

      // The folder must be there before the call...
      expect(await pathExists(articleDir)).toBe(true);

      await service.delete('test-article');

      // ...and gone afterwards.
      expect(await pathExists(articleDir)).toBe(false);
    });

    it('throws ArticleNotFoundError for invalid slug', async () => {
      const deletion = service.delete('non-existent-article');
      await expect(deletion).rejects.toThrow(ArticleNotFoundError);
    });

    it('removes both index.md and hero.webp', async () => {
      const articleDir = await createTestArticle('full-article');

      await service.delete('full-article');

      // Article folder should no longer exist
      expect(await pathExists(articleDir)).toBe(false);
    });
  });

  describe('read', () => {
    it('reads article metadata from frontmatter', async () => {
      await createTestArticle('readable-article');

      const loaded = await service.read('readable-article');

      expect(loaded).not.toBeNull();
      expect(loaded?.title).toBe('Test Article');
      expect(loaded?.description).toBe('Test description');
      expect(loaded?.tags).toContain('Test');
    });

    it('returns null for non-existent article', async () => {
      const loaded = await service.read('does-not-exist');
      expect(loaded).toBeNull();
    });

    it('includes article ID in result', async () => {
      await createTestArticle('id-test', '2025', '06');

      const loaded = await service.read('id-test');

      expect(loaded?.articleId).toBe('2025/06/id-test');
    });
  });

  describe('updateContent', () => {
    it('updates article content preserving frontmatter fields', async () => {
      await createTestArticle('update-test');

      await service.updateContent('update-test', {
        content: '# Updated Title\n\nNew content here.',
      });

      const reloaded = await service.read('update-test');
      // The frontmatter title is untouched by a content-only update.
      expect(reloaded?.title).toBe('Test Article');
      expect(reloaded?.content).toContain('New content here.');
    });

    it('can update title and description', async () => {
      await createTestArticle('metadata-update');

      await service.updateContent('metadata-update', {
        title: 'New Title',
        description: 'New description',
        content: '# New Title\n\nContent.',
      });

      const reloaded = await service.read('metadata-update');
      expect(reloaded?.title).toBe('New Title');
      expect(reloaded?.description).toBe('New description');
    });

    it('throws ArticleNotFoundError for invalid slug', async () => {
      const update = service.updateContent('invalid', { content: 'test' });
      await expect(update).rejects.toThrow(ArticleNotFoundError);
    });
  });
});
@@ -0,0 +1,150 @@
1
+ import * as fs from 'fs/promises';
2
+ import * as path from 'path';
3
+ import matter from 'gray-matter';
4
+ import { ArticleFinder } from '../domain/value-objects/ArticleFinder';
5
+
6
/**
 * Error raised when no article can be located for a slug.
 *
 * The message has the form `Article not found: <slug>`; the slug itself is
 * also available on {@link ArticleNotFoundError.slug} so callers do not have
 * to parse the message.
 */
export class ArticleNotFoundError extends Error {
  /** Slug that could not be resolved to an article. */
  readonly slug: string;

  /**
   * @param slug - The slug that was looked up without success.
   */
  constructor(slug: string) {
    super(`Article not found: ${slug}`);
    this.name = 'ArticleNotFoundError';
    this.slug = slug;
    // Keeps `instanceof ArticleNotFoundError` working when the class is
    // compiled down to ES5, where subclassing Error loses the prototype.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
12
+
13
/**
 * An article as loaded from disk: frontmatter fields plus the markdown body
 * and the location it was read from.
 */
export interface ArticleData {
  /** Identifier reported by the article finder for this article. */
  articleId: string;
  /** `title` frontmatter field. */
  title: string;
  /** `description` frontmatter field. */
  description: string;
  /** `date` frontmatter field converted to a `Date`. */
  date: Date;
  /** `createdAt` frontmatter field as a `Date`; absent when not set. */
  createdAt?: Date;
  /** `tags` frontmatter field; an empty array when the field is missing. */
  tags: string[];
  /** `image` frontmatter field, if any. */
  image?: string;
  /** `imageAlt` frontmatter field, if any. */
  imageAlt?: string;
  /** Markdown body below the frontmatter, trimmed. */
  content: string;
  /** Folder that holds the article, as reported by the article finder. */
  folderPath: string;
}
25
+
26
/**
 * Fields that `ArticleService.updateContent` may overwrite. Anything left out
 * keeps its current value.
 */
export interface UpdateContentOptions {
  /** New frontmatter title; applied only when non-empty. */
  title?: string;
  /** New frontmatter description; applied only when non-empty. */
  description?: string;
  /** New markdown body; the existing body is kept when omitted. */
  content?: string;
}
31
+
32
/**
 * Reads, lists, updates and deletes articles stored on disk as
 * `<basePath>/<year>/<month>/<slug>/index.md`.
 *
 * Individual articles are located by slug through an `ArticleFinder` that is
 * created for the same base path.
 */
export class ArticleService {
  private readonly finder: ArticleFinder;
  private readonly basePath: string;

  /**
   * @param basePath - Content root, absolute or relative to the current
   *   working directory.
   */
  constructor(basePath: string = 'nca-ai-cms-content') {
    this.finder = new ArticleFinder(basePath);
    this.basePath = basePath;
  }

  /**
   * Walks the year/month/slug folder levels below the base path and loads
   * every article that can be read.
   *
   * Each folder is loaded through {@link ArticleService.read}, which resolves
   * by slug. Results are de-duplicated by `articleId` so that a slug present
   * in more than one month is not returned twice.
   *
   * @returns The readable articles; an empty array when the base path does
   *   not exist. A directory error part-way through ends the walk and returns
   *   what was collected so far.
   */
  async list(): Promise<ArticleData[]> {
    const articles: ArticleData[] = [];
    const seenIds = new Set<string>();
    // resolve() rather than join(): an absolute base path must be used as-is
    // instead of being appended to the working directory.
    const fullBasePath = path.resolve(process.cwd(), this.basePath);

    try {
      for (const yearPath of await this.subdirectories(fullBasePath)) {
        for (const monthPath of await this.subdirectories(yearPath)) {
          for (const slugPath of await this.subdirectories(monthPath)) {
            const article = await this.read(path.basename(slugPath));
            if (article && !seenIds.has(article.articleId)) {
              seenIds.add(article.articleId);
              articles.push(article);
            }
          }
        }
      }
    } catch {
      // Deliberately best-effort: a missing or unreadable directory ends the
      // walk and whatever was collected so far is returned.
    }

    return articles;
  }

  /**
   * Removes the article's folder and everything inside it.
   *
   * @param slug - Slug of the article to delete.
   * @throws ArticleNotFoundError when the finder cannot locate the slug.
   */
  async delete(slug: string): Promise<void> {
    const location = await this.finder.findBySlug(slug);

    if (!location) {
      throw new ArticleNotFoundError(slug);
    }

    await fs.rm(location.folderPath, { recursive: true, force: true });
  }

  /**
   * Loads one article and parses its frontmatter.
   *
   * @param slug - Slug of the article to load.
   * @returns The article, or `null` when the slug cannot be located or the
   *   index file cannot be read or parsed.
   */
  async read(slug: string): Promise<ArticleData | null> {
    const location = await this.finder.findBySlug(slug);

    if (!location) {
      return null;
    }

    try {
      const fileContent = await fs.readFile(location.indexPath, 'utf-8');
      const { data, content } = matter(fileContent);

      const result: ArticleData = {
        articleId: location.articleId,
        title: data.title,
        description: data.description,
        date: new Date(data.date),
        tags: data.tags || [],
        image: data.image,
        imageAlt: data.imageAlt,
        content: content.trim(),
        folderPath: location.folderPath,
      };

      // createdAt is optional; leave the property off when it is not set.
      if (data.createdAt) {
        result.createdAt = new Date(data.createdAt);
      }

      return result;
    } catch {
      return null;
    }
  }

  /**
   * Rewrites an article's index file with updated frontmatter and/or body.
   *
   * `title` and `description` are applied only when non-empty; `content`
   * replaces the body whenever it is provided (including an empty string).
   * All other frontmatter fields are carried over unchanged.
   *
   * @param slug - Slug of the article to update.
   * @param options - Fields to overwrite.
   * @throws ArticleNotFoundError when the finder cannot locate the slug.
   */
  async updateContent(
    slug: string,
    options: UpdateContentOptions
  ): Promise<void> {
    const location = await this.finder.findBySlug(slug);

    if (!location) {
      throw new ArticleNotFoundError(slug);
    }

    const fileContent = await fs.readFile(location.indexPath, 'utf-8');
    const { data, content } = matter(fileContent);

    // Update frontmatter fields if provided
    const updatedData = {
      ...data,
      ...(options.title && { title: options.title }),
      ...(options.description && { description: options.description }),
    };

    // Use new content or keep existing
    const updatedContent = options.content ?? content;

    // Rebuild the file with frontmatter
    const newFileContent = matter.stringify(updatedContent, updatedData);

    await fs.writeFile(location.indexPath, newFileContent, 'utf-8');
  }

  /**
   * Returns the full paths of the entries directly below `parent` that are
   * directories. Entries that cannot be stat'ed are skipped; a failure to
   * read `parent` itself is propagated to the caller.
   */
  private async subdirectories(parent: string): Promise<string[]> {
    const directories: string[] = [];

    for (const entry of await fs.readdir(parent)) {
      const entryPath = path.join(parent, entry);
      const stats = await fs.stat(entryPath).catch(() => null);
      if (stats?.isDirectory()) directories.push(entryPath);
    }

    return directories;
  }
}
@@ -0,0 +1,122 @@
1
+ import { SchedulerService } from './SchedulerService';
2
+ import { AstroSchedulerDBAdapter } from './SchedulerDBAdapter';
3
+ import { Article } from '../domain/entities/Article';
4
+ import { FileWriter } from './FileWriter';
5
+ import { convertToWebP } from './ImageConverter';
6
+ import * as path from 'path';
7
+
8
/** Time between two automatic publishing runs: 60 minutes. */
const INTERVAL_MS = 60 * 60 * 1000;

/** Handle of the recurring timer; `null` while no interval is scheduled. */
let intervalId: ReturnType<typeof setInterval> | null = null;

/** True while a publishing run is in progress; prevents overlapping runs. */
let isRunning = false;

/**
 * Publishes every scheduled post that is due.
 *
 * For each due post an article is written, the generated image (if any) is
 * stored as `hero.webp` in the article folder, and the post is marked as
 * published. Posts without a generated title or content are skipped and
 * counted as failed. A failure on one post is logged and does not stop the
 * remaining posts. The call returns immediately when a run is already active.
 *
 * @param contentPath - Content root handed to each created article.
 */
async function publishDuePosts(
  contentPath: string = 'nca-ai-cms-content'
): Promise<void> {
  if (isRunning) return;
  isRunning = true;

  try {
    const scheduler = new SchedulerService(new AstroSchedulerDBAdapter());
    const duePosts = await scheduler.getDuePosts();

    if (duePosts.length === 0) {
      return;
    }

    console.log(`[AutoPublisher] Found ${duePosts.length} due post(s)`);

    const tally = { published: 0, failed: 0 };

    for (const post of duePosts) {
      try {
        if (!post.generatedTitle || !post.generatedContent) {
          console.warn(
            `[AutoPublisher] Skipping ${post.id}: missing generated content`
          );
          tally.failed += 1;
          continue;
        }

        // imageAlt is only added as a key when a value exists.
        const optionalAlt = post.generatedImageAlt
          ? { imageAlt: post.generatedImageAlt }
          : {};
        const articleProps = {
          title: post.generatedTitle,
          description: post.generatedDescription || '',
          content: post.generatedContent,
          date: post.scheduledDate,
          tags: post.parsedTags,
          image: './hero.webp',
          contentPath,
          ...optionalAlt,
        };
        const article = new Article(articleProps);

        await new FileWriter().write(article);

        if (post.generatedImageData) {
          const heroPath = path.join(
            process.cwd(),
            article.folderPath,
            'hero.webp'
          );
          await convertToWebP(post.generatedImageData, heroPath);
        }

        await scheduler.markPublished(post.id, article.folderPath);
        tally.published += 1;
        console.log(
          `[AutoPublisher] Published ${post.id} -> ${article.folderPath}`
        );
      } catch (error) {
        tally.failed += 1;
        console.error(`[AutoPublisher] Failed to publish ${post.id}:`, error);
      }
    }

    console.log(
      `[AutoPublisher] Done: ${tally.published} published, ${tally.failed} failed`
    );
  } catch (error) {
    console.error('[AutoPublisher] Error checking due posts:', error);
  } finally {
    // Always release the guard, including on the early "nothing due" return.
    isRunning = false;
  }
}
91
+
92
/** Content root passed to every scheduled publishing run. */
let configuredContentPath = 'nca-ai-cms-content';

/** Delay before the first run, giving the database time to initialize. */
const STARTUP_DELAY_MS = 10_000;

/**
 * Handle of the one-off startup run; `null` once it has fired or been
 * cancelled.
 */
let startupTimeoutId: ReturnType<typeof setTimeout> | null = null;

/**
 * Starts automatic publishing: one run shortly after startup, then one run
 * every `INTERVAL_MS`. Calling it again while already started does nothing.
 *
 * @param contentPath - Content root used for all runs until stopped.
 */
export function startAutoPublisher(
  contentPath: string = 'nca-ai-cms-content'
): void {
  if (intervalId) return;

  configuredContentPath = contentPath;

  console.log(
    `[AutoPublisher] Starting (interval: ${INTERVAL_MS / 1000 / 60} minutes)`
  );

  // Run once on startup after a short delay to let the DB initialize.
  // The handle is kept so that stopAutoPublisher() can cancel a pending run.
  startupTimeoutId = setTimeout(() => {
    startupTimeoutId = null;
    void publishDuePosts(configuredContentPath);
  }, STARTUP_DELAY_MS);

  intervalId = setInterval(() => {
    void publishDuePosts(configuredContentPath);
  }, INTERVAL_MS);
}

/**
 * Stops automatic publishing. Cancels the recurring timer and a startup run
 * that has not fired yet. Does nothing when the publisher is not started.
 */
export function stopAutoPublisher(): void {
  if (startupTimeoutId) {
    clearTimeout(startupTimeoutId);
    startupTimeoutId = null;
  }

  if (intervalId) {
    clearInterval(intervalId);
    intervalId = null;
    console.log('[AutoPublisher] Stopped');
  }
}
@@ -0,0 +1,89 @@
1
+ import TurndownService from 'turndown';
2
+ import { Source } from '../domain/entities/Source';
3
+
4
/** Result of fetching a source page and converting it to Markdown. */
export type FetchedContent = {
  /** Page title taken from the HTML. */
  title: string;
  /** Page content converted to Markdown. */
  content: string;
  /** URL the content was fetched from. */
  url: string;
};
9
+
10
/**
 * Downloads a source page and turns its main content into Markdown using
 * Turndown.
 */
export class ContentFetcher {
  private turndown: TurndownService;

  constructor() {
    this.turndown = new TurndownService({
      headingStyle: 'atx',
      codeBlockStyle: 'fenced',
      bulletListMarker: '-',
    });

    // Remove unwanted elements
    this.turndown.remove([
      'script',
      'style',
      'nav',
      'footer',
      'aside',
      'noscript',
    ]);
  }

  /**
   * Fetches `source.url` with browser-like request headers and converts the
   * response body to Markdown.
   *
   * @param source - Source whose `url` is requested.
   * @returns Title, Markdown content and the requested URL.
   * @throws Error when the response status is not OK.
   */
  async fetch(source: Source): Promise<FetchedContent> {
    const response = await fetch(source.url, {
      headers: {
        'User-Agent':
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        Accept:
          'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'de-DE,de;q=0.9,en;q=0.8',
      },
    });

    if (!response.ok) {
      throw new Error(`Failed to fetch ${source.url}: ${response.status}`);
    }

    const html = await response.text();
    const title = this.extractTitle(html);
    const content = this.htmlToMarkdown(html);

    return {
      title,
      content,
      url: source.url,
    };
  }

  /**
   * Picks a title for the page: the first non-empty `og:title` meta content,
   * else the `<title>` text, else `'Untitled'`.
   *
   * The meta tag is recognised with its attributes in either order and with
   * single or double quotes. Blank values are treated as missing.
   */
  private extractTitle(html: string): string {
    // Try og:title first, then title tag
    const metaTags = html.match(/<meta\b[^>]*>/gi) ?? [];
    for (const tag of metaTags) {
      if (!/\bproperty\s*=\s*(["'])og:title\1/i.test(tag)) continue;

      const contentMatch = tag.match(
        /\bcontent\s*=\s*(?:"([^"]*)"|'([^']*)')/i
      );
      const ogTitle = (contentMatch?.[1] ?? contentMatch?.[2])?.trim();
      if (ogTitle) return ogTitle;
    }

    const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
    const pageTitle = titleMatch?.[1]?.trim();
    return pageTitle ? pageTitle : 'Untitled';
  }

  /**
   * Converts HTML to Markdown, restricted to the main content area when one
   * is found: `<main>`, then `<article>`, then a `<div>` whose class contains
   * `content`, `article`, `post` or `entry`. Falls back to the whole
   * document otherwise.
   *
   * The patterns are non-greedy, so a match ends at the first closing tag of
   * that kind; for the `<div>` case this cuts off at the first nested
   * `</div>`.
   */
  private htmlToMarkdown(html: string): string {
    // Extract main content area first
    let content = html;

    const mainMatch =
      content.match(/<main[^>]*>([\s\S]*?)<\/main>/i) ||
      content.match(/<article[^>]*>([\s\S]*?)<\/article>/i) ||
      content.match(
        /<div[^>]*class="[^"]*(?:content|article|post|entry)[^"]*"[^>]*>([\s\S]*?)<\/div>/i
      );

    if (mainMatch?.[1]) {
      content = mainMatch[1];
    }

    // Convert to markdown using turndown
    const markdown = this.turndown.turndown(content);

    // Collapse runs of three or more newlines into a single blank line
    return markdown.replace(/\n{3,}/g, '\n\n').trim();
  }
}
+ }