@twelvehart/supermemory-runtime 1.0.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +57 -0
- package/README.md +374 -0
- package/dist/index.js +189 -0
- package/dist/mcp/index.js +1132 -0
- package/docker-compose.prod.yml +91 -0
- package/docker-compose.yml +358 -0
- package/drizzle/0000_dapper_the_professor.sql +159 -0
- package/drizzle/0001_api_keys.sql +51 -0
- package/drizzle/meta/0000_snapshot.json +1532 -0
- package/drizzle/meta/_journal.json +13 -0
- package/drizzle.config.ts +20 -0
- package/package.json +114 -0
- package/scripts/add-extraction-job.ts +122 -0
- package/scripts/benchmark-pgvector.ts +122 -0
- package/scripts/bootstrap.sh +209 -0
- package/scripts/check-runtime-pack.ts +111 -0
- package/scripts/claude-mcp-config.ts +336 -0
- package/scripts/docker-entrypoint.sh +183 -0
- package/scripts/doctor.ts +377 -0
- package/scripts/init-db.sql +33 -0
- package/scripts/install.sh +1110 -0
- package/scripts/mcp-setup.ts +271 -0
- package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
- package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
- package/scripts/migrations/003_create_hnsw_index.sql +94 -0
- package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
- package/scripts/migrations/005_create_chunks_table.sql +95 -0
- package/scripts/migrations/006_create_processing_queue.sql +45 -0
- package/scripts/migrations/generate_test_data.sql +42 -0
- package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
- package/scripts/migrations/run_migrations.sh +286 -0
- package/scripts/migrations/test_hnsw_index.sql +255 -0
- package/scripts/pre-commit-secrets +282 -0
- package/scripts/run-extraction-worker.ts +46 -0
- package/scripts/run-phase1-tests.sh +291 -0
- package/scripts/setup.ts +222 -0
- package/scripts/smoke-install.sh +12 -0
- package/scripts/test-health-endpoint.sh +328 -0
- package/src/api/index.ts +2 -0
- package/src/api/middleware/auth.ts +80 -0
- package/src/api/middleware/csrf.ts +308 -0
- package/src/api/middleware/errorHandler.ts +166 -0
- package/src/api/middleware/rateLimit.ts +360 -0
- package/src/api/middleware/validation.ts +514 -0
- package/src/api/routes/documents.ts +286 -0
- package/src/api/routes/profiles.ts +237 -0
- package/src/api/routes/search.ts +71 -0
- package/src/api/stores/index.ts +58 -0
- package/src/config/bootstrap-env.ts +3 -0
- package/src/config/env.ts +71 -0
- package/src/config/feature-flags.ts +25 -0
- package/src/config/index.ts +140 -0
- package/src/config/secrets.config.ts +291 -0
- package/src/db/client.ts +92 -0
- package/src/db/index.ts +73 -0
- package/src/db/postgres.ts +72 -0
- package/src/db/schema/chunks.schema.ts +31 -0
- package/src/db/schema/containers.schema.ts +46 -0
- package/src/db/schema/documents.schema.ts +49 -0
- package/src/db/schema/embeddings.schema.ts +32 -0
- package/src/db/schema/index.ts +11 -0
- package/src/db/schema/memories.schema.ts +72 -0
- package/src/db/schema/profiles.schema.ts +34 -0
- package/src/db/schema/queue.schema.ts +59 -0
- package/src/db/schema/relationships.schema.ts +42 -0
- package/src/db/schema.ts +223 -0
- package/src/db/worker-connection.ts +47 -0
- package/src/index.ts +235 -0
- package/src/mcp/CLAUDE.md +1 -0
- package/src/mcp/index.ts +1380 -0
- package/src/mcp/legacyState.ts +22 -0
- package/src/mcp/rateLimit.ts +358 -0
- package/src/mcp/resources.ts +309 -0
- package/src/mcp/results.ts +104 -0
- package/src/mcp/tools.ts +401 -0
- package/src/queues/config.ts +119 -0
- package/src/queues/index.ts +289 -0
- package/src/sdk/client.ts +225 -0
- package/src/sdk/errors.ts +266 -0
- package/src/sdk/http.ts +560 -0
- package/src/sdk/index.ts +244 -0
- package/src/sdk/resources/base.ts +65 -0
- package/src/sdk/resources/connections.ts +204 -0
- package/src/sdk/resources/documents.ts +163 -0
- package/src/sdk/resources/index.ts +10 -0
- package/src/sdk/resources/memories.ts +150 -0
- package/src/sdk/resources/search.ts +60 -0
- package/src/sdk/resources/settings.ts +36 -0
- package/src/sdk/types.ts +674 -0
- package/src/services/chunking/index.ts +451 -0
- package/src/services/chunking.service.ts +650 -0
- package/src/services/csrf.service.ts +252 -0
- package/src/services/documents.repository.ts +219 -0
- package/src/services/documents.service.ts +191 -0
- package/src/services/embedding.service.ts +404 -0
- package/src/services/extraction.service.ts +300 -0
- package/src/services/extractors/code.extractor.ts +451 -0
- package/src/services/extractors/index.ts +9 -0
- package/src/services/extractors/markdown.extractor.ts +461 -0
- package/src/services/extractors/pdf.extractor.ts +315 -0
- package/src/services/extractors/text.extractor.ts +118 -0
- package/src/services/extractors/url.extractor.ts +243 -0
- package/src/services/index.ts +235 -0
- package/src/services/ingestion.service.ts +177 -0
- package/src/services/llm/anthropic.ts +400 -0
- package/src/services/llm/base.ts +460 -0
- package/src/services/llm/contradiction-detector.service.ts +526 -0
- package/src/services/llm/heuristics.ts +148 -0
- package/src/services/llm/index.ts +309 -0
- package/src/services/llm/memory-classifier.service.ts +383 -0
- package/src/services/llm/memory-extension-detector.service.ts +523 -0
- package/src/services/llm/mock.ts +470 -0
- package/src/services/llm/openai.ts +398 -0
- package/src/services/llm/prompts.ts +438 -0
- package/src/services/llm/types.ts +373 -0
- package/src/services/memory.repository.ts +1769 -0
- package/src/services/memory.service.ts +1338 -0
- package/src/services/memory.types.ts +234 -0
- package/src/services/persistence/index.ts +295 -0
- package/src/services/pipeline.service.ts +509 -0
- package/src/services/profile.repository.ts +436 -0
- package/src/services/profile.service.ts +560 -0
- package/src/services/profile.types.ts +270 -0
- package/src/services/relationships/detector.ts +1128 -0
- package/src/services/relationships/index.ts +268 -0
- package/src/services/relationships/memory-integration.ts +459 -0
- package/src/services/relationships/strategies.ts +132 -0
- package/src/services/relationships/types.ts +370 -0
- package/src/services/search.service.ts +761 -0
- package/src/services/search.types.ts +220 -0
- package/src/services/secrets.service.ts +384 -0
- package/src/services/vectorstore/base.ts +327 -0
- package/src/services/vectorstore/index.ts +444 -0
- package/src/services/vectorstore/memory.ts +286 -0
- package/src/services/vectorstore/migration.ts +295 -0
- package/src/services/vectorstore/mock.ts +403 -0
- package/src/services/vectorstore/pgvector.ts +695 -0
- package/src/services/vectorstore/types.ts +247 -0
- package/src/startup.ts +389 -0
- package/src/types/api.types.ts +193 -0
- package/src/types/document.types.ts +103 -0
- package/src/types/index.ts +241 -0
- package/src/types/profile.base.ts +133 -0
- package/src/utils/errors.ts +447 -0
- package/src/utils/id.ts +15 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +313 -0
- package/src/utils/sanitization.ts +501 -0
- package/src/utils/secret-validation.ts +273 -0
- package/src/utils/synonyms.ts +188 -0
- package/src/utils/validation.ts +581 -0
- package/src/workers/chunking.worker.ts +242 -0
- package/src/workers/embedding.worker.ts +358 -0
- package/src/workers/extraction.worker.ts +346 -0
- package/src/workers/indexing.worker.ts +505 -0
- package/tsconfig.json +38 -0
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown extractor - parses markdown by headings and structure
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import yaml from 'js-yaml'
|
|
6
|
+
import { ExtractionResult, ExtractorInterface, ContentType } from '../../types/document.types.js'
|
|
7
|
+
|
|
8
|
+
/**
 * One node of the heading tree produced by `MarkdownExtractor.parseSections`.
 *
 * Line numbers are zero-based indexes into the document split on line breaks.
 */
export interface MarkdownSection {
  /** Heading depth: 1-6 for `#`..`######`, 0 for text that precedes the first heading. */
  level: number
  /** Heading text without the leading `#` markers; empty for the level-0 section. */
  heading: string
  /** Trimmed text between this heading and the next heading of any level. */
  content: string
  /** Index of the heading line (or of the first line for the level-0 section). */
  startLine: number
  /** Index of the last line that belongs to this section, nested sections included. */
  endLine: number
  /** Sections with a deeper heading level that are nested directly under this one. */
  children: MarkdownSection[]
}
|
|
16
|
+
|
|
17
|
+
/**
 * Extractor for markdown documents.
 *
 * Detects markdown with a small weighted heuristic, builds a heading tree,
 * produces a plain-text rendering and collects document metadata (title, tags,
 * counts, selected frontmatter fields).
 */
export class MarkdownExtractor implements ExtractorInterface {
  // Detection patterns. None carries the `g` flag, so `.test()` keeps no state between calls.
  private readonly headingPattern = /^#{1,6}\s+.+$/m
  private readonly codeBlockPattern = /```[\s\S]*?```/
  private readonly linkPattern = /\[([^\]]+)\]\([^)]+\)/
  private readonly listPattern = /^[\s]*[-*+]\s+/m
  private readonly boldPattern = /\*\*[^*]+\*\*/
  private readonly italicPattern = /\*[^*]+\*/

  /**
   * Decide whether `content` looks like markdown.
   *
   * Each feature found adds its weight (heading 3, fenced code 2, link / list /
   * bold / italic 1 each); the content is accepted when the total reaches 2.
   *
   * @param content - Raw text to inspect.
   * @returns `true` when the weighted score is at least 2; `false` for empty or non-string input.
   */
  canHandle(content: string): boolean {
    if (typeof content !== 'string' || content.length === 0) {
      return false
    }

    const weightedPatterns: ReadonlyArray<readonly [RegExp, number]> = [
      [this.headingPattern, 3],
      [this.codeBlockPattern, 2],
      [this.linkPattern, 1],
      [this.listPattern, 1],
      [this.boldPattern, 1],
      [this.italicPattern, 1],
    ]

    let score = 0
    for (const [pattern, weight] of weightedPatterns) {
      if (pattern.test(content)) score += weight
    }

    return score >= 2
  }

  /**
   * Extract text, structure and metadata from a markdown document.
   *
   * @param content - Raw markdown.
   * @param options - When `options.preserveMarkdown` is truthy the returned `content`
   *   is the untouched markdown; otherwise it is the plain-text rendering.
   * @returns The extraction result; `metadata.sections` lists the top-level sections only.
   */
  async extract(content: string, options?: Record<string, unknown>): Promise<ExtractionResult> {
    const sections = this.parseSections(content)
    const plainText = this.toPlainText(content)
    // Hand the rendering over so the metadata pass does not recompute it.
    const metadata = this.extractMetadata(content, sections, plainText)

    return {
      content: options?.preserveMarkdown ? content : plainText,
      contentType: 'markdown' as ContentType,
      metadata: {
        ...metadata,
        sections: sections.map((s) => ({
          level: s.level,
          heading: s.heading,
          charCount: s.content.length,
        })),
      },
      rawContent: content,
    }
  }

  /**
   * Parse markdown into a tree of sections keyed by ATX headings (`#` .. `######`).
   *
   * - Accepts both LF and CRLF line endings.
   * - Lines inside fenced code blocks (``` or ~~~) are never treated as headings;
   *   an unclosed fence runs to the end of the document.
   * - Non-blank text before the first heading becomes an implicit level-0 section.
   * - A document without any heading yields an empty array.
   *
   * @param content - Raw markdown.
   * @returns Root sections in document order, each with its nested children.
   */
  parseSections(content: string): MarkdownSection[] {
    const fenceLine = /^ {0,3}(`{3,}|~{3,})/
    const headingLine = /^(#{1,6})\s+(.+)$/

    // Splitting on /\r?\n/ keeps a trailing "\r" from defeating the heading pattern.
    const lines = content.split(/\r?\n/)
    const sections: MarkdownSection[] = []
    // Chain of currently open sections, outermost first.
    const stack: MarkdownSection[] = []

    let currentContent: string[] = []
    let contentStartLine = 0
    // Marker that opened the current fenced block, or null when outside a fence.
    let openFence: string | null = null

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i] ?? ''

      const fence = fenceLine.exec(line)
      const marker = fence?.[1]
      if (fence && marker) {
        if (openFence === null) {
          openFence = marker
        } else if (marker.startsWith(openFence) && line.slice(fence[0].length).trim() === '') {
          // A closing fence uses the same character, is at least as long and carries no info string.
          openFence = null
        }
      }

      const headingMatch = openFence === null ? headingLine.exec(line) : null
      if (!headingMatch) {
        currentContent.push(line)
        continue
      }

      // Text gathered so far belongs to the most recent heading, or is the preamble.
      const body = currentContent.join('\n').trim()
      const enclosing = stack[stack.length - 1]
      if (enclosing) {
        enclosing.content = body
      } else if (body.length > 0) {
        sections.push({
          level: 0,
          heading: '',
          content: body,
          startLine: contentStartLine,
          endLine: i - 1,
          children: [],
        })
      }

      const level = headingMatch[1]?.length ?? 1
      const heading = headingMatch[2]?.trim() ?? ''
      const section: MarkdownSection = {
        level,
        heading,
        content: '',
        startLine: i,
        endLine: i,
        children: [],
      }

      // Close every open section that is not an ancestor of the new heading.
      for (let top = stack[stack.length - 1]; top && top.level >= level; top = stack[stack.length - 1]) {
        top.endLine = i - 1
        stack.pop()
      }

      const parent = stack[stack.length - 1]
      if (parent) {
        parent.children.push(section)
      } else {
        sections.push(section)
      }

      stack.push(section)
      currentContent = []
      contentStartLine = i + 1
    }

    // Trailing text belongs to the last heading; every still-open section ends at the last line.
    const last = stack[stack.length - 1]
    if (last && currentContent.length > 0) {
      last.content = currentContent.join('\n').trim()
    }
    for (const open of stack) {
      open.endLine = lines.length - 1
    }

    return sections
  }

  /**
   * Render markdown as plain text.
   *
   * Code (fenced blocks and inline spans) is lifted out before any markup is
   * stripped and put back afterwards, so `# comments`, `snake_case` names and
   * `*` operators inside code survive. Structural markers (block quotes, rules,
   * list bullets) are handled before emphasis so that `* ` bullets are not
   * mistaken for italics, and images are handled before links so no stray `!`
   * is left behind. Whitespace is normalised last and applies to code as well.
   *
   * @param markdown - Raw markdown.
   * @returns Trimmed plain text with runs of spaces/tabs collapsed and at most one blank line in a row.
   */
  toPlainText(markdown: string): string {
    const codeSegments: string[] = []
    // NUL characters are removed from the input below, which makes NUL-delimited placeholders collision-free.
    const stash = (code: string): string => {
      codeSegments.push(code)
      return `\u0000${codeSegments.length - 1}\u0000`
    }

    const text = markdown
      .replace(/\u0000/g, '')
      .replace(/\r\n?/g, '\n')
      // Code: keep the contents, drop the delimiters (any info string is allowed on the fence).
      .replace(/```[^\n`]*\n([\s\S]*?)```/g, (_match, code: string) => stash(code))
      .replace(/`([^`]+)`/g, (_match, code: string) => stash(code))
      // Block quotes first, so quoted headings and lists are seen by the rules below.
      .replace(/^(?:>[ \t]?)+/gm, '')
      // Headings
      .replace(/^#{1,6}\s+(.+)$/gm, '$1')
      // Images before links: the link pattern also matches the tail of an image.
      .replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1')
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
      // Horizontal rules
      .replace(/^[-*_]{3,}$/gm, '')
      // Lists (bulleted and numbered) become "- " items.
      .replace(/^[\s]*[-*+]\s+/gm, '- ')
      .replace(/^[\s]*\d+\.\s+/gm, '- ')
      // Emphasis. Single delimiters must hug their text, and "_" must not sit inside a word.
      .replace(/\*\*\*([^*]+)\*\*\*/g, '$1')
      .replace(/\*\*([^*]+)\*\*/g, '$1')
      .replace(/\*(?=\S)([^*]*[^*\s])\*/g, '$1')
      .replace(/___([^_]+)___/g, '$1')
      .replace(/__([^_]+)__/g, '$1')
      .replace(/(?<![A-Za-z0-9])_(?=\S)([^_]*[^_\s])_(?![A-Za-z0-9])/g, '$1')
      // Strikethrough
      .replace(/~~([^~]+)~~/g, '$1')
      // HTML comments
      .replace(/<!--[\s\S]*?-->/g, '')

    return text
      .replace(/\u0000(\d+)\u0000/g, (_match, index: string) => codeSegments[Number(index)] ?? '')
      .replace(/[ \t]+/g, ' ')
      .replace(/\n{3,}/g, '\n\n')
      .trim()
  }

  /**
   * Collect document metadata.
   *
   * Title: first level-1 heading, otherwise the frontmatter `title`.
   * Tags: frontmatter `tags` when it is an array (string and number entries,
   * stringified), otherwise inline `#hashtags`.
   *
   * @param content - Raw markdown.
   * @param sections - Result of `parseSections(content)`.
   * @param plainText - Plain-text rendering of `content`; computed when omitted.
   * @returns Metadata with counts plus `author` / `description` when the frontmatter provides strings.
   */
  private extractMetadata(
    content: string,
    sections: MarkdownSection[],
    plainText: string = this.toPlainText(content)
  ): ExtractionResult['metadata'] {
    const words = plainText.split(/\s+/).filter((w) => w.length > 0)
    // Parsed once and reused for title, tags, author and description.
    const frontmatter = this.parseFrontmatter(content)

    const firstH1 = sections.find((s) => s.level === 1)
    const frontmatterTitle = frontmatter?.['title']
    const title = firstH1 ? firstH1.heading : typeof frontmatterTitle === 'string' ? frontmatterTitle : undefined

    const rawTags = frontmatter?.['tags']
    const tags = Array.isArray(rawTags)
      ? rawTags
          .filter((tag): tag is string | number => typeof tag === 'string' || typeof tag === 'number')
          .map((tag) => String(tag))
      : this.extractInlineTags(content)

    const codeBlocks = (content.match(/```[\s\S]*?```/g) ?? []).length
    // The lookbehind keeps images (`![alt](src)`) out of the link count.
    const links = (content.match(/(?<!!)\[([^\]]+)\]\([^)]+\)/g) ?? []).length

    const result: ExtractionResult['metadata'] = {
      title,
      tags,
      source: 'markdown',
      mimeType: 'text/markdown',
      wordCount: words.length,
      charCount: plainText.length,
      sectionCount: this.countAllSections(sections),
      codeBlockCount: codeBlocks,
      linkCount: links,
      hasTableOfContents: content.includes('[TOC]') || content.includes('[[toc]]'),
    }

    const author = frontmatter?.['author']
    if (typeof author === 'string') {
      result['author'] = author
    }
    const description = frontmatter?.['description']
    if (typeof description === 'string') {
      result['description'] = description
    }

    return result
  }

  /**
   * Parse a leading `---` ... `---` YAML frontmatter block.
   *
   * @param content - Raw markdown.
   * @returns The frontmatter mapping; `undefined` when there is no block, the block is
   *   empty, or its root is not a mapping. When js-yaml throws, the result of the
   *   simplified fallback parser is returned instead.
   */
  private parseFrontmatter(content: string): Record<string, unknown> | undefined {
    // The block must start at the very beginning of the document; LF and CRLF are both accepted.
    const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/)
    if (!match?.[1]) return undefined

    try {
      const parsed = yaml.load(match[1], {
        // NOTE(review): frontmatter is untrusted input. DEFAULT_SCHEMA excludes JS-specific
        // types in js-yaml 4.x, but in 3.x it was the full (unsafe) schema - confirm the
        // installed major version.
        schema: yaml.DEFAULT_SCHEMA,
        // With `json: false` duplicate mapping keys raise an error instead of overriding.
        json: false,
      })

      // Only a mapping at the root is usable; null/undefined, scalars and sequences are rejected.
      if (parsed === null || parsed === undefined) {
        return undefined
      }
      if (typeof parsed !== 'object' || Array.isArray(parsed)) {
        return undefined
      }

      return parsed as Record<string, unknown>
    } catch (error: unknown) {
      // Deliberate best effort: fall back to the simple key/value parser.
      console.warn('YAML frontmatter parsing failed, using fallback parser:', error)
      return this.parseFrontmatterFallback(match[1])
    }
  }

  /**
   * Best-effort frontmatter parser used when js-yaml rejects the block.
   *
   * Understands top-level `key: value` pairs, `key:` followed by `- item` lines,
   * inline `[a, b]` arrays, inline `{...}` objects and basic scalars. Comment lines
   * are skipped, other indented lines are ignored, and a key without any value or
   * list item is stored as `null`.
   *
   * @param frontmatterContent - Text between the `---` delimiters.
   * @returns The parsed mapping, or `undefined` when no key was recognised.
   */
  private parseFrontmatterFallback(frontmatterContent: string): Record<string, unknown> | undefined {
    const frontmatter: Record<string, unknown> = {}
    const listItem = /^\s*-\s+/

    // Key whose value is expected on the following `- item` lines, with the items seen so far.
    let pendingKey: string | null = null
    let pendingItems: string[] = []

    for (const line of frontmatterContent.split(/\r?\n/)) {
      if (line.trimStart().startsWith('#')) continue

      if (pendingKey !== null && listItem.test(line)) {
        pendingItems.push(this.cleanYamlValue(line.replace(listItem, '').trim()))
        continue
      }

      const colonIndex = line.indexOf(':')
      const indented = line.startsWith(' ') || line.startsWith('\t')
      if (colonIndex <= 0 || indented) continue

      // Any new top-level key closes the pending list, whatever characters the key contains.
      if (pendingKey !== null) {
        frontmatter[pendingKey] = pendingItems.length > 0 ? pendingItems : null
        pendingKey = null
        pendingItems = []
      }

      const key = line.slice(0, colonIndex).trim()
      const value = line.slice(colonIndex + 1).trim()

      if (value === '') {
        pendingKey = key
        continue
      }

      frontmatter[key] = this.parseFallbackValue(value)
    }

    if (pendingKey !== null) {
      frontmatter[pendingKey] = pendingItems.length > 0 ? pendingItems : null
    }

    return Object.keys(frontmatter).length > 0 ? frontmatter : undefined
  }

  /**
   * Interpret one inline frontmatter value for the fallback parser.
   *
   * @param value - Trimmed, non-empty text after the colon.
   * @returns An array, object, boolean, number, `null`, ISO date string or plain string.
   */
  private parseFallbackValue(value: string): unknown {
    // Inline array: [a, b, c]
    if (value.startsWith('[') && value.endsWith(']')) {
      const inner = value.slice(1, -1).trim()
      return inner === '' ? [] : inner.split(',').map((item) => this.cleanYamlValue(item.trim()))
    }

    // Inline object: JSON after swapping single quotes for double quotes.
    if (value.startsWith('{') && value.endsWith('}')) {
      try {
        const parsed: unknown = JSON.parse(value.replace(/'/g, '"'))
        return parsed
      } catch {
        return this.cleanYamlValue(value)
      }
    }

    const lowered = value.toLowerCase()
    if (lowered === 'true') return true
    if (lowered === 'false') return false

    if (/^-?\d+$/.test(value)) return parseInt(value, 10)
    if (/^-?\d+\.\d+$/.test(value)) return parseFloat(value)

    if (lowered === 'null' || value === '~') return null

    // Dates are normalised to ISO strings; values that fail to parse stay plain strings.
    if (/^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2})?/.test(value)) {
      const date = new Date(value)
      if (!Number.isNaN(date.getTime())) return date.toISOString()
    }

    return this.cleanYamlValue(value)
  }

  /**
   * Strip one pair of matching surrounding quotes from a scalar.
   *
   * @param value - Scalar text.
   * @returns The text without its enclosing `"` or `'` pair; unchanged otherwise.
   */
  private cleanYamlValue(value: string): string {
    // The length check keeps a lone quote character from matching as both opener and closer.
    if (value.length >= 2) {
      const doubleQuoted = value.startsWith('"') && value.endsWith('"')
      const singleQuoted = value.startsWith("'") && value.endsWith("'")
      if (doubleQuoted || singleQuoted) {
        return value.slice(1, -1)
      }
    }
    return value
  }

  /**
   * Collect inline `#hashtags`.
   *
   * A tag must start at the beginning of the text or after whitespace, so link
   * anchors such as `(#section)` and URL fragments are not reported. Fenced and
   * inline code is ignored.
   *
   * @param content - Raw markdown.
   * @returns Unique tag names without the `#`, in order of first appearance, or `undefined` when none exist.
   */
  private extractInlineTags(content: string): string[] | undefined {
    const prose = content.replace(/```[\s\S]*?```/g, ' ').replace(/`[^`\n]*`/g, ' ')

    const found = new Set<string>()
    for (const match of prose.matchAll(/(?<!\S)#([\w-]+)/g)) {
      const tag = match[1]
      if (tag) found.add(tag)
    }

    return found.size > 0 ? [...found] : undefined
  }

  /**
   * Count sections at every nesting depth.
   *
   * @param sections - Root sections.
   * @returns Total number of sections in the tree.
   */
  private countAllSections(sections: MarkdownSection[]): number {
    return sections.reduce((total, section) => total + 1 + this.countAllSections(section.children), 0)
  }

  /**
   * Flatten the section tree.
   *
   * @param sections - Root sections.
   * @returns Every section in depth-first, parent-before-children order.
   */
  flattenSections(sections: MarkdownSection[]): MarkdownSection[] {
    return sections.flatMap((section) => [section, ...this.flattenSections(section.children)])
  }
}
|