stellavault 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/CHANGELOG.md +84 -0
  2. package/README.md +109 -119
  3. package/package.json +2 -2
  4. package/packages/cli/dist/commands/ask-cmd.d.ts +4 -0
  5. package/packages/cli/dist/commands/ask-cmd.js +35 -0
  6. package/packages/cli/dist/commands/autopilot-cmd.d.ts +4 -0
  7. package/packages/cli/dist/commands/autopilot-cmd.js +76 -0
  8. package/packages/cli/dist/commands/compile-cmd.d.ts +6 -0
  9. package/packages/cli/dist/commands/compile-cmd.js +30 -0
  10. package/packages/cli/dist/commands/digest-cmd.d.ts +1 -0
  11. package/packages/cli/dist/commands/digest-cmd.js +57 -0
  12. package/packages/cli/dist/commands/draft-cmd.d.ts +5 -0
  13. package/packages/cli/dist/commands/draft-cmd.js +99 -0
  14. package/packages/cli/dist/commands/fleeting-cmd.d.ts +4 -0
  15. package/packages/cli/dist/commands/fleeting-cmd.js +45 -0
  16. package/packages/cli/dist/commands/graph-cmd.js +13 -1
  17. package/packages/cli/dist/commands/ingest-cmd.d.ts +9 -0
  18. package/packages/cli/dist/commands/ingest-cmd.js +161 -0
  19. package/packages/cli/dist/commands/init-cmd.js +39 -1
  20. package/packages/cli/dist/commands/lint-cmd.d.ts +2 -0
  21. package/packages/cli/dist/commands/lint-cmd.js +61 -0
  22. package/packages/cli/dist/index.js +53 -1
  23. package/packages/cli/package.json +1 -1
  24. package/packages/core/dist/api/server.js +393 -0
  25. package/packages/core/dist/config.d.ts +8 -0
  26. package/packages/core/dist/config.js +9 -1
  27. package/packages/core/dist/i18n/note-strings.d.ts +5 -0
  28. package/packages/core/dist/i18n/note-strings.js +94 -0
  29. package/packages/core/dist/index.d.ts +11 -2
  30. package/packages/core/dist/index.js +6 -1
  31. package/packages/core/dist/intelligence/ask-engine.d.ts +23 -0
  32. package/packages/core/dist/intelligence/ask-engine.js +108 -0
  33. package/packages/core/dist/intelligence/draft-generator.d.ts +19 -0
  34. package/packages/core/dist/intelligence/draft-generator.js +161 -0
  35. package/packages/core/dist/intelligence/file-extractors.d.ts +18 -0
  36. package/packages/core/dist/intelligence/file-extractors.js +127 -0
  37. package/packages/core/dist/intelligence/ingest-pipeline.d.ts +32 -0
  38. package/packages/core/dist/intelligence/ingest-pipeline.js +209 -0
  39. package/packages/core/dist/intelligence/knowledge-lint.d.ts +27 -0
  40. package/packages/core/dist/intelligence/knowledge-lint.js +132 -0
  41. package/packages/core/dist/intelligence/wiki-compiler.d.ts +30 -0
  42. package/packages/core/dist/intelligence/wiki-compiler.js +222 -0
  43. package/packages/core/dist/intelligence/youtube-extractor.d.ts +29 -0
  44. package/packages/core/dist/intelligence/youtube-extractor.js +311 -0
  45. package/packages/core/dist/intelligence/zettelkasten.d.ts +59 -0
  46. package/packages/core/dist/intelligence/zettelkasten.js +234 -0
  47. package/packages/core/dist/mcp/server.d.ts +2 -0
  48. package/packages/core/dist/mcp/server.js +24 -1
  49. package/packages/core/dist/mcp/tools/agentic-graph.d.ts +6 -0
  50. package/packages/core/dist/mcp/tools/agentic-graph.js +35 -7
  51. package/packages/core/dist/mcp/tools/ask.d.ts +29 -0
  52. package/packages/core/dist/mcp/tools/ask.js +40 -0
  53. package/packages/core/dist/mcp/tools/generate-draft.d.ts +34 -0
  54. package/packages/core/dist/mcp/tools/generate-draft.js +120 -0
  55. package/packages/core/package.json +21 -2
@@ -0,0 +1,108 @@
1
+ // Feature: stellavault ask — Q&A + auto-filing
2
+ // vault 대상 자연어 질문 → 검색 → 답변 구조화 → vault에 .md 저장
3
+ import { writeFileSync, mkdirSync, existsSync } from 'node:fs';
4
+ import { join, resolve } from 'node:path';
5
/**
 * Search the vault for a question and assemble a structured answer.
 * No LLM is involved at this level: the answer is built purely from search hits.
 *
 * @param {object} searchEngine - Object exposing an async `search({ query, limit })` method.
 * @param {string} question - Natural-language question used as the search query.
 * @param {object} [options]
 * @param {number} [options.limit=10] - Maximum number of hits to request.
 * @param {boolean} [options.save=false] - Persist the answer as a note; only honoured when `vaultPath` is set.
 * @param {string} [options.vaultPath] - Vault root used when saving.
 * @param {string} [options.outputDir='_stellavault/answers'] - Folder (relative to the vault) for saved answers.
 * @returns {Promise<{question: string, answer: string, sources: object[], savedTo: (string|null)}>}
 */
export async function askVault(searchEngine, question, options = {}) {
    const {
        limit: maxHits = 10,
        save: shouldSave = false,
        vaultPath,
        outputDir = '_stellavault/answers',
    } = options;

    // Step 1: run the search.
    const hits = await searchEngine.search({ query: question, limit: maxHits });

    // Step 2: flatten each hit into a compact source descriptor.
    const sources = [];
    for (const hit of hits) {
        sources.push({
            title: hit.document.title,
            filePath: hit.document.filePath,
            // Keep three decimals of the relevance score.
            score: Math.round(hit.score * 1000) / 1000,
            snippet: hit.chunk?.content?.substring(0, 200) ?? '',
        });
    }

    // Step 3: build the Markdown answer from the raw hits.
    const answer = composeAnswer(question, hits);

    // Step 4: optionally file the answer back into the vault.
    const savedTo = shouldSave && vaultPath
        ? saveAnswerToVault(question, answer, sources, vaultPath, outputDir)
        : null;

    return { question, answer, sources, savedTo };
}
29
/**
 * Turn search results into a Markdown answer without using an LLM.
 *
 * Layout: a heading with the question, up to five related documents (score,
 * snippet, tags), the union of all tags, and follow-up command suggestions.
 *
 * @param {string} question - The question that was searched for.
 * @param {Array<{score: number, document: {title: string, tags?: string[]}, chunk?: {content?: string}}>} results
 * @returns {string} Markdown text, or a one-line "no results" message when `results` is empty.
 */
function composeAnswer(question, results) {
    if (results.length === 0) {
        return `No results found for "${question}". Try different keywords or create a note on this topic.`;
    }
    const SNIPPET_LENGTH = 150;
    const lines = [];
    lines.push(`## ${question}\n`);
    // Related documents
    lines.push(`### Related Documents (${results.length})\n`);
    for (const r of results.slice(0, 5)) {
        const score = Math.round(r.score * 100);
        lines.push(`- **${r.document.title}** (${score}% 관련)`);
        const content = r.chunk?.content;
        if (content) {
            const snippet = content.substring(0, SNIPPET_LENGTH).replace(/\n/g, ' ').trim();
            // Only signal truncation when text was actually cut off.
            const ellipsis = content.length > SNIPPET_LENGTH ? '...' : '';
            lines.push(` > ${snippet}${ellipsis}`);
        }
        // Tags are optional on a document; treat a missing list as empty.
        const tags = r.document.tags ?? [];
        if (tags.length > 0) {
            lines.push(` Tags: ${tags.map((t) => `#${t}`).join(' ')}`);
        }
        lines.push('');
    }
    // Related tags: union over every result, not just the five listed above.
    const allTags = new Set();
    for (const r of results) {
        for (const t of r.document.tags ?? []) {
            allTags.add(t);
        }
    }
    if (allTags.size > 0) {
        lines.push(`### Related Tags`);
        lines.push([...allTags].map((t) => `#${t}`).join(' '));
        lines.push('');
    }
    // Explore further
    lines.push(`### Explore Further`);
    lines.push(`- Dig deeper: \`stellavault ask "${question} advanced"\``);
    lines.push(`- Find knowledge gaps: \`stellavault gaps\``);
    return lines.join('\n');
}
69
/**
 * Save an answer into the vault as a Markdown note named
 * "<outputDir>/YYYY-MM-DD-<question-slug>.md".
 *
 * @param {string} question - Original question; used for the title and the filename slug.
 * @param {string} answer - Markdown body of the note.
 * @param {Array<{title: string}>} sources - Search sources; the first two words of the first five titles become tags.
 * @param {string} vaultPath - Vault root directory.
 * @param {string} outputDir - Target folder, resolved against `vaultPath`.
 * @returns {string} Path of the written note as `join(outputDir, filename)`.
 * @throws {Error} 'Invalid output path' when the target would land outside the vault.
 */
function saveAnswerToVault(question, answer, sources, vaultPath, outputDir) {
    const vaultRoot = resolve(vaultPath);
    const date = new Date().toISOString().split('T')[0];
    const slug = question
        .toLowerCase()
        .replace(/[^a-z0-9가-힣\s]/g, '')
        .replace(/\s+/g, '-')
        .substring(0, 60) || 'question'; // all-punctuation questions would otherwise produce "<date>-.md"
    const filename = `${date}-${slug}.md`;
    const filePath = join(outputDir, filename);
    const fullPath = resolve(vaultRoot, filePath);
    // Path traversal guard. A bare startsWith(vaultRoot) would also accept sibling
    // directories such as "<vault>-evil", so compare against the root followed by
    // the platform separator: join() inserts it and the placeholder char is dropped.
    const vaultPrefix = join(vaultRoot, '_').slice(0, -1);
    if (!fullPath.startsWith(vaultPrefix)) {
        throw new Error('Invalid output path');
    }
    // Create the directory only after the path has been validated.
    mkdirSync(resolve(vaultRoot, outputDir), { recursive: true });
    // JSON string literals are valid YAML double-quoted scalars, so JSON.stringify
    // escapes quotes, backslashes and newlines that would break the frontmatter.
    const tags = sources
        .slice(0, 5)
        .flatMap((s) => s.title.split(/\s+/).slice(0, 2))
        .filter(Boolean)
        .map((t) => JSON.stringify(t))
        .join(', ');
    const content = [
        '---',
        `title: ${JSON.stringify(question)}`,
        `date: ${new Date().toISOString()}`,
        'type: answer',
        'source: stellavault-ask',
        `tags: [${tags}]`,
        '---',
        '',
        answer,
        '',
        '---',
        '*Generated by `stellavault ask`. Sources from your vault.*',
    ].join('\n');
    writeFileSync(fullPath, content, 'utf-8');
    return filePath;
}
108
+ //# sourceMappingURL=ask-engine.js.map
@@ -0,0 +1,19 @@
1
+ import { type FolderNames } from '../config.js';
2
+ export interface DraftOptions {
3
+ topic?: string; // Optional filter matched case-insensitively against tags, title and content.
4
+ format?: 'blog' | 'report' | 'outline'; // Output layout; the implementation falls back to 'blog'.
5
+ maxSections?: number; // Upper bound on the number of concept sections (implementation default: 8).
6
+ }
7
+ export interface DraftResult {
8
+ title: string; // Title used for the draft.
9
+ filePath: string; // Path of the written draft under `_drafts`, not resolved against the vault root.
10
+ wordCount: number; // Whitespace-delimited word count of the generated body.
11
+ sourceCount: number; // Number of documents that passed the topic filter.
12
+ concepts: string[]; // Concept names used as sections, most-referenced first.
13
+ }
14
+ /**
15
+ * Generate a draft from the vault's wiki data.
16
+ * "Express" stage: the exit through which knowledge leaves the vault.
17
+ */
18
+ export declare function generateDraft(vaultPath: string, options?: DraftOptions, folders?: FolderNames): DraftResult;
19
+ //# sourceMappingURL=draft-generator.d.ts.map
@@ -0,0 +1,161 @@
1
+ // Express 단계: stellavault draft — wiki 기반 초안 생성
2
+ // Plan SC: SC3 (Express)
3
+ // 카파시의 "자가 컴파일" 결과물을 외부로 표현하는 출구
4
+ import { writeFileSync, mkdirSync, existsSync } from 'node:fs';
5
+ import { join, resolve, basename, extname } from 'node:path';
6
+ import { scanRawDirectory, extractConcepts } from './wiki-compiler.js';
7
+ import { DEFAULT_FOLDERS } from '../config.js';
8
/**
 * Generate a draft from the notes stored in the vault and write it to `_drafts/`.
 * "Express" stage: the exit through which knowledge leaves the vault.
 *
 * @param {string} vaultPath - Vault root directory.
 * @param {object} [options]
 * @param {string} [options.topic] - Case-insensitive filter on tags, title and content; empty/omitted means all notes.
 * @param {'blog'|'report'|'outline'} [options.format='blog'] - Output layout; unknown values fall back to 'blog'.
 * @param {number} [options.maxSections=8] - Maximum number of concept sections.
 * @param {object} [folders=DEFAULT_FOLDERS] - Folder names; `fleeting`, `wiki` and `literature` are scanned.
 * @returns {{title: string, filePath: string, wordCount: number, sourceCount: number, concepts: string[]}}
 * @throws {Error} When no documents exist, or none match the topic.
 */
export function generateDraft(vaultPath, options = {}, folders = DEFAULT_FOLDERS) {
    const { topic, format = 'blog', maxSections = 8 } = options;
    // Collect documents from the fleeting, wiki and literature folders that exist.
    const allDocs = [];
    for (const folder of [folders.fleeting, folders.wiki, folders.literature]) {
        const dir = resolve(vaultPath, folder);
        if (existsSync(dir)) {
            allDocs.push(...scanRawDirectory(dir));
        }
    }
    if (allDocs.length === 0) {
        throw new Error('No documents found in vault. Run `stellavault ingest` first.');
    }
    // Topic filter. The needle is lower-cased once instead of up to three times per document.
    let filteredDocs = allDocs;
    if (topic) {
        const needle = topic.toLowerCase();
        filteredDocs = allDocs.filter((d) => d.tags.some((t) => t.toLowerCase().includes(needle)) ||
            d.title.toLowerCase().includes(needle) ||
            d.content.toLowerCase().includes(needle));
    }
    if (filteredDocs.length === 0) {
        throw new Error(`No documents found for topic "${topic}". Try a broader term.`);
    }
    // Rank concepts by how many documents reference them and keep the top ones.
    const concepts = extractConcepts(filteredDocs);
    const topConcepts = [...concepts.entries()]
        .sort((a, b) => b[1].length - a[1].length)
        .slice(0, maxSections);
    const draftTitle = topic
        ? `Draft: ${topic}`
        : `Knowledge Draft — ${new Date().toISOString().split('T')[0]}`;
    let body;
    switch (format) {
        case 'outline':
            body = generateOutline(draftTitle, topConcepts, filteredDocs);
            break;
        case 'report':
            body = generateReport(draftTitle, topConcepts, filteredDocs);
            break;
        case 'blog':
        default:
            body = generateBlog(draftTitle, topConcepts, filteredDocs);
            break;
    }
    const wordCount = body.split(/\s+/).filter(Boolean).length;
    // Save under _drafts/.
    const draftsDir = resolve(vaultPath, '_drafts');
    mkdirSync(draftsDir, { recursive: true });
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
    // Fall back to 'knowledge' whenever the sanitised topic is empty (no topic,
    // empty string, or only punctuation) so the filename never ends in "-.md".
    const slug = (topic ?? '')
        .replace(/[^a-zA-Z0-9가-힣\s]/g, '')
        .replace(/\s+/g, '-')
        .toLowerCase()
        .slice(0, 40) || 'knowledge';
    const filename = `${timestamp}-${slug}.md`;
    const filePath = join('_drafts', filename);
    const fullPath = resolve(vaultPath, filePath);
    writeFileSync(fullPath, body, 'utf-8');
    return {
        title: draftTitle,
        filePath,
        wordCount,
        sourceCount: filteredDocs.length,
        concepts: topConcepts.map(([c]) => c),
    };
}
77
+ // ─── 포맷별 생성기 ───
78
/**
 * Render a blog-style draft: introduction, one section per concept with quoted
 * excerpts from up to three related notes, and a wiki-link reference list.
 *
 * @param {string} title - Draft title.
 * @param {Array<[string, string[]]>} concepts - `[conceptName, documentPaths]` pairs.
 * @param {Array<{filePath: string, title: string, tags: string[], content: string}>} docs
 * @returns {string} Markdown text.
 */
function generateBlog(title, concepts, docs) {
    const noteCount = docs.length;
    const leadTopics = concepts
        .slice(0, 3)
        .map(([name]) => name)
        .join(', ');
    const out = [
        `# ${title}`,
        '',
        `> Auto-generated from ${noteCount} knowledge notes`,
        '',
        // Introduction: one-line summary of the leading topics.
        '## Introduction',
        '',
        `This post explores ${leadTopics} based on ${noteCount} curated knowledge notes.`,
        '',
    ];
    // One section per concept, with excerpts from related notes.
    for (const [concept, docPaths] of concepts) {
        out.push(`## ${capitalize(concept)}`, '');
        const conceptLower = concept.toLowerCase();
        const isRelated = (d) => docPaths.includes(d.filePath) ||
            d.tags.includes(concept) ||
            d.title.toLowerCase().includes(conceptLower);
        for (const doc of docs.filter(isRelated).slice(0, 3)) {
            const excerpt = extractExcerpt(doc.content, 150);
            if (!excerpt) {
                continue;
            }
            out.push(`> ${excerpt}`, `> — *${doc.title}*`, '');
        }
        out.push('<!-- TODO: Add your analysis and insights here -->', '');
    }
    // References: de-duplicated by file path, capped at 20 entries.
    out.push('## References', '');
    const byPath = new Map();
    for (const d of docs) {
        byPath.set(d.filePath, d);
    }
    for (const doc of [...byPath.values()].slice(0, 20)) {
        const linkTarget = basename(doc.filePath, extname(doc.filePath));
        out.push(`- [[${linkTarget}|${doc.title}]]`);
    }
    out.push('');
    out.push(`---`, `*Generated by \`stellavault draft\` at ${new Date().toISOString()}*`);
    return out.join('\n');
}
110
/**
 * Render a report-style draft: metadata header, executive-summary placeholder,
 * one section per concept with bullet excerpts, and a conclusion placeholder.
 *
 * @param {string} title - Draft title.
 * @param {Array<[string, string[]]>} concepts - `[conceptName, documentPaths]` pairs.
 * @param {Array<{filePath: string, title: string, content: string}>} docs
 * @returns {string} Markdown text.
 */
function generateReport(title, concepts, docs) {
    const today = new Date().toISOString().split('T')[0];
    const topicList = concepts.map(([name]) => name).join(', ');
    const out = [
        `# ${title}`,
        '',
        `**Date:** ${today}`,
        `**Sources:** ${docs.length} documents`,
        `**Key Topics:** ${topicList}`,
        '',
        '## Executive Summary',
        '',
        '<!-- TODO: Write 2-3 sentence summary -->',
        '',
    ];
    concepts.forEach(([concept, docPaths]) => {
        out.push(`## ${capitalize(concept)}`, '');
        out.push(`**Related documents:** ${docPaths.length}`, '');
        // Up to three documents that were recorded for this concept.
        const related = docs.filter((d) => docPaths.includes(d.filePath)).slice(0, 3);
        for (const doc of related) {
            const excerpt = extractExcerpt(doc.content, 200);
            if (excerpt) {
                out.push(`- ${excerpt} *(${doc.title})*`);
            }
        }
        out.push('');
        out.push('**Analysis:** <!-- TODO -->', '');
    });
    out.push('## Conclusion', '', '<!-- TODO -->', '');
    out.push(`---`, `*Generated by \`stellavault draft\` at ${new Date().toISOString()}*`);
    return out.join('\n');
}
134
/**
 * Render a numbered outline: one entry per concept, each followed by the
 * titles of up to three documents recorded for that concept.
 *
 * @param {string} title - Draft title.
 * @param {Array<[string, string[]]>} concepts - `[conceptName, documentPaths]` pairs.
 * @param {Array<{filePath: string, title: string}>} docs
 * @returns {string} Markdown text.
 */
function generateOutline(title, concepts, docs) {
    const out = [`# ${title} — Outline`, '', `Sources: ${docs.length} documents`, ''];
    concepts.forEach(([concept, docPaths], index) => {
        out.push(`${index + 1}. **${capitalize(concept)}** (${docPaths.length} sources)`);
        docs
            .filter((d) => docPaths.includes(d.filePath))
            .slice(0, 3)
            .forEach((doc) => {
                out.push(` - ${doc.title}`);
            });
    });
    out.push('', `---`, `*Generated by \`stellavault draft\` at ${new Date().toISOString()}*`);
    return out.join('\n');
}
149
+ // ─── 유틸 ───
150
/**
 * Pick the first meaningful paragraph of a note as an excerpt.
 *
 * Frontmatter and every Markdown heading line are removed first; paragraphs of
 * 20 characters or fewer, blockquotes ("> ") and list items ("- ") are skipped.
 *
 * @param {string} content - Raw note text, possibly with frontmatter and CRLF line endings.
 * @param {number} maxLen - Maximum excerpt length before "..." is appended.
 * @returns {string} The excerpt, or '' when no paragraph qualifies.
 */
function extractExcerpt(content, maxLen) {
    const body = content
        // Normalise CRLF so the blank-line paragraph split below works.
        .replace(/\r\n/g, '\n')
        // Strip frontmatter.
        .replace(/^---[\s\S]*?---\n?/, '')
        // Strip all heading lines, not just the first, so a section heading is
        // never returned as the excerpt. [ \t] keeps the match on one line.
        .replace(/^#+[ \t]+.*$/gm, '')
        .trim();
    // First meaningful paragraph.
    const first = body
        .split(/\n{2,}/)
        .map((p) => p.trim())
        .find((p) => p.length > 20 && !p.startsWith('> ') && !p.startsWith('- ')) ?? '';
    return first.length > maxLen ? first.slice(0, maxLen) + '...' : first;
}
158
/**
 * Upper-case the first character of a string and leave the rest untouched.
 *
 * @param {string} s
 * @returns {string}
 */
function capitalize(s) {
    const head = s.charAt(0);
    const tail = s.slice(1);
    return `${head.toUpperCase()}${tail}`;
}
161
+ //# sourceMappingURL=draft-generator.js.map
@@ -0,0 +1,18 @@
1
+ export interface ExtractedContent {
2
+ text: string; // Extracted plain text, or a bracketed failure notice when parsing failed.
3
+ metadata: {
4
+ title?: string; // Derived from the file name.
5
+ author?: string;
6
+ pageCount?: number; // Populated by the PDF extractor only.
7
+ wordCount: number; // Whitespace-delimited word count; 0 when extraction failed.
8
+ };
9
+ sourceFormat: 'pdf' | 'docx' | 'pptx' | 'xlsx' | 'xls' | 'text';
10
+ }
11
+ export declare function isBinaryFormat(filePath: string): boolean;
12
+ /**
13
+ * Extract text from a file path, dispatching to a parser by extension.
14
+ * Supported: .pdf, .docx, .pptx, .xlsx, .xls
15
+ * Any other extension is read as UTF-8 text.
16
+ */
17
+ export declare function extractFileContent(filePath: string): Promise<ExtractedContent>;
18
+ //# sourceMappingURL=file-extractors.d.ts.map
@@ -0,0 +1,127 @@
1
+ // Design Ref: §file-ingest-v2 — Binary file text extraction dispatchers
2
+ // Plan SC: SC1-SC5 (format-specific extraction + fallback)
3
+ import { readFileSync } from 'node:fs';
4
+ import { extname, basename } from 'node:path';
5
/** Lower-case extensions that need a dedicated parser instead of a plain text read. */
const BINARY_EXTS = new Set(['.pdf', '.docx', '.pptx', '.xlsx', '.xls']);
/**
 * Tell whether a path points at one of the supported binary document formats.
 * The comparison is case-insensitive on the extension.
 *
 * @param {string} filePath
 * @returns {boolean}
 */
export function isBinaryFormat(filePath) {
    const extension = extname(filePath).toLowerCase();
    return BINARY_EXTS.has(extension);
}
9
/**
 * Extract text from a file, dispatching to a parser by extension.
 * Supported: .pdf, .docx, .pptx, .xlsx, .xls. Any other extension is read as UTF-8 text.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<object>} Extracted text, metadata and the detected source format.
 */
export async function extractFileContent(filePath) {
    const ext = extname(filePath).toLowerCase();
    // The raw buffer is only read for formats that need a binary parser; the text
    // path reads the file itself, so reading up front would load it twice.
    switch (ext) {
        case '.pdf': return extractPdf(readFileSync(filePath), filePath);
        case '.docx': return extractDocx(readFileSync(filePath), filePath);
        case '.pptx': return extractPptx(readFileSync(filePath), filePath);
        case '.xlsx':
        case '.xls': return extractXlsx(readFileSync(filePath), filePath);
        default: return extractText(filePath);
    }
}
26
/**
 * Extract text from a PDF buffer using the optional `unpdf` package.
 * Any failure (including the package being absent) is logged and turned into a fallback result.
 *
 * @param {Buffer} buffer - Raw file bytes.
 * @param {string} filePath - Original path, used for the title.
 * @returns {Promise<object>}
 */
async function extractPdf(buffer, filePath) {
    try {
        // Aliased so the imported function is not confused with this module's own extractText().
        const { extractText: extractPdfText } = await import('unpdf');
        const parsed = await extractPdfText(new Uint8Array(buffer));
        // `text` may be an array (joined here with blank lines) or a single string.
        let text;
        if (Array.isArray(parsed.text)) {
            text = parsed.text.join('\n\n');
        }
        else {
            text = parsed.text ?? '';
        }
        const wordCount = text.split(/\s+/).filter(Boolean).length;
        const metadata = {
            title: basename(filePath, '.pdf'),
            pageCount: parsed.totalPages,
            wordCount,
        };
        return { text, metadata, sourceFormat: 'pdf' };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'unknown';
        console.error(`PDF extraction failed: ${reason}`);
        return fallback(filePath, 'pdf');
    }
}
46
/**
 * Extract raw text from a DOCX buffer using the optional `mammoth` package.
 * Any failure is logged and turned into a fallback result.
 *
 * @param {Buffer} buffer - Raw file bytes.
 * @param {string} filePath - Original path, used for the title.
 * @returns {Promise<object>}
 */
async function extractDocx(buffer, filePath) {
    try {
        const { default: mammoth } = await import('mammoth');
        const { value } = await mammoth.extractRawText({ buffer });
        const text = value ?? '';
        const words = text.split(/\s+/).filter(Boolean);
        return {
            text,
            metadata: {
                title: basename(filePath, '.docx'),
                wordCount: words.length,
            },
            sourceFormat: 'docx',
        };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'unknown';
        console.error(`DOCX extraction failed: ${reason}`);
        return fallback(filePath, 'docx');
    }
}
65
/**
 * Extract text from a PPTX buffer using the optional `officeparser` package.
 * Any failure is logged and turned into a fallback result.
 *
 * @param {Buffer} buffer - Raw file bytes.
 * @param {string} filePath - Original path, used for the title.
 * @returns {Promise<object>}
 */
async function extractPptx(buffer, filePath) {
    try {
        const officeparser = await import('officeparser');
        const parsed = await officeparser.default.parseOffice(buffer);
        // Coalesce before stringifying: String(undefined) would otherwise put the
        // literal word "undefined" into the note instead of empty text.
        const text = String(parsed ?? '');
        return {
            text,
            metadata: {
                title: basename(filePath, '.pptx'),
                wordCount: text.split(/\s+/).filter(Boolean).length,
            },
            sourceFormat: 'pptx',
        };
    }
    catch (err) {
        console.error(`PPTX extraction failed: ${err instanceof Error ? err.message : 'unknown'}`);
        return fallback(filePath, 'pptx');
    }
}
83
/**
 * Extract every sheet of an XLSX/XLS workbook as CSV text using the optional `xlsx` package.
 * Each sheet becomes a "## <sheet name>" section. Failures are logged and turned into a fallback result.
 *
 * @param {Buffer} buffer - Raw file bytes.
 * @param {string} filePath - Original path; its extension decides between 'xls' and 'xlsx'.
 * @returns {Promise<object>}
 */
async function extractXlsx(buffer, filePath) {
    const ext = extname(filePath).toLowerCase();
    const format = ext === '.xls' ? 'xls' : 'xlsx';
    try {
        const XLSX = await import('xlsx');
        const workbook = XLSX.read(buffer);
        const sections = [];
        for (const sheetName of workbook.SheetNames) {
            const csv = XLSX.utils.sheet_to_csv(workbook.Sheets[sheetName]);
            sections.push(`## ${sheetName}\n\n${csv}`);
        }
        const text = sections.join('\n\n');
        const wordCount = text.split(/\s+/).filter(Boolean).length;
        return {
            text,
            metadata: { title: basename(filePath, ext), wordCount },
            sourceFormat: format,
        };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'unknown';
        console.error(`XLSX extraction failed: ${reason}`);
        return fallback(filePath, format);
    }
}
109
/**
 * Read a file as UTF-8 and wrap it in the common extraction result shape.
 *
 * @param {string} filePath
 * @returns {{text: string, metadata: {title: string, wordCount: number}, sourceFormat: 'text'}}
 */
function extractText(filePath) {
    const text = readFileSync(filePath, 'utf-8');
    const words = text.split(/\s+/).filter(Boolean);
    const metadata = {
        title: basename(filePath),
        wordCount: words.length,
    };
    return { text, metadata, sourceFormat: 'text' };
}
120
/**
 * Build the placeholder result returned when a binary parser fails.
 *
 * @param {string} filePath - Path of the file that could not be parsed.
 * @param {string} format - Source format to report.
 * @returns {{text: string, metadata: {title: string, wordCount: number}, sourceFormat: string}}
 */
function fallback(filePath, format) {
    const fileName = basename(filePath);
    const notice = `[Failed to extract text from ${fileName}. Install required parser or convert to a text format.]`;
    return {
        text: notice,
        metadata: { title: fileName, wordCount: 0 },
        sourceFormat: format,
    };
}
127
+ //# sourceMappingURL=file-extractors.js.map
@@ -0,0 +1,32 @@
1
+ import { type FolderNames } from '../config.js';
2
+ export type NoteStage = 'fleeting' | 'literature' | 'permanent';
3
+ export interface IngestInput {
4
+ type: 'url' | 'text' | 'file' | 'youtube' | 'pdf-text' | 'pdf' | 'docx' | 'pptx' | 'xlsx' | 'xls';
5
+ content: string; // For 'url'/'youtube' the first line is the URL and the remaining lines are the body.
6
+ title?: string; // Derived from the content when omitted.
7
+ tags?: string[]; // Derived from the input type and inline #tags when omitted.
8
+ source?: string; // Defaults to the URL line for 'url'/'youtube', otherwise 'manual'.
9
+ stage?: NoteStage; // Requested stage (default 'fleeting'); automatic classification may pick another.
10
+ }
11
+ export interface IngestResult {
12
+ savedTo: string; // Stage folder joined with the generated file name.
13
+ stage: NoteStage; // Stage actually used after classification.
14
+ title: string;
15
+ indexCode?: string; // Absent when index-code assignment failed; it is best-effort.
16
+ tags: string[];
17
+ wordCount: number;
18
+ }
19
+ /**
20
+ * Convert any input into the Stellavault standard format and save it.
21
+ */
22
+ export declare function ingest(vaultPath: string, input: IngestInput, folders?: FolderNames): IngestResult;
23
+ /**
24
+ * Process several inputs as a batch.
25
+ */
26
+ export declare function ingestBatch(vaultPath: string, inputs: IngestInput[]): IngestResult[];
27
+ /**
28
+ * Promote a note: fleeting → literature → permanent.
29
+ * Once the content is refined enough, move it to the next stage.
30
+ */
31
+ export declare function promoteNote(vaultPath: string, filePath: string, targetStage: NoteStage, folders?: FolderNames): string;
32
+ //# sourceMappingURL=ingest-pipeline.d.ts.map
@@ -0,0 +1,209 @@
1
+ // 통합 인제스트 파이프라인
2
+ // 어떤 입력이든 → Stellavault 포맷으로 자동 변환 + 분류 + 연결
3
+ //
4
+ // 지원 입력: URL, PDF 텍스트, 마크다운, 플레인텍스트, YouTube
5
+ // 출력: frontmatter 포맷 .md → raw/ → compile → lint
6
+ import { writeFileSync, mkdirSync, existsSync, readFileSync } from 'node:fs';
7
+ import { join, resolve, basename } from 'node:path';
8
+ import { scanFrontmatter, assignIndexCodes, archiveFile } from './zettelkasten.js';
9
+ import { compileWiki } from './wiki-compiler.js';
10
+ import { DEFAULT_FOLDERS } from '../config.js';
11
/**
 * Make a value safe to embed inside a double-quoted YAML scalar.
 *
 * The input is truncated to 200 characters first and escaped afterwards, so an
 * escape pair such as `\"` can never be cut in half. The result may therefore
 * be slightly longer than 200 characters when escapes are added.
 *
 * @param {string} val - Raw value.
 * @returns {string} Single-line value with `"` and `\` escaped.
 */
function sanitizeYaml(val) {
    return val
        .slice(0, 200)
        .replace(/["\\]/g, '\\$&')
        // Fold every line break (LF, CRLF or lone CR) into one space.
        .replace(/\r?\n|\r/g, ' ');
}
15
/**
 * Convert any input into the Stellavault standard note format and save it.
 *
 * Steps: derive title/tags/source, clean the body, classify the stage, write
 * the note into the stage folder, then run a best-effort wiki compile.
 *
 * @param {string} vaultPath - Vault root directory.
 * @param {object} input - Ingest input (`type`, `content`, optional `title`, `tags`, `source`, `stage`).
 * @param {object} [folders=DEFAULT_FOLDERS] - Folder names for each stage and for the wiki.
 * @returns {{savedTo: string, stage: string, title: string, indexCode: (string|undefined), tags: string[], wordCount: number}}
 * @throws {Error} 'Invalid path' when the target would land outside the vault.
 */
export function ingest(vaultPath, input, folders = DEFAULT_FOLDERS) {
    const stage = input.stage ?? 'fleeting';
    const title = input.title ?? extractTitleFromContent(input.content, input.type);
    const tags = input.tags ?? extractAutoTags(input.content, input.type);
    const isLink = input.type === 'url' || input.type === 'youtube';
    const source = input.source ?? (isLink ? input.content.split('\n')[0] : 'manual');
    // Clean the body.
    const body = cleanContent(input.content, input.type);
    // filter(Boolean) keeps an empty body at 0 words instead of 1.
    const wordCount = body.split(/\s+/).filter(Boolean).length;
    // Automatic classification: the stage depends on length and structure.
    const autoStage = classifyStage(body, stage, wordCount);
    // Choose the folder (config-driven).
    const folderMap = {
        fleeting: folders.fleeting,
        literature: folders.literature,
        permanent: folders.permanent,
    };
    const folder = folderMap[autoStage];
    // Build the file name; fall back to 'untitled' when the title has no usable characters.
    const now = new Date();
    const timestamp = now.toISOString().replace(/[:.]/g, '-').slice(0, 19);
    const slug = title
        .slice(0, 50)
        .replace(/[^a-zA-Z0-9가-힣\s]/g, '')
        .replace(/\s+/g, '-')
        .toLowerCase() || 'untitled';
    const filename = `${timestamp}-${slug}.md`;
    const filePath = join(folder, filename);
    const vaultRoot = resolve(vaultPath);
    const fullPath = resolve(vaultRoot, filePath);
    // Path traversal guard. A bare startsWith(vaultRoot) would also accept sibling
    // directories such as "<vault>-evil", so compare against the root followed by
    // the platform separator: join() inserts it and the placeholder char is dropped.
    const vaultPrefix = join(vaultRoot, '_').slice(0, -1);
    if (!fullPath.startsWith(vaultPrefix)) {
        throw new Error('Invalid path');
    }
    // Create the directory only after the path has been validated.
    const dir = resolve(vaultRoot, folder);
    mkdirSync(dir, { recursive: true });
    // Index code assignment, scanning only the target folder rather than the whole vault.
    let indexCode;
    try {
        const rawEntries = scanFrontmatter(dir);
        const assignments = assignIndexCodes([...rawEntries, {
                filePath, title, tags, connections: [], wordCount,
            }]);
        indexCode = assignments.get(filePath);
    }
    catch {
        // Deliberately best-effort: the note is still saved without an index code.
    }
    // Save in the Stellavault standard format.
    const md = buildStandardNote({
        title,
        body,
        tags,
        stage: autoStage,
        source,
        indexCode,
        created: now.toISOString(),
        inputType: input.type,
    });
    writeFileSync(fullPath, md, 'utf-8');
    // Automatic compile: fleeting → wiki.
    try {
        const rawDir = resolve(vaultPath, folders.fleeting);
        const wikiDir = resolve(vaultPath, folders.wiki);
        if (existsSync(rawDir)) {
            compileWiki(rawDir, wikiDir);
        }
    }
    catch {
        // Deliberately best-effort: a failed compile must not fail the ingest.
    }
    return {
        savedTo: filePath,
        stage: autoStage,
        title,
        indexCode,
        tags,
        wordCount,
    };
}
90
/**
 * Ingest several inputs in order and collect the results.
 *
 * @param {string} vaultPath - Vault root directory.
 * @param {object[]} inputs - Inputs to pass to `ingest` one by one.
 * @param {object} [folders=DEFAULT_FOLDERS] - Folder names forwarded to every `ingest` call,
 *   so a batch can honour configured folders like single ingests do.
 * @returns {object[]} One ingest result per input, in input order.
 */
export function ingestBatch(vaultPath, inputs, folders = DEFAULT_FOLDERS) {
    return inputs.map((input) => ingest(vaultPath, input, folders));
}
96
/**
 * Promote a note to another stage (fleeting → literature → permanent).
 *
 * Rewrites the first `type:` line to the target stage, writes the note into the
 * target stage folder under the same file name, and archives the original.
 *
 * @param {string} vaultPath - Vault root directory.
 * @param {string} filePath - Path of the note, resolved against `vaultPath`.
 * @param {'fleeting'|'literature'|'permanent'} targetStage - Stage to promote to.
 * @param {object} [folders=DEFAULT_FOLDERS] - Folder names for each stage.
 * @returns {string} Path of the promoted note (target folder joined with the file name).
 * @throws {Error} When a path escapes the vault, the file is missing, or the stage has no folder.
 */
export function promoteNote(vaultPath, filePath, targetStage, folders = DEFAULT_FOLDERS) {
    const vaultRoot = resolve(vaultPath);
    // Root plus platform separator; a bare startsWith(vaultRoot) would also accept
    // sibling directories such as "<vault>-evil".
    const vaultPrefix = join(vaultRoot, '_').slice(0, -1);
    const fullPath = resolve(vaultRoot, filePath);
    // The source must be inside the vault too, otherwise any readable file could
    // be copied into the vault and then handed to archiveFile().
    if (!fullPath.startsWith(vaultPrefix)) {
        throw new Error('Invalid path');
    }
    if (!existsSync(fullPath)) {
        throw new Error(`File not found: ${filePath}`);
    }
    // Target folder (config-driven).
    const folderMap = {
        fleeting: folders.fleeting,
        literature: folders.literature,
        permanent: folders.permanent,
    };
    const targetFolder = folderMap[targetStage];
    if (typeof targetFolder !== 'string') {
        throw new Error(`No folder configured for stage "${targetStage}"`);
    }
    const newPath = join(targetFolder, basename(filePath));
    const newFullPath = resolve(vaultRoot, newPath);
    if (!newFullPath.startsWith(vaultPrefix)) {
        throw new Error('Invalid path');
    }
    const content = readFileSync(fullPath, 'utf-8');
    // Change the `type` field (first line starting with "type:").
    const updated = content.replace(/^type:\s*.+$/m, `type: ${targetStage}`);
    mkdirSync(resolve(vaultRoot, targetFolder), { recursive: true });
    writeFileSync(newFullPath, updated, 'utf-8');
    // Archive the original, unless the note was promoted in place, in which case
    // the "original" is the file that was just written.
    if (newFullPath !== fullPath) {
        archiveFile(fullPath);
    }
    return newPath;
}
126
+ // ─── 내부 헬퍼 ───
127
/**
 * Derive a note title from its content.
 *
 * - 'url' / 'youtube': host name plus up to 40 characters of the URL path taken
 *   from the first line; 'Untitled Clip' when that line is not a valid URL.
 * - Otherwise: the first level-1 Markdown heading, else the first non-empty
 *   line (up to 80 characters), else 'Untitled'.
 *
 * @param {string} content - Raw input content.
 * @param {string} type - Ingest input type.
 * @returns {string}
 */
function extractTitleFromContent(content, type) {
    if (type === 'url' || type === 'youtube') {
        // Host name plus path from the URL on the first line.
        try {
            const url = new URL(content.split('\n')[0]);
            return url.hostname + url.pathname.slice(0, 40);
        }
        catch {
            return 'Untitled Clip';
        }
    }
    // First heading. [ \t]+ keeps the match on the heading line (\s+ could span a
    // blank line), and trim() drops a trailing "\r" from CRLF content.
    const headingText = content.match(/^#[ \t]+(.+)$/m)?.[1].trim();
    if (headingText) {
        return headingText;
    }
    // First non-empty line, so leading blank lines do not yield 'Untitled'.
    const firstLine = content
        .split('\n')
        .map((line) => line.trim())
        .find((line) => line.length > 0) ?? '';
    return firstLine.slice(0, 80) || 'Untitled';
}
145
/**
 * Derive tags automatically: one tag for the input type (when it has one) plus
 * inline `#tags` found in the content, capped at 10 tags in total.
 *
 * @param {string} content - Raw input content.
 * @param {string} type - Ingest input type.
 * @returns {string[]} Unique tags in discovery order.
 */
function extractAutoTags(content, type) {
    const typeTags = new Map([
        ['url', 'web-clip'],
        ['youtube', 'youtube'],
        ['pdf-text', 'pdf'],
        ['pdf', 'pdf'],
        ['docx', 'document'],
        ['pptx', 'presentation'],
        ['xlsx', 'spreadsheet'],
        ['xls', 'spreadsheet'],
    ]);
    const tags = new Set();
    // Tag for the input type.
    const typeTag = typeTags.get(type);
    if (typeTag) {
        tags.add(typeTag);
    }
    // Inline #tags. The lookbehind rejects a '#' glued to a word or URL character,
    // so URL fragments ("page#install") and strings like "C#sharp" are not tags.
    const inlineTag = /(?<![\w가-힣\/#&?=.-])#([a-zA-Z가-힣][a-zA-Z0-9가-힣_-]{2,})/g;
    for (const [, tag] of content.matchAll(inlineTag)) {
        tags.add(tag);
    }
    return [...tags].slice(0, 10);
}
165
/**
 * Produce the note body from raw input content.
 *
 * For 'url' / 'youtube' inputs the first line holds the URL and the remaining
 * lines hold the body; when nothing follows the URL, the URL line itself is
 * returned. Every other input type is simply trimmed.
 *
 * @param {string} content - Raw input content.
 * @param {string} type - Ingest input type.
 * @returns {string}
 */
function cleanContent(content, type) {
    const isLink = type === 'url' || type === 'youtube';
    if (!isLink) {
        return content.trim();
    }
    const [urlLine, ...bodyLines] = content.split('\n');
    const bodyText = bodyLines.join('\n').trim();
    return bodyText || urlLine;
}
173
/**
 * Decide which stage a note is stored under.
 *
 * Rules, in order:
 * - a requested 'permanent' stage is always honoured;
 * - fewer than 200 words and no "## " heading → 'fleeting';
 * - 200 words or more with a "## " heading or "> " marker → 'literature';
 * - anything else keeps the requested stage.
 *
 * @param {string} body - Cleaned note body.
 * @param {string} requestedStage - Stage requested by the caller.
 * @param {number} wordCount - Word count of the body.
 * @returns {string}
 */
function classifyStage(body, requestedStage, wordCount) {
    if (requestedStage === 'permanent') {
        return 'permanent';
    }
    const hasSections = body.includes('## ');
    const isShortAndFlat = wordCount < 200 && !hasSections;
    if (isShortAndFlat) {
        return 'fleeting';
    }
    const isLongAndStructured = wordCount >= 200 && (hasSections || body.includes('> '));
    return isLongAndStructured ? 'literature' : requestedStage;
}
187
/**
 * Render a note in the Stellavault standard format: YAML frontmatter, a
 * level-1 heading with the title, the body, and an ingest footer.
 *
 * @param {object} params
 * @param {string} params.title
 * @param {string} params.body
 * @param {string[]} params.tags
 * @param {string} params.stage - Written as the `type` field.
 * @param {string} params.source
 * @param {string} [params.indexCode] - Written as `zettel_id` when present.
 * @param {string} params.created - Creation timestamp string.
 * @param {string} params.inputType
 * @returns {string} Complete Markdown text of the note.
 */
function buildStandardNote(params) {
    // Folds line breaks so a value cannot spill onto extra frontmatter or heading lines.
    const toSingleLine = (value) => String(value).replace(/\r?\n|\r/g, ' ');
    // Tags are quoted scalars, so they get the same escaping as the title.
    const tagList = params.tags.map((t) => `"${sanitizeYaml(String(t))}"`).join(', ');
    const lines = [
        '---',
        `title: "${sanitizeYaml(params.title)}"`,
        `type: ${params.stage}`,
        // Kept unquoted so existing readers see the same format as before.
        // NOTE(review): a source containing " #" or ": " is still ambiguous YAML;
        // confirm what the frontmatter reader accepts before quoting this field.
        `source: ${toSingleLine(params.source)}`,
        `input_type: ${params.inputType}`,
        params.indexCode ? `zettel_id: "${params.indexCode}"` : null,
        `tags: [${tagList}]`,
        `created: ${params.created}`,
        `summary: "${sanitizeYaml(params.body.slice(0, 100))}"`,
        '---',
        '',
        `# ${toSingleLine(params.title)}`,
        '',
        params.body,
        '',
        '---',
        `*Ingested via \`stellavault ingest\` (${params.inputType}) at ${params.created}*`,
    ];
    // The zettel_id entry is null when no index code was assigned; drop it.
    return lines.filter((l) => l !== null).join('\n');
}
209
+ //# sourceMappingURL=ingest-pipeline.js.map