escribano 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +297 -0
  3. package/dist/0_types.js +279 -0
  4. package/dist/actions/classify-session.js +77 -0
  5. package/dist/actions/create-contexts.js +44 -0
  6. package/dist/actions/create-topic-blocks.js +68 -0
  7. package/dist/actions/extract-metadata.js +24 -0
  8. package/dist/actions/generate-artifact-v3.js +296 -0
  9. package/dist/actions/generate-artifact.js +61 -0
  10. package/dist/actions/generate-summary-v3.js +260 -0
  11. package/dist/actions/outline-index.js +204 -0
  12. package/dist/actions/process-recording-v2.js +494 -0
  13. package/dist/actions/process-recording-v3.js +412 -0
  14. package/dist/actions/process-session.js +183 -0
  15. package/dist/actions/publish-summary-v3.js +303 -0
  16. package/dist/actions/sync-to-outline.js +196 -0
  17. package/dist/adapters/audio.silero.adapter.js +69 -0
  18. package/dist/adapters/cap.adapter.js +94 -0
  19. package/dist/adapters/capture.cap.adapter.js +107 -0
  20. package/dist/adapters/capture.filesystem.adapter.js +124 -0
  21. package/dist/adapters/embedding.ollama.adapter.js +141 -0
  22. package/dist/adapters/intelligence.adapter.js +202 -0
  23. package/dist/adapters/intelligence.mlx.adapter.js +395 -0
  24. package/dist/adapters/intelligence.ollama.adapter.js +741 -0
  25. package/dist/adapters/publishing.outline.adapter.js +75 -0
  26. package/dist/adapters/storage.adapter.js +81 -0
  27. package/dist/adapters/storage.fs.adapter.js +83 -0
  28. package/dist/adapters/transcription.whisper.adapter.js +206 -0
  29. package/dist/adapters/video.ffmpeg.adapter.js +405 -0
  30. package/dist/adapters/whisper.adapter.js +168 -0
  31. package/dist/batch-context.js +329 -0
  32. package/dist/db/helpers.js +50 -0
  33. package/dist/db/index.js +95 -0
  34. package/dist/db/migrate.js +80 -0
  35. package/dist/db/repositories/artifact.sqlite.js +77 -0
  36. package/dist/db/repositories/cluster.sqlite.js +92 -0
  37. package/dist/db/repositories/context.sqlite.js +75 -0
  38. package/dist/db/repositories/index.js +10 -0
  39. package/dist/db/repositories/observation.sqlite.js +70 -0
  40. package/dist/db/repositories/recording.sqlite.js +56 -0
  41. package/dist/db/repositories/subject.sqlite.js +64 -0
  42. package/dist/db/repositories/topic-block.sqlite.js +45 -0
  43. package/dist/db/types.js +4 -0
  44. package/dist/domain/classification.js +60 -0
  45. package/dist/domain/context.js +97 -0
  46. package/dist/domain/index.js +2 -0
  47. package/dist/domain/observation.js +17 -0
  48. package/dist/domain/recording.js +41 -0
  49. package/dist/domain/segment.js +93 -0
  50. package/dist/domain/session.js +93 -0
  51. package/dist/domain/time-range.js +38 -0
  52. package/dist/domain/transcript.js +79 -0
  53. package/dist/index.js +173 -0
  54. package/dist/pipeline/context.js +162 -0
  55. package/dist/pipeline/events.js +2 -0
  56. package/dist/prerequisites.js +226 -0
  57. package/dist/scripts/rebuild-index.js +53 -0
  58. package/dist/scripts/seed-fixtures.js +290 -0
  59. package/dist/services/activity-segmentation.js +333 -0
  60. package/dist/services/activity-segmentation.test.js +191 -0
  61. package/dist/services/app-normalization.js +212 -0
  62. package/dist/services/cluster-merge.js +69 -0
  63. package/dist/services/clustering.js +237 -0
  64. package/dist/services/debug.js +58 -0
  65. package/dist/services/frame-sampling.js +318 -0
  66. package/dist/services/signal-extraction.js +106 -0
  67. package/dist/services/subject-grouping.js +342 -0
  68. package/dist/services/temporal-alignment.js +99 -0
  69. package/dist/services/vlm-enrichment.js +84 -0
  70. package/dist/services/vlm-service.js +130 -0
  71. package/dist/stats/index.js +3 -0
  72. package/dist/stats/observer.js +65 -0
  73. package/dist/stats/repository.js +36 -0
  74. package/dist/stats/resource-tracker.js +86 -0
  75. package/dist/stats/types.js +1 -0
  76. package/dist/test-classification-prompts.js +181 -0
  77. package/dist/tests/cap.adapter.test.js +75 -0
  78. package/dist/tests/capture.cap.adapter.test.js +69 -0
  79. package/dist/tests/classify-session.test.js +140 -0
  80. package/dist/tests/db/repositories.test.js +243 -0
  81. package/dist/tests/domain/time-range.test.js +31 -0
  82. package/dist/tests/integration.test.js +84 -0
  83. package/dist/tests/intelligence.adapter.test.js +102 -0
  84. package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
  85. package/dist/tests/process-v2.test.js +90 -0
  86. package/dist/tests/services/clustering.test.js +112 -0
  87. package/dist/tests/services/frame-sampling.test.js +152 -0
  88. package/dist/tests/utils/ocr.test.js +76 -0
  89. package/dist/tests/utils/parallel.test.js +57 -0
  90. package/dist/tests/visual-observer.test.js +175 -0
  91. package/dist/utils/id-normalization.js +15 -0
  92. package/dist/utils/index.js +9 -0
  93. package/dist/utils/model-detector.js +154 -0
  94. package/dist/utils/ocr.js +80 -0
  95. package/dist/utils/parallel.js +32 -0
  96. package/migrations/001_initial.sql +109 -0
  97. package/migrations/002_clusters.sql +41 -0
  98. package/migrations/003_observations_vlm_fields.sql +14 -0
  99. package/migrations/004_observations_unique.sql +18 -0
  100. package/migrations/005_processing_stats.sql +29 -0
  101. package/migrations/006_vlm_raw_response.sql +6 -0
  102. package/migrations/007_subjects.sql +23 -0
  103. package/migrations/008_artifacts_recording.sql +6 -0
  104. package/migrations/009_artifact_subjects.sql +10 -0
  105. package/package.json +82 -0
  106. package/prompts/action-items.md +55 -0
  107. package/prompts/blog-draft.md +54 -0
  108. package/prompts/blog-research.md +87 -0
  109. package/prompts/card.md +54 -0
  110. package/prompts/classify-segment.md +38 -0
  111. package/prompts/classify.md +37 -0
  112. package/prompts/code-snippets.md +163 -0
  113. package/prompts/extract-metadata.md +149 -0
  114. package/prompts/notes.md +83 -0
  115. package/prompts/runbook.md +123 -0
  116. package/prompts/standup.md +50 -0
  117. package/prompts/step-by-step.md +125 -0
  118. package/prompts/subject-grouping.md +31 -0
  119. package/prompts/summary-v3.md +89 -0
  120. package/prompts/summary.md +77 -0
  121. package/prompts/topic-classifier.md +24 -0
  122. package/prompts/topic-extract.md +13 -0
  123. package/prompts/vlm-batch.md +21 -0
  124. package/prompts/vlm-single.md +19 -0
@@ -0,0 +1,329 @@
1
+ /**
2
+ * Batch Processing Context
3
+ *
4
+ * Provides reusable initialization and video processing functions
5
+ * for batch operations (e.g., quality testing multiple recordings).
6
+ *
7
+ * Key Design Decisions:
8
+ * - Adapters initialized ONCE and reused across recordings
9
+ * - MLX bridge spawns once, reused for all videos (no socket conflicts)
10
+ * - Filesystem capture source created per-video (hardcoded to file input)
11
+ * - Results returned as objects (never throws) for reliable batch processing
12
+ */
13
+ import { execSync } from 'node:child_process';
14
+ import { homedir } from 'node:os';
15
+ import path from 'node:path';
16
+ import { generateArtifactV3, } from './actions/generate-artifact-v3.js';
17
+ import { updateGlobalIndex } from './actions/outline-index.js';
18
+ import { processRecordingV3 } from './actions/process-recording-v3.js';
19
+ import { hasContentChanged, publishSummaryV3, updateRecordingOutlineMetadata, } from './actions/publish-summary-v3.js';
20
+ import { createSileroPreprocessor } from './adapters/audio.silero.adapter.js';
21
+ import { createFilesystemCaptureSource } from './adapters/capture.filesystem.adapter.js';
22
+ import { cleanupMlxBridge, createMlxIntelligenceService, } from './adapters/intelligence.mlx.adapter.js';
23
+ import { createOllamaIntelligenceService } from './adapters/intelligence.ollama.adapter.js';
24
+ import { createOutlinePublishingService } from './adapters/publishing.outline.adapter.js';
25
+ import { createWhisperTranscriptionService } from './adapters/transcription.whisper.adapter.js';
26
+ import { createFfmpegVideoService } from './adapters/video.ffmpeg.adapter.js';
27
+ import { getDbPath, getRepositories } from './db/index.js';
28
+ import { log, setResourceTracker, step, withPipeline, } from './pipeline/context.js';
29
+ import { ResourceTracker, setupStatsObserver, } from './stats/index.js';
30
+ import { formatModelSelection, selectBestLLMModel, } from './utils/model-detector.js';
31
// Whisper model location: ~/.escribano/models/ggml-large-v3.bin
const MODELS_DIR = path.join(homedir(), '.escribano', 'models');
const MODEL_FILE = 'ggml-large-v3.bin';
const MODEL_PATH = path.join(MODELS_DIR, MODEL_FILE);
/**
 * Initialize system components for batch processing.
 * All adapters are created ONCE and reused across recordings.
 *
 * Opens the database (running migrations via getRepositories), wires the
 * stats observer, detects the best local LLM, constructs all adapters, and
 * registers them with a ResourceTracker for PID/resource accounting.
 *
 * @returns {Promise<object>} batch context: { repos, adapters: { vlm, llm,
 *          video, preprocessor, transcription }, resourceTracker, outlineConfig }
 */
export async function initializeSystem() {
    console.log('Initializing database...');
    const repos = getRepositories();
    console.log(`Database ready: ${getDbPath()}`);
    console.log('');
    // Setup stats observer to capture pipeline events
    setupStatsObserver(repos.stats);
    // Detect best LLM model
    const modelSelection = await selectBestLLMModel();
    console.log(formatModelSelection(modelSelection));
    console.log('');
    // Initialize adapters ONCE (reused across every recording in the batch)
    console.log('[VLM] Using MLX-VLM for image processing');
    const vlm = createMlxIntelligenceService();
    console.log('[LLM] Using Ollama for text generation');
    const llm = createOllamaIntelligenceService();
    const video = createFfmpegVideoService();
    const preprocessor = createSileroPreprocessor();
    const transcription = createWhisperTranscriptionService({
        binaryPath: 'whisper-cli',
        model: MODEL_PATH,
        cwd: MODELS_DIR,
        outputFormat: 'json',
    });
    // Setup resource tracking
    const resourceTracker = new ResourceTracker();
    resourceTracker.register(vlm);
    resourceTracker.register(video);
    resourceTracker.register(preprocessor);
    // Ollama runs as a daemon - special case: we did not spawn it ourselves,
    // so its PID is looked up with pgrep on demand instead of being tracked
    // from a child-process handle.
    resourceTracker.register({
        getResourceName: () => 'ollama',
        getPid: () => {
            try {
                const output = execSync('pgrep -f "ollama serve"').toString().trim();
                const pid = parseInt(output.split('\n')[0] ?? '0', 10);
                return pid > 0 ? pid : null;
            }
            catch {
                // pgrep exits non-zero when no matching process exists
                return null;
            }
        },
    });
    setResourceTracker(resourceTracker);
    const outlineConfig = getOutlineConfig();
    return {
        repos,
        adapters: { vlm, llm, video, preprocessor, transcription },
        resourceTracker,
        outlineConfig,
    };
}
90
/**
 * Process a single video file.
 *
 * Note: Uses FilesystemCaptureSource (hardcoded for file input, not Cap recordings).
 * The video adapter is from context, but capture source is created per-call.
 *
 * Never throws: every outcome — including failure — is returned as a result
 * object so batch callers can keep iterating over remaining videos.
 *
 * @param {string} videoPath - path to the video file to process
 * @param {object} ctx - batch context from initializeSystem()
 * @param {object} [options] - force, skipSummary, micAudioPath, systemAudioPath,
 *        format ('card' default), includePersonal, copyToClipboard, printToStdout
 * @returns {Promise<object>} { success, recordingId, videoPath, duration, ... }
 *          plus artifact/outline fields on success, or `error` on failure
 */
export async function processVideo(videoPath, ctx, options = {}) {
    const startTime = Date.now();
    const { force = false, skipSummary = false, micAudioPath, systemAudioPath, format = 'card', includePersonal = false, copyToClipboard = false, printToStdout = false, } = options;
    const { repos, adapters, outlineConfig } = ctx;
    const { vlm, llm, video, preprocessor, transcription } = adapters;
    try {
        // Create capture source for this specific file
        // Note: Hardcoded to filesystem source, not Cap recordings
        const captureSource = createFilesystemCaptureSource({ videoPath, micAudioPath, systemAudioPath }, video);
        // Get recording metadata
        const recording = await captureSource.getLatestRecording();
        if (!recording) {
            return {
                success: false,
                recordingId: '',
                videoPath,
                error: `Failed to load video file: ${videoPath}`,
                duration: (Date.now() - startTime) / 1000,
            };
        }
        console.log(`\nProcessing recording: ${recording.id}`);
        console.log(`Duration: ${Math.round(recording.duration / 60)} minutes`);
        // Check/create DB recording
        const dbRec = repos.recordings.findById(recording.id);
        if (!dbRec) {
            repos.recordings.save({
                id: recording.id,
                video_path: recording.videoPath,
                audio_mic_path: recording.audioMicPath,
                audio_system_path: recording.audioSystemPath,
                duration: recording.duration,
                captured_at: recording.capturedAt.toISOString(),
                status: 'raw',
                processing_step: null,
                source_type: recording.source.type,
                source_metadata: JSON.stringify(recording.source),
                error_message: null,
            });
            console.log('Created database entry');
        }
        else if (force) {
            // Force: wipe derived data so the pipeline reruns from scratch
            console.log('Force flag set: clearing existing data');
            repos.observations.deleteByRecording(recording.id);
            repos.topicBlocks.deleteByRecording(recording.id);
            repos.subjects.deleteByRecording(recording.id);
            repos.recordings.updateStatus(recording.id, 'raw', null, null);
        }
        else if (dbRec.status === 'published' || dbRec.status === 'processed') {
            // Already done: keep data, only the artifact/publish phase runs below
            console.log(`Recording already ${dbRec.status}. Regenerating artifact...`);
        }
        // Run VLM pipeline (skip if already processed or published)
        const skipProcessing = dbRec &&
            (dbRec.status === 'processed' || dbRec.status === 'published') &&
            !force;
        if (!skipProcessing) {
            // resume = a partially-completed prior run left a processing_step
            const runType = force
                ? 'force'
                : dbRec?.processing_step
                    ? 'resume'
                    : 'initial';
            const runMetadata = collectRunMetadata(ctx.resourceTracker);
            await withPipeline(recording.id, runType, runMetadata, async () => {
                await processRecordingV3(recording.id, repos, { preprocessor, transcription, video, intelligence: vlm }, { force });
            });
        }
        // Generate artifact and publish (unless skipped), tracked as a pipeline run
        let artifact = null;
        let outlineUrl;
        if (!skipSummary) {
            const artifactRunMetadata = collectRunMetadata(ctx.resourceTracker);
            const pipelineResult = await withPipeline(recording.id, 'artifact', artifactRunMetadata, async () => {
                console.log(`\nGenerating ${format} artifact...`);
                const generatedArtifact = await generateArtifactV3(recording.id, repos, llm, {
                    recordingId: recording.id,
                    format,
                    includePersonal,
                    copyToClipboard,
                    printToStdout,
                });
                console.log(`Artifact saved: ${generatedArtifact.filePath}`);
                if (generatedArtifact.workDuration > 0) {
                    const workMins = Math.round(generatedArtifact.workDuration / 60);
                    console.log(`Work time: ${workMins} minutes`);
                }
                if (generatedArtifact.personalDuration > 0 && !includePersonal) {
                    const personalMins = Math.round(generatedArtifact.personalDuration / 60);
                    console.log(`Personal time: ${personalMins} minutes (filtered)`);
                }
                // Publish to Outline (unless no config)
                let publishedUrl;
                if (outlineConfig) {
                    try {
                        await step('outline publish', async () => {
                            console.log('\nPublishing to Outline...');
                            const publishing = createOutlinePublishingService(outlineConfig);
                            const topicBlocks = repos.topicBlocks.findByRecording(recording.id);
                            const dbRecording = repos.recordings.findById(recording.id);
                            if (dbRecording &&
                                !hasContentChanged(dbRecording, generatedArtifact.content, format)) {
                                // Content hash matches the last publish — avoid a no-op API call
                                console.log('Content unchanged, skipping publish.');
                            }
                            else {
                                const published = await publishSummaryV3(recording.id, generatedArtifact.content, topicBlocks, repos, publishing, { collectionName: outlineConfig.collectionName, format });
                                const outlineInfo = {
                                    url: published.url,
                                    documentId: published.documentId,
                                    collectionId: published.collectionId,
                                    publishedAt: new Date().toISOString(),
                                    contentHash: published.contentHash,
                                };
                                updateRecordingOutlineMetadata(recording.id, outlineInfo, repos, format);
                                console.log(`Published to Outline: ${published.url}`);
                                publishedUrl = published.url;
                            }
                            // Update status BEFORE rebuilding index so findByStatus('published') includes this recording
                            repos.recordings.updateStatus(recording.id, 'published', null, null);
                            log('info', `[Outline] Recording ${recording.id} status updated to 'published'`);
                            // Update global index (after status update so this recording is included)
                            if (publishedUrl) {
                                const indexResult = await updateGlobalIndex(repos, publishing, {
                                    collectionName: outlineConfig.collectionName,
                                });
                                console.log(`Updated index: ${indexResult.url}`);
                            }
                        });
                    }
                    catch (error) {
                        // Publishing is best-effort: a failed Outline push must not
                        // fail the whole video. Warn, record the error, and continue.
                        const errorMessage = error.message;
                        console.warn(`Warning: Failed to publish to Outline: ${errorMessage}`);
                        log('warn', `[Outline] Publishing failed: ${errorMessage}`);
                        // Store error in metadata
                        try {
                            const dbRecording = repos.recordings.findById(recording.id);
                            const currentMetadata = dbRecording?.source_metadata
                                ? JSON.parse(dbRecording.source_metadata)
                                : {};
                            const existingOutline = currentMetadata.outline || {};
                            const updatedMetadata = {
                                ...currentMetadata,
                                outline: {
                                    ...existingOutline,
                                    error: errorMessage,
                                    failedAt: new Date().toISOString(),
                                },
                            };
                            repos.recordings.updateMetadata(recording.id, JSON.stringify(updatedMetadata));
                        }
                        catch (metaError) {
                            log('error', `[Outline] Failed to store error metadata: ${metaError.message}`);
                        }
                    }
                }
                else {
                    console.log('No Outline configuration found. Marking as complete locally.');
                    repos.recordings.updateStatus(recording.id, 'published', null, null);
                }
                return { artifact: generatedArtifact, outlineUrl: publishedUrl };
            });
            artifact = pipelineResult.artifact;
            outlineUrl = pipelineResult.outlineUrl;
        }
        console.log('\n✓ Complete!');
        return {
            success: true,
            recordingId: recording.id,
            videoPath,
            artifactPath: artifact?.filePath,
            outlineUrl,
            duration: (Date.now() - startTime) / 1000,
            format: artifact?.format,
            workDuration: artifact?.workDuration,
            personalDuration: artifact?.personalDuration,
        };
    }
    catch (error) {
        // NOTE(review): recordingId is reported as '' even when the failure
        // happened after the recording id was known — confirm whether callers
        // rely on that before changing it.
        const errorMessage = error.message;
        console.error(`\n✗ Failed: ${errorMessage}`);
        return {
            success: false,
            recordingId: '',
            videoPath,
            error: errorMessage,
            duration: (Date.now() - startTime) / 1000,
        };
    }
}
282
/**
 * Get Outline configuration from environment if available.
 *
 * Requires both ESCRIBANO_OUTLINE_URL and ESCRIBANO_OUTLINE_TOKEN; returns
 * null when either is absent. The collection name defaults to
 * "Escribano Sessions" when ESCRIBANO_OUTLINE_COLLECTION is not set.
 */
function getOutlineConfig() {
    const { ESCRIBANO_OUTLINE_URL: url, ESCRIBANO_OUTLINE_TOKEN: token, ESCRIBANO_OUTLINE_COLLECTION: collection, } = process.env;
    if (!url || !token) {
        return null;
    }
    return {
        url,
        token,
        collectionName: collection ?? 'Escribano Sessions',
    };
}
297
/**
 * Collect metadata about the current run.
 *
 * Captures model selection (from env, with defaults), the short git commit
 * hash when run inside a repository, runtime/platform details, and the
 * tuning-related environment variables. When a resource tracker is supplied,
 * its system info snapshot is attached under `system`.
 */
function collectRunMetadata(resourceTracker) {
    const resolveCommitHash = () => {
        try {
            return execSync('git rev-parse --short HEAD', {
                encoding: 'utf-8',
            }).trim();
        }
        catch {
            // Not in a git repo
            return 'unknown';
        }
    };
    const metadata = {
        vlm_model: process.env.ESCRIBANO_VLM_MODEL ??
            'mlx-community/Qwen3-VL-2B-Instruct-bf16',
        llm_model: process.env.ESCRIBANO_LLM_MODEL ?? 'auto-detected',
        commit_hash: resolveCommitHash(),
        node_version: process.version,
        platform: process.platform,
        arch: process.arch,
        env: {
            ESCRIBANO_SAMPLE_INTERVAL: process.env.ESCRIBANO_SAMPLE_INTERVAL,
            ESCRIBANO_VLM_BATCH_SIZE: process.env.ESCRIBANO_VLM_BATCH_SIZE,
            ESCRIBANO_VERBOSE: process.env.ESCRIBANO_VERBOSE,
        },
    };
    if (resourceTracker) {
        metadata.system = resourceTracker.getSystemInfo();
    }
    return metadata;
}
329
+ export { cleanupMlxBridge };
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Database Helpers
3
+ *
4
+ * Utilities for ID generation, embedding conversion, etc.
5
+ */
6
+ import { uuidv7 } from 'uuidv7';
7
/**
 * Generate a time-sortable unique ID (UUIDv7).
 *
 * UUIDv7 embeds a millisecond timestamp in its high bits, so ids sort
 * roughly by creation time — convenient for primary keys in SQLite.
 */
export function generateId() {
    return uuidv7();
}
13
/**
 * Convert a numeric embedding into a Buffer for SQLite BLOB storage.
 * Each value is stored as a 32-bit float (4 bytes, platform byte order).
 */
export function embeddingToBlob(embedding) {
    const packed = new Float32Array(embedding);
    return Buffer.from(packed.buffer);
}
20
/**
 * Convert a SQLite BLOB (Buffer) back into a plain number array.
 * Inverse of embeddingToBlob: reads 4-byte float32 values in place,
 * honoring the Buffer's byteOffset into its backing ArrayBuffer.
 */
export function blobToEmbedding(blob) {
    const count = blob.length / 4; // 4 bytes per float32
    const view = new Float32Array(blob.buffer, blob.byteOffset, count);
    return [...view];
}
27
/**
 * Compute cosine similarity between two embeddings.
 *
 * Returns a value in [-1, 1]; 0 when either vector has zero magnitude.
 * @throws {Error} when the two embeddings differ in length.
 */
export function cosineSimilarity(a, b) {
    if (a.length !== b.length) {
        throw new Error(`Embedding dimension mismatch: ${a.length} vs ${b.length}`);
    }
    let dot = 0;
    let sumSqA = 0;
    let sumSqB = 0;
    for (const [i, value] of a.entries()) {
        dot += value * b[i];
        sumSqA += value * value;
        sumSqB += b[i] * b[i];
    }
    const magnitude = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
    // Guard against division by zero for all-zero vectors
    if (magnitude === 0) {
        return 0;
    }
    return dot / magnitude;
}
+ /**
46
+ * Get current ISO8601 timestamp for SQLite
47
+ */
48
+ export function nowISO() {
49
+ return new Date().toISOString();
50
+ }
@@ -0,0 +1,95 @@
1
+ /**
2
+ * Database Connection
3
+ *
4
+ * Singleton database connection with lazy initialization.
5
+ * Location: ~/.escribano/escribano.db
6
+ */
7
+ import { mkdirSync } from 'node:fs';
8
+ import os from 'node:os';
9
+ import { dirname, join } from 'node:path';
10
+ import Database from 'better-sqlite3';
11
+ import { createStatsRepository } from '../stats/repository.js';
12
+ import { runMigrations } from './migrate.js';
13
+ import { createSqliteArtifactRepository, createSqliteClusterRepository, createSqliteContextRepository, createSqliteObservationRepository, createSqliteRecordingRepository, createSqliteSubjectRepository, createSqliteTopicBlockRepository, } from './repositories/index.js';
14
// Database lives with the rest of the escribano state in the home directory.
const DB_PATH = join(os.homedir(), '.escribano', 'escribano.db');
// Module-level singletons: one connection and one repository set per process.
let db = null;
let repositories = null;
/**
 * Get database connection (internal).
 *
 * Lazily opens DB_PATH on first call — creating the parent directory if
 * needed — configures pragmas, and runs pending migrations. Subsequent
 * calls return the cached connection.
 */
function _getDb() {
    if (db)
        return db;
    // Ensure directory exists
    mkdirSync(dirname(DB_PATH), { recursive: true });
    // Open database
    db = new Database(DB_PATH);
    // Configure pragmas for performance and safety
    db.pragma('journal_mode = WAL');   // allow concurrent readers during writes
    db.pragma('synchronous = NORMAL'); // faster than FULL; safe under WAL
    db.pragma('foreign_keys = ON');    // SQLite disables FK enforcement by default
    db.pragma('busy_timeout = 5000');  // wait up to 5s on a locked database
    // Run migrations
    runMigrations(db);
    return db;
}
36
/**
 * Ensure database is initialized.
 *
 * Triggers the lazy open/migrate path without exposing the connection.
 * Safe to call repeatedly.
 */
export function ensureDb() {
    _getDb();
}
42
/**
 * Get all repositories.
 *
 * Built once against the singleton connection and cached at module level;
 * repeat calls return the same object. Opening the connection here also
 * runs any pending migrations (via _getDb).
 */
export function getRepositories() {
    if (repositories)
        return repositories;
    const dbInstance = _getDb();
    repositories = {
        recordings: createSqliteRecordingRepository(dbInstance),
        observations: createSqliteObservationRepository(dbInstance),
        contexts: createSqliteContextRepository(dbInstance),
        topicBlocks: createSqliteTopicBlockRepository(dbInstance),
        artifacts: createSqliteArtifactRepository(dbInstance),
        clusters: createSqliteClusterRepository(dbInstance),
        subjects: createSqliteSubjectRepository(dbInstance),
        stats: createStatsRepository(dbInstance),
    };
    return repositories;
}
61
/**
 * Create a fresh set of repositories for testing (using in-memory DB).
 *
 * Unlike getRepositories(), nothing is cached: each call opens a new
 * ':memory:' database, runs all migrations against it, and returns an
 * isolated repository set plus a `cleanup` function that closes the
 * connection (call it when the test is done).
 */
export function createTestRepositories() {
    const testDb = new Database(':memory:');
    testDb.pragma('foreign_keys = ON');
    runMigrations(testDb);
    return {
        recordings: createSqliteRecordingRepository(testDb),
        observations: createSqliteObservationRepository(testDb),
        contexts: createSqliteContextRepository(testDb),
        topicBlocks: createSqliteTopicBlockRepository(testDb),
        artifacts: createSqliteArtifactRepository(testDb),
        clusters: createSqliteClusterRepository(testDb),
        subjects: createSqliteSubjectRepository(testDb),
        stats: createStatsRepository(testDb),
        cleanup: () => testDb.close(),
    };
}
80
/**
 * Close the singleton database connection and drop the cached repositories.
 * A later getRepositories()/ensureDb() call reopens the database lazily.
 * No-op when the connection was never opened.
 */
export function closeDb() {
    if (!db) {
        return;
    }
    db.close();
    db = null;
    repositories = null;
}
90
/**
 * Get database path (useful for tests).
 * Returns the fixed location (~/.escribano/escribano.db) without opening
 * the database.
 */
export function getDbPath() {
    return DB_PATH;
}
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Database Migration Runner
3
+ *
4
+ * Executes SQL migration files from /migrations directory.
5
+ * Tracks applied versions in _schema_version table.
6
+ */
7
+ import { readdirSync, readFileSync } from 'node:fs';
8
+ import { dirname, join } from 'node:path';
9
+ import { fileURLToPath } from 'node:url';
10
// Resolve the migrations directory relative to this compiled file
// (dist/db/migrate.js -> package root /migrations).
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const MIGRATIONS_DIR = join(__dirname, '..', '..', 'migrations');
/**
 * Get current schema version from database.
 * Returns 0 when the _schema_version table does not exist yet (fresh DB)
 * or when it contains no rows.
 */
function getCurrentVersion(db) {
    try {
        const result = db
            .prepare('SELECT MAX(version) as version FROM _schema_version')
            .get();
        return typeof result?.version === 'number' ? result.version : 0;
    }
    catch {
        // Table doesn't exist yet
        return 0;
    }
}
29
/**
 * Load all migration files from the /migrations directory.
 *
 * Filenames must match NNN_description.sql; the numeric prefix becomes the
 * migration version. Files are returned sorted by filename (and therefore
 * by version, given the zero-padded prefix convention). Returns an empty
 * list — after logging the failure — when the directory cannot be read,
 * so a broken install degrades instead of crashing.
 *
 * @returns {Array<{version: number, filename: string, sql: string}>}
 */
function loadMigrations() {
    try {
        const files = readdirSync(MIGRATIONS_DIR)
            .filter((f) => f.endsWith('.sql'))
            .sort();
        return files.map((filename) => {
            const match = filename.match(/^(\d+)_.+\.sql$/);
            if (!match) {
                // BUG FIX: the original message used "$(unknown)" — a literal
                // string, not an interpolation — so the offending file was
                // never named. Include the actual filename.
                throw new Error(`Invalid migration filename: ${filename}. Expected format: NNN_description.sql`);
            }
            const version = Number.parseInt(match[1], 10);
            const sql = readFileSync(join(MIGRATIONS_DIR, filename), 'utf-8');
            return { version, filename, sql };
        });
    }
    catch (error) {
        console.error(`[db] Failed to load migrations from ${MIGRATIONS_DIR}:`, error);
        return [];
    }
}
+ }
52
+ /**
53
+ * Run all pending migrations
54
+ */
55
+ export function runMigrations(db) {
56
+ const currentVersion = getCurrentVersion(db);
57
+ const migrations = loadMigrations();
58
+ const pending = migrations.filter((m) => m.version > currentVersion);
59
+ if (pending.length === 0) {
60
+ console.log(`[db] Database is up to date (version ${currentVersion})`);
61
+ return { applied: [], currentVersion };
62
+ }
63
+ console.log(`[db] Found ${pending.length} pending migrations. Starting update...`);
64
+ const applied = [];
65
+ for (const migration of pending) {
66
+ console.log(`[db] Applying migration: ${migration.filename}`);
67
+ // Split migration into individual statements (simple split by ;)
68
+ // NOTE: This might fail if ; is inside a string, but for simple schemas it's fine.
69
+ // better-sqlite3 exec() can handle multiple statements.
70
+ db.exec(migration.sql);
71
+ // Update schema version
72
+ db.prepare('INSERT INTO _schema_version (version) VALUES (?)').run(migration.version);
73
+ applied.push(migration.filename);
74
+ }
75
+ const finalVersion = getCurrentVersion(db);
76
+ if (applied.length > 0) {
77
+ console.log(`[db] Migrations complete. Schema version: ${finalVersion}`);
78
+ }
79
+ return { applied, currentVersion: finalVersion };
80
+ }
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Artifact Repository - SQLite Implementation
3
+ */
4
+ import { nowISO } from '../helpers.js';
5
/**
 * Build the SQLite-backed artifact repository.
 *
 * Data access for the `artifacts` table plus the `artifact_subjects` join
 * table. All statements are prepared once, up front, on the provided
 * better-sqlite3 connection and reused for every call.
 */
export function createSqliteArtifactRepository(db) {
    const statements = {
        byId: db.prepare('SELECT * FROM artifacts WHERE id = ?'),
        byType: db.prepare('SELECT * FROM artifacts WHERE type = ? ORDER BY created_at DESC'),
        byBlock: db.prepare(`
      SELECT * FROM artifacts
      WHERE source_block_ids LIKE ?
      ORDER BY created_at DESC
    `),
        byContext: db.prepare(`
      SELECT * FROM artifacts
      WHERE source_context_ids LIKE ?
      ORDER BY created_at DESC
    `),
        byRecording: db.prepare(`
      SELECT * FROM artifacts
      WHERE recording_id = ?
      ORDER BY created_at DESC
    `),
        insert: db.prepare(`
      INSERT INTO artifacts (
        id, recording_id, type, content, format, source_block_ids, source_context_ids, created_at, updated_at
      )
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    `),
        update: db.prepare(`
      UPDATE artifacts SET content = ?, updated_at = ? WHERE id = ?
    `),
        remove: db.prepare('DELETE FROM artifacts WHERE id = ?'),
        removeByRecording: db.prepare('DELETE FROM artifacts WHERE recording_id = ?'),
        linkSubject: db.prepare('INSERT OR IGNORE INTO artifact_subjects (artifact_id, subject_id) VALUES (?, ?)'),
        subjectsByArtifact: db.prepare('SELECT * FROM artifact_subjects WHERE artifact_id = ?'),
    };
    return {
        // Single-row lookup; null (not undefined) when nothing matches.
        findById: (id) => statements.byId.get(id) ?? null,
        findByType: (type) => statements.byType.all(type),
        // LIKE-based containment search over the stored id-list columns.
        findByBlock: (blockId) => statements.byBlock.all(`%${blockId}%`),
        findByContext: (contextId) => statements.byContext.all(`%${contextId}%`),
        findByRecording: (recordingId) => statements.byRecording.all(recordingId),
        // Insert a new artifact; created_at and updated_at share one timestamp.
        save: (artifact) => {
            const timestamp = nowISO();
            statements.insert.run(artifact.id, artifact.recording_id ?? null, artifact.type, artifact.content, artifact.format, artifact.source_block_ids, artifact.source_context_ids, timestamp, timestamp);
        },
        update: (id, content) => {
            statements.update.run(content, nowISO(), id);
        },
        delete: (id) => {
            statements.remove.run(id);
        },
        deleteByRecording: (recordingId) => {
            statements.removeByRecording.run(recordingId);
        },
        // Idempotent (INSERT OR IGNORE) links between an artifact and subjects.
        linkSubjects: (artifactId, subjectIds) => {
            for (const subjectId of subjectIds) {
                statements.linkSubject.run(artifactId, subjectId);
            }
        },
        findSubjectsByArtifact: (artifactId) => statements.subjectsByArtifact.all(artifactId),
    };
}