escribano 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +297 -0
- package/dist/0_types.js +279 -0
- package/dist/actions/classify-session.js +77 -0
- package/dist/actions/create-contexts.js +44 -0
- package/dist/actions/create-topic-blocks.js +68 -0
- package/dist/actions/extract-metadata.js +24 -0
- package/dist/actions/generate-artifact-v3.js +296 -0
- package/dist/actions/generate-artifact.js +61 -0
- package/dist/actions/generate-summary-v3.js +260 -0
- package/dist/actions/outline-index.js +204 -0
- package/dist/actions/process-recording-v2.js +494 -0
- package/dist/actions/process-recording-v3.js +412 -0
- package/dist/actions/process-session.js +183 -0
- package/dist/actions/publish-summary-v3.js +303 -0
- package/dist/actions/sync-to-outline.js +196 -0
- package/dist/adapters/audio.silero.adapter.js +69 -0
- package/dist/adapters/cap.adapter.js +94 -0
- package/dist/adapters/capture.cap.adapter.js +107 -0
- package/dist/adapters/capture.filesystem.adapter.js +124 -0
- package/dist/adapters/embedding.ollama.adapter.js +141 -0
- package/dist/adapters/intelligence.adapter.js +202 -0
- package/dist/adapters/intelligence.mlx.adapter.js +395 -0
- package/dist/adapters/intelligence.ollama.adapter.js +741 -0
- package/dist/adapters/publishing.outline.adapter.js +75 -0
- package/dist/adapters/storage.adapter.js +81 -0
- package/dist/adapters/storage.fs.adapter.js +83 -0
- package/dist/adapters/transcription.whisper.adapter.js +206 -0
- package/dist/adapters/video.ffmpeg.adapter.js +405 -0
- package/dist/adapters/whisper.adapter.js +168 -0
- package/dist/batch-context.js +329 -0
- package/dist/db/helpers.js +50 -0
- package/dist/db/index.js +95 -0
- package/dist/db/migrate.js +80 -0
- package/dist/db/repositories/artifact.sqlite.js +77 -0
- package/dist/db/repositories/cluster.sqlite.js +92 -0
- package/dist/db/repositories/context.sqlite.js +75 -0
- package/dist/db/repositories/index.js +10 -0
- package/dist/db/repositories/observation.sqlite.js +70 -0
- package/dist/db/repositories/recording.sqlite.js +56 -0
- package/dist/db/repositories/subject.sqlite.js +64 -0
- package/dist/db/repositories/topic-block.sqlite.js +45 -0
- package/dist/db/types.js +4 -0
- package/dist/domain/classification.js +60 -0
- package/dist/domain/context.js +97 -0
- package/dist/domain/index.js +2 -0
- package/dist/domain/observation.js +17 -0
- package/dist/domain/recording.js +41 -0
- package/dist/domain/segment.js +93 -0
- package/dist/domain/session.js +93 -0
- package/dist/domain/time-range.js +38 -0
- package/dist/domain/transcript.js +79 -0
- package/dist/index.js +173 -0
- package/dist/pipeline/context.js +162 -0
- package/dist/pipeline/events.js +2 -0
- package/dist/prerequisites.js +226 -0
- package/dist/scripts/rebuild-index.js +53 -0
- package/dist/scripts/seed-fixtures.js +290 -0
- package/dist/services/activity-segmentation.js +333 -0
- package/dist/services/activity-segmentation.test.js +191 -0
- package/dist/services/app-normalization.js +212 -0
- package/dist/services/cluster-merge.js +69 -0
- package/dist/services/clustering.js +237 -0
- package/dist/services/debug.js +58 -0
- package/dist/services/frame-sampling.js +318 -0
- package/dist/services/signal-extraction.js +106 -0
- package/dist/services/subject-grouping.js +342 -0
- package/dist/services/temporal-alignment.js +99 -0
- package/dist/services/vlm-enrichment.js +84 -0
- package/dist/services/vlm-service.js +130 -0
- package/dist/stats/index.js +3 -0
- package/dist/stats/observer.js +65 -0
- package/dist/stats/repository.js +36 -0
- package/dist/stats/resource-tracker.js +86 -0
- package/dist/stats/types.js +1 -0
- package/dist/test-classification-prompts.js +181 -0
- package/dist/tests/cap.adapter.test.js +75 -0
- package/dist/tests/capture.cap.adapter.test.js +69 -0
- package/dist/tests/classify-session.test.js +140 -0
- package/dist/tests/db/repositories.test.js +243 -0
- package/dist/tests/domain/time-range.test.js +31 -0
- package/dist/tests/integration.test.js +84 -0
- package/dist/tests/intelligence.adapter.test.js +102 -0
- package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
- package/dist/tests/process-v2.test.js +90 -0
- package/dist/tests/services/clustering.test.js +112 -0
- package/dist/tests/services/frame-sampling.test.js +152 -0
- package/dist/tests/utils/ocr.test.js +76 -0
- package/dist/tests/utils/parallel.test.js +57 -0
- package/dist/tests/visual-observer.test.js +175 -0
- package/dist/utils/id-normalization.js +15 -0
- package/dist/utils/index.js +9 -0
- package/dist/utils/model-detector.js +154 -0
- package/dist/utils/ocr.js +80 -0
- package/dist/utils/parallel.js +32 -0
- package/migrations/001_initial.sql +109 -0
- package/migrations/002_clusters.sql +41 -0
- package/migrations/003_observations_vlm_fields.sql +14 -0
- package/migrations/004_observations_unique.sql +18 -0
- package/migrations/005_processing_stats.sql +29 -0
- package/migrations/006_vlm_raw_response.sql +6 -0
- package/migrations/007_subjects.sql +23 -0
- package/migrations/008_artifacts_recording.sql +6 -0
- package/migrations/009_artifact_subjects.sql +10 -0
- package/package.json +82 -0
- package/prompts/action-items.md +55 -0
- package/prompts/blog-draft.md +54 -0
- package/prompts/blog-research.md +87 -0
- package/prompts/card.md +54 -0
- package/prompts/classify-segment.md +38 -0
- package/prompts/classify.md +37 -0
- package/prompts/code-snippets.md +163 -0
- package/prompts/extract-metadata.md +149 -0
- package/prompts/notes.md +83 -0
- package/prompts/runbook.md +123 -0
- package/prompts/standup.md +50 -0
- package/prompts/step-by-step.md +125 -0
- package/prompts/subject-grouping.md +31 -0
- package/prompts/summary-v3.md +89 -0
- package/prompts/summary.md +77 -0
- package/prompts/topic-classifier.md +24 -0
- package/prompts/topic-extract.md +13 -0
- package/prompts/vlm-batch.md +21 -0
- package/prompts/vlm-single.md +19 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Batch Processing Context
|
|
3
|
+
*
|
|
4
|
+
* Provides reusable initialization and video processing functions
|
|
5
|
+
* for batch operations (e.g., quality testing multiple recordings).
|
|
6
|
+
*
|
|
7
|
+
* Key Design Decisions:
|
|
8
|
+
* - Adapters initialized ONCE and reused across recordings
|
|
9
|
+
* - MLX bridge spawns once, reused for all videos (no socket conflicts)
|
|
10
|
+
* - Filesystem capture source created per-video (hardcoded to file input)
|
|
11
|
+
* - Results returned as objects (never throws) for reliable batch processing
|
|
12
|
+
*/
|
|
13
|
+
import { execSync } from 'node:child_process';
|
|
14
|
+
import { homedir } from 'node:os';
|
|
15
|
+
import path from 'node:path';
|
|
16
|
+
import { generateArtifactV3, } from './actions/generate-artifact-v3.js';
|
|
17
|
+
import { updateGlobalIndex } from './actions/outline-index.js';
|
|
18
|
+
import { processRecordingV3 } from './actions/process-recording-v3.js';
|
|
19
|
+
import { hasContentChanged, publishSummaryV3, updateRecordingOutlineMetadata, } from './actions/publish-summary-v3.js';
|
|
20
|
+
import { createSileroPreprocessor } from './adapters/audio.silero.adapter.js';
|
|
21
|
+
import { createFilesystemCaptureSource } from './adapters/capture.filesystem.adapter.js';
|
|
22
|
+
import { cleanupMlxBridge, createMlxIntelligenceService, } from './adapters/intelligence.mlx.adapter.js';
|
|
23
|
+
import { createOllamaIntelligenceService } from './adapters/intelligence.ollama.adapter.js';
|
|
24
|
+
import { createOutlinePublishingService } from './adapters/publishing.outline.adapter.js';
|
|
25
|
+
import { createWhisperTranscriptionService } from './adapters/transcription.whisper.adapter.js';
|
|
26
|
+
import { createFfmpegVideoService } from './adapters/video.ffmpeg.adapter.js';
|
|
27
|
+
import { getDbPath, getRepositories } from './db/index.js';
|
|
28
|
+
import { log, setResourceTracker, step, withPipeline, } from './pipeline/context.js';
|
|
29
|
+
import { ResourceTracker, setupStatsObserver, } from './stats/index.js';
|
|
30
|
+
import { formatModelSelection, selectBestLLMModel, } from './utils/model-detector.js';
|
|
31
|
+
// Whisper transcription model location, resolved under the user's home
// directory: ~/.escribano/models/ggml-large-v3.bin (large-v3 GGML weights).
// Passed to createWhisperTranscriptionService() below.
const MODELS_DIR = path.join(homedir(), '.escribano', 'models');
const MODEL_FILE = 'ggml-large-v3.bin';
const MODEL_PATH = path.join(MODELS_DIR, MODEL_FILE);
|
|
34
|
+
/**
 * Initialize system components for batch processing.
 * All adapters are created ONCE and reused across recordings.
 *
 * Side effects: opens/creates the SQLite database (via getRepositories),
 * installs the stats observer and the global resource tracker, and logs
 * progress to stdout.
 *
 * @returns {Promise<{repos: object, adapters: {vlm, llm, video, preprocessor, transcription}, resourceTracker: ResourceTracker, outlineConfig: object|null}>}
 *   Shared context object expected by processVideo().
 */
export async function initializeSystem() {
    console.log('Initializing database...');
    const repos = getRepositories();
    console.log(`Database ready: ${getDbPath()}`);
    console.log('');
    // Setup stats observer to capture pipeline events
    setupStatsObserver(repos.stats);
    // Detect best LLM model (result is only logged; the Ollama adapter
    // presumably re-detects internally — TODO confirm)
    const modelSelection = await selectBestLLMModel();
    console.log(formatModelSelection(modelSelection));
    console.log('');
    // Initialize adapters ONCE — the MLX bridge in particular must only be
    // spawned once to avoid socket conflicts (see file header).
    console.log('[VLM] Using MLX-VLM for image processing');
    const vlm = createMlxIntelligenceService();
    console.log('[LLM] Using Ollama for text generation');
    const llm = createOllamaIntelligenceService();
    const video = createFfmpegVideoService();
    const preprocessor = createSileroPreprocessor();
    const transcription = createWhisperTranscriptionService({
        binaryPath: 'whisper-cli',
        model: MODEL_PATH,
        cwd: MODELS_DIR,
        outputFormat: 'json',
    });
    // Setup resource tracking (memory/CPU sampling of child processes —
    // see stats/resource-tracker)
    const resourceTracker = new ResourceTracker();
    resourceTracker.register(vlm);
    resourceTracker.register(video);
    resourceTracker.register(preprocessor);
    // Ollama runs as a daemon - special case: it has no adapter-owned PID,
    // so locate the `ollama serve` process via pgrep on demand.
    resourceTracker.register({
        getResourceName: () => 'ollama',
        getPid: () => {
            try {
                const output = execSync('pgrep -f "ollama serve"').toString().trim();
                // pgrep may match several processes; take the first PID only.
                const pid = parseInt(output.split('\n')[0] ?? '0', 10);
                return pid > 0 ? pid : null;
            }
            catch {
                // pgrep exits non-zero when nothing matches — daemon not running.
                return null;
            }
        },
    });
    setResourceTracker(resourceTracker);
    // Outline publishing is optional: null when env vars are absent.
    const outlineConfig = getOutlineConfig();
    return {
        repos,
        adapters: { vlm, llm, video, preprocessor, transcription },
        resourceTracker,
        outlineConfig,
    };
}
|
|
90
|
+
/**
 * Process a single video file.
 *
 * Note: Uses FilesystemCaptureSource (hardcoded for file input, not Cap recordings).
 * The video adapter is from context, but capture source is created per-call.
 *
 * Never throws: every failure path is converted into a `{ success: false, ... }`
 * result object so batch loops can continue (see file header).
 *
 * @param {string} videoPath - path to the input video file
 * @param {object} ctx - context from initializeSystem() ({ repos, adapters, resourceTracker, outlineConfig })
 * @param {object} [options] - force, skipSummary, micAudioPath, systemAudioPath,
 *   format ('card' default), includePersonal, copyToClipboard, printToStdout
 * @returns {Promise<object>} result object with success flag, recordingId,
 *   durations, artifact path and Outline URL (when published)
 */
export async function processVideo(videoPath, ctx, options = {}) {
    const startTime = Date.now();
    const { force = false, skipSummary = false, micAudioPath, systemAudioPath, format = 'card', includePersonal = false, copyToClipboard = false, printToStdout = false, } = options;
    const { repos, adapters, outlineConfig } = ctx;
    const { vlm, llm, video, preprocessor, transcription } = adapters;
    try {
        // Create capture source for this specific file
        // Note: Hardcoded to filesystem source, not Cap recordings
        const captureSource = createFilesystemCaptureSource({ videoPath, micAudioPath, systemAudioPath }, video);
        // Get recording metadata
        const recording = await captureSource.getLatestRecording();
        if (!recording) {
            return {
                success: false,
                recordingId: '',
                videoPath,
                error: `Failed to load video file: ${videoPath}`,
                duration: (Date.now() - startTime) / 1000,
            };
        }
        console.log(`\nProcessing recording: ${recording.id}`);
        console.log(`Duration: ${Math.round(recording.duration / 60)} minutes`);
        // Check/create DB recording
        const dbRec = repos.recordings.findById(recording.id);
        if (!dbRec) {
            // First time seeing this recording: persist it in 'raw' state.
            repos.recordings.save({
                id: recording.id,
                video_path: recording.videoPath,
                audio_mic_path: recording.audioMicPath,
                audio_system_path: recording.audioSystemPath,
                duration: recording.duration,
                captured_at: recording.capturedAt.toISOString(),
                status: 'raw',
                processing_step: null,
                source_type: recording.source.type,
                source_metadata: JSON.stringify(recording.source),
                error_message: null,
            });
            console.log('Created database entry');
        }
        else if (force) {
            // --force: wipe derived rows and reset status so the full
            // pipeline reruns from scratch.
            console.log('Force flag set: clearing existing data');
            repos.observations.deleteByRecording(recording.id);
            repos.topicBlocks.deleteByRecording(recording.id);
            repos.subjects.deleteByRecording(recording.id);
            repos.recordings.updateStatus(recording.id, 'raw', null, null);
        }
        else if (dbRec.status === 'published' || dbRec.status === 'processed') {
            console.log(`Recording already ${dbRec.status}. Regenerating artifact...`);
        }
        // Run VLM pipeline (skip if already processed or published)
        const skipProcessing = dbRec &&
            (dbRec.status === 'processed' || dbRec.status === 'published') &&
            !force;
        if (!skipProcessing) {
            // Tag the run for stats: forced rerun, resume of a partial run,
            // or first-time processing.
            const runType = force
                ? 'force'
                : dbRec?.processing_step
                    ? 'resume'
                    : 'initial';
            const runMetadata = collectRunMetadata(ctx.resourceTracker);
            await withPipeline(recording.id, runType, runMetadata, async () => {
                await processRecordingV3(recording.id, repos, { preprocessor, transcription, video, intelligence: vlm }, { force });
            });
        }
        // Generate artifact and publish (unless skipped), tracked as a pipeline run
        let artifact = null;
        let outlineUrl;
        if (!skipSummary) {
            const artifactRunMetadata = collectRunMetadata(ctx.resourceTracker);
            const pipelineResult = await withPipeline(recording.id, 'artifact', artifactRunMetadata, async () => {
                console.log(`\nGenerating ${format} artifact...`);
                const generatedArtifact = await generateArtifactV3(recording.id, repos, llm, {
                    recordingId: recording.id,
                    format,
                    includePersonal,
                    copyToClipboard,
                    printToStdout,
                });
                console.log(`Artifact saved: ${generatedArtifact.filePath}`);
                if (generatedArtifact.workDuration > 0) {
                    const workMins = Math.round(generatedArtifact.workDuration / 60);
                    console.log(`Work time: ${workMins} minutes`);
                }
                if (generatedArtifact.personalDuration > 0 && !includePersonal) {
                    const personalMins = Math.round(generatedArtifact.personalDuration / 60);
                    console.log(`Personal time: ${personalMins} minutes (filtered)`);
                }
                // Publish to Outline (unless no config)
                let publishedUrl;
                if (outlineConfig) {
                    try {
                        await step('outline publish', async () => {
                            console.log('\nPublishing to Outline...');
                            const publishing = createOutlinePublishingService(outlineConfig);
                            const topicBlocks = repos.topicBlocks.findByRecording(recording.id);
                            const dbRecording = repos.recordings.findById(recording.id);
                            // Content-hash check avoids re-publishing an
                            // unchanged document.
                            if (dbRecording &&
                                !hasContentChanged(dbRecording, generatedArtifact.content, format)) {
                                console.log('Content unchanged, skipping publish.');
                            }
                            else {
                                const published = await publishSummaryV3(recording.id, generatedArtifact.content, topicBlocks, repos, publishing, { collectionName: outlineConfig.collectionName, format });
                                const outlineInfo = {
                                    url: published.url,
                                    documentId: published.documentId,
                                    collectionId: published.collectionId,
                                    publishedAt: new Date().toISOString(),
                                    contentHash: published.contentHash,
                                };
                                updateRecordingOutlineMetadata(recording.id, outlineInfo, repos, format);
                                console.log(`Published to Outline: ${published.url}`);
                                publishedUrl = published.url;
                            }
                            // Update status BEFORE rebuilding index so findByStatus('published') includes this recording
                            repos.recordings.updateStatus(recording.id, 'published', null, null);
                            log('info', `[Outline] Recording ${recording.id} status updated to 'published'`);
                            // Update global index (after status update so this recording is included)
                            if (publishedUrl) {
                                const indexResult = await updateGlobalIndex(repos, publishing, {
                                    collectionName: outlineConfig.collectionName,
                                });
                                console.log(`Updated index: ${indexResult.url}`);
                            }
                        });
                    }
                    catch (error) {
                        // Publishing is best-effort: log, record the failure in
                        // metadata, and continue — the artifact itself succeeded.
                        const errorMessage = error.message;
                        console.warn(`Warning: Failed to publish to Outline: ${errorMessage}`);
                        log('warn', `[Outline] Publishing failed: ${errorMessage}`);
                        // Store error in metadata
                        try {
                            const dbRecording = repos.recordings.findById(recording.id);
                            const currentMetadata = dbRecording?.source_metadata
                                ? JSON.parse(dbRecording.source_metadata)
                                : {};
                            const existingOutline = currentMetadata.outline || {};
                            const updatedMetadata = {
                                ...currentMetadata,
                                outline: {
                                    ...existingOutline,
                                    error: errorMessage,
                                    failedAt: new Date().toISOString(),
                                },
                            };
                            repos.recordings.updateMetadata(recording.id, JSON.stringify(updatedMetadata));
                        }
                        catch (metaError) {
                            log('error', `[Outline] Failed to store error metadata: ${metaError.message}`);
                        }
                    }
                }
                else {
                    // No Outline credentials: still mark the recording done so
                    // it is not reprocessed next run.
                    console.log('No Outline configuration found. Marking as complete locally.');
                    repos.recordings.updateStatus(recording.id, 'published', null, null);
                }
                return { artifact: generatedArtifact, outlineUrl: publishedUrl };
            });
            artifact = pipelineResult.artifact;
            outlineUrl = pipelineResult.outlineUrl;
        }
        console.log('\n✓ Complete!');
        return {
            success: true,
            recordingId: recording.id,
            videoPath,
            artifactPath: artifact?.filePath,
            outlineUrl,
            duration: (Date.now() - startTime) / 1000,
            format: artifact?.format,
            workDuration: artifact?.workDuration,
            personalDuration: artifact?.personalDuration,
        };
    }
    catch (error) {
        // Top-level guard: convert any pipeline failure into a result object
        // so callers iterating a batch never see a throw.
        const errorMessage = error.message;
        console.error(`\n✗ Failed: ${errorMessage}`);
        return {
            success: false,
            recordingId: '',
            videoPath,
            error: errorMessage,
            duration: (Date.now() - startTime) / 1000,
        };
    }
}
|
|
282
|
+
/**
 * Get Outline configuration from environment if available.
 *
 * Requires both ESCRIBANO_OUTLINE_URL and ESCRIBANO_OUTLINE_TOKEN; otherwise
 * returns null and publishing is skipped. Collection name falls back to
 * 'Escribano Sessions'.
 *
 * @returns {{url: string, token: string, collectionName: string} | null}
 */
function getOutlineConfig() {
    const { ESCRIBANO_OUTLINE_URL: url, ESCRIBANO_OUTLINE_TOKEN: token } = process.env;
    if (!url || !token) {
        return null;
    }
    const collectionName = process.env.ESCRIBANO_OUTLINE_COLLECTION ?? 'Escribano Sessions';
    return { url, token, collectionName };
}
|
|
297
|
+
/**
 * Collect metadata about the current run.
 *
 * Captures model selection (env overrides with defaults), the git commit of
 * the working tree (or 'unknown' outside a repo), Node/platform details, and
 * the tuning env vars. When a resource tracker is supplied, its system info
 * snapshot is attached as `metadata.system`.
 *
 * @param {object|null|undefined} resourceTracker - optional ResourceTracker
 * @returns {object} plain metadata object for pipeline run records
 */
function collectRunMetadata(resourceTracker) {
    const readCommitHash = () => {
        try {
            return execSync('git rev-parse --short HEAD', {
                encoding: 'utf-8',
            }).trim();
        }
        catch {
            // Not in a git repo
            return 'unknown';
        }
    };
    const metadata = {
        vlm_model: process.env.ESCRIBANO_VLM_MODEL ??
            'mlx-community/Qwen3-VL-2B-Instruct-bf16',
        llm_model: process.env.ESCRIBANO_LLM_MODEL ?? 'auto-detected',
        commit_hash: readCommitHash(),
        node_version: process.version,
        platform: process.platform,
        arch: process.arch,
        env: {
            ESCRIBANO_SAMPLE_INTERVAL: process.env.ESCRIBANO_SAMPLE_INTERVAL,
            ESCRIBANO_VLM_BATCH_SIZE: process.env.ESCRIBANO_VLM_BATCH_SIZE,
            ESCRIBANO_VERBOSE: process.env.ESCRIBANO_VERBOSE,
        },
    };
    if (resourceTracker) {
        metadata.system = resourceTracker.getSystemInfo();
    }
    return metadata;
}
|
|
329
|
+
export { cleanupMlxBridge };
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Database Helpers
|
|
3
|
+
*
|
|
4
|
+
* Utilities for ID generation, embedding conversion, etc.
|
|
5
|
+
*/
|
|
6
|
+
import { uuidv7 } from 'uuidv7';
|
|
7
|
+
/**
 * Generate a time-sortable unique ID (UUIDv7).
 *
 * @returns {string} a fresh UUIDv7 string
 */
export function generateId() {
    const id = uuidv7();
    return id;
}
|
|
13
|
+
/**
 * Convert Float32Array embedding to Buffer for SQLite BLOB storage.
 *
 * @param {number[]|Float32Array} embedding - vector components
 * @returns {Buffer} 4 bytes per component, platform-endian float32
 */
export function embeddingToBlob(embedding) {
    const packed = Float32Array.from(embedding);
    return Buffer.from(packed.buffer);
}
|
|
20
|
+
/**
 * Convert SQLite BLOB back to number array.
 *
 * Reads the buffer in place (no copy of the underlying bytes) as float32
 * values; inverse of embeddingToBlob.
 *
 * @param {Buffer} blob - BLOB column value (length must be a multiple of 4)
 * @returns {number[]} decoded vector components
 */
export function blobToEmbedding(blob) {
    const componentCount = blob.length / 4;
    const view = new Float32Array(blob.buffer, blob.byteOffset, componentCount);
    return [...view];
}
|
|
27
|
+
/**
 * Compute cosine similarity between two embeddings.
 *
 * @param {number[]} a - first vector
 * @param {number[]} b - second vector (must have the same length as `a`)
 * @returns {number} similarity in [-1, 1]; 0 when either vector is all zeros
 * @throws {Error} when the vectors have different dimensions
 */
export function cosineSimilarity(a, b) {
    if (a.length !== b.length) {
        throw new Error(`Embedding dimension mismatch: ${a.length} vs ${b.length}`);
    }
    let dot = 0;
    let sumSqA = 0;
    let sumSqB = 0;
    a.forEach((ai, i) => {
        const bi = b[i];
        dot += ai * bi;
        sumSqA += ai * ai;
        sumSqB += bi * bi;
    });
    const norm = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
    // Guard against division by zero for degenerate (all-zero) vectors.
    return norm === 0 ? 0 : dot / norm;
}
|
|
45
|
+
/**
 * Get current ISO8601 timestamp for SQLite.
 *
 * @returns {string} current time as e.g. "2024-01-01T12:00:00.000Z" (UTC)
 */
export function nowISO() {
    const now = new Date();
    return now.toISOString();
}
|
package/dist/db/index.js
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Database Connection
|
|
3
|
+
*
|
|
4
|
+
* Singleton database connection with lazy initialization.
|
|
5
|
+
* Location: ~/.escribano/escribano.db
|
|
6
|
+
*/
|
|
7
|
+
import { mkdirSync } from 'node:fs';
|
|
8
|
+
import os from 'node:os';
|
|
9
|
+
import { dirname, join } from 'node:path';
|
|
10
|
+
import Database from 'better-sqlite3';
|
|
11
|
+
import { createStatsRepository } from '../stats/repository.js';
|
|
12
|
+
import { runMigrations } from './migrate.js';
|
|
13
|
+
import { createSqliteArtifactRepository, createSqliteClusterRepository, createSqliteContextRepository, createSqliteObservationRepository, createSqliteRecordingRepository, createSqliteSubjectRepository, createSqliteTopicBlockRepository, } from './repositories/index.js';
|
|
14
|
+
// Fixed on-disk location of the singleton database.
const DB_PATH = join(os.homedir(), '.escribano', 'escribano.db');
// Module-level singletons, populated lazily on first use.
let db = null;
let repositories = null;
/**
 * Get database connection (internal).
 *
 * Lazily opens ~/.escribano/escribano.db, applies the standard pragmas,
 * runs pending migrations, and caches the handle for subsequent calls.
 */
function _getDb() {
    if (db) {
        return db;
    }
    // Ensure ~/.escribano exists before opening the database file.
    mkdirSync(dirname(DB_PATH), { recursive: true });
    db = new Database(DB_PATH);
    // Performance/safety pragmas: WAL journaling, relaxed sync, enforced
    // foreign keys, and a 5s busy timeout for concurrent access.
    const pragmas = [
        'journal_mode = WAL',
        'synchronous = NORMAL',
        'foreign_keys = ON',
        'busy_timeout = 5000',
    ];
    for (const pragma of pragmas) {
        db.pragma(pragma);
    }
    // Bring the schema up to date on every open.
    runMigrations(db);
    return db;
}
|
|
36
|
+
/**
 * Ensure database is initialized.
 *
 * Triggers the lazy open + migration path without exposing the handle.
 */
export function ensureDb() {
    void _getDb();
}
|
|
42
|
+
/**
 * Get all repositories.
 *
 * Builds the full repository set over the singleton connection on first
 * call and caches it; later calls return the same object.
 *
 * @returns {object} map of repository instances keyed by domain name
 */
export function getRepositories() {
    if (repositories) {
        return repositories;
    }
    const conn = _getDb();
    repositories = {
        recordings: createSqliteRecordingRepository(conn),
        observations: createSqliteObservationRepository(conn),
        contexts: createSqliteContextRepository(conn),
        topicBlocks: createSqliteTopicBlockRepository(conn),
        artifacts: createSqliteArtifactRepository(conn),
        clusters: createSqliteClusterRepository(conn),
        subjects: createSqliteSubjectRepository(conn),
        stats: createStatsRepository(conn),
    };
    return repositories;
}
|
|
61
|
+
/**
 * Create a fresh set of repositories for testing (using in-memory DB).
 *
 * Each call opens its own `:memory:` database with foreign keys on and the
 * full migration set applied. The returned object includes a `cleanup()`
 * function that closes the underlying connection.
 *
 * @returns {object} repository map plus `cleanup`
 */
export function createTestRepositories() {
    const testDb = new Database(':memory:');
    testDb.pragma('foreign_keys = ON');
    runMigrations(testDb);
    const build = (factory) => factory(testDb);
    return {
        recordings: build(createSqliteRecordingRepository),
        observations: build(createSqliteObservationRepository),
        contexts: build(createSqliteContextRepository),
        topicBlocks: build(createSqliteTopicBlockRepository),
        artifacts: build(createSqliteArtifactRepository),
        clusters: build(createSqliteClusterRepository),
        subjects: build(createSqliteSubjectRepository),
        stats: build(createStatsRepository),
        cleanup: () => testDb.close(),
    };
}
|
|
80
|
+
/**
 * Close database connection.
 *
 * Also drops the cached repository set so a later call re-opens cleanly.
 * Safe to call when no connection is open.
 */
export function closeDb() {
    if (!db) {
        return;
    }
    db.close();
    db = null;
    repositories = null;
}
|
|
90
|
+
/**
 * Get database path (useful for tests).
 *
 * @returns {string} absolute path of the on-disk database file
 */
export function getDbPath() {
    return DB_PATH;
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Database Migration Runner
|
|
3
|
+
*
|
|
4
|
+
* Executes SQL migration files from /migrations directory.
|
|
5
|
+
* Tracks applied versions in _schema_version table.
|
|
6
|
+
*/
|
|
7
|
+
import { readdirSync, readFileSync } from 'node:fs';
|
|
8
|
+
import { dirname, join } from 'node:path';
|
|
9
|
+
import { fileURLToPath } from 'node:url';
|
|
10
|
+
// ES modules have no __filename/__dirname; reconstruct them from
// import.meta.url so paths resolve relative to this compiled file.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Migration SQL lives at the package root (<pkg>/migrations), two levels
// up from this file (dist/db/migrate.js).
const MIGRATIONS_DIR = join(__dirname, '..', '..', 'migrations');
|
|
13
|
+
/**
 * Get current schema version from database.
 *
 * @param {object} db - better-sqlite3 connection
 * @returns {number} highest applied migration version, or 0 when the
 *   _schema_version table does not exist yet (fresh database)
 */
function getCurrentVersion(db) {
    try {
        const row = db
            .prepare('SELECT MAX(version) as version FROM _schema_version')
            .get();
        // MAX() over an empty table yields NULL, so fall back to 0 unless we
        // actually got a number.
        return typeof row?.version === 'number' ? row.version : 0;
    }
    catch {
        // Table doesn't exist yet
        return 0;
    }
}
|
|
29
|
+
/**
 * Load all migration files from /migrations directory.
 *
 * Files must be named `NNN_description.sql`; the numeric prefix becomes the
 * schema version. Returns an empty array (after logging) when the directory
 * cannot be read, so a missing migrations folder does not crash startup.
 * NOTE: a badly named file throws inside the try, so it is also reported via
 * the catch below and results in [] — no migrations are applied in that case.
 *
 * @returns {{version: number, filename: string, sql: string}[]} sorted
 *   ascending by filename (and therefore by version, given zero padding)
 */
function loadMigrations() {
    try {
        const files = readdirSync(MIGRATIONS_DIR)
            .filter((f) => f.endsWith('.sql'))
            .sort();
        return files.map((filename) => {
            const match = filename.match(/^(\d+)_.+\.sql$/);
            if (!match) {
                // FIX: the message previously interpolated nothing — it read
                // literally "$(unknown)". Include the offending filename so
                // the error is actionable.
                throw new Error(`Invalid migration filename: ${filename}. Expected format: NNN_description.sql`);
            }
            const version = parseInt(match[1], 10);
            const sql = readFileSync(join(MIGRATIONS_DIR, filename), 'utf-8');
            return { version, filename, sql };
        });
    }
    catch (error) {
        console.error(`[db] Failed to load migrations from ${MIGRATIONS_DIR}:`, error);
        return [];
    }
}
|
|
52
|
+
/**
 * Run all pending migrations.
 *
 * Compares the stored schema version against the migration files on disk,
 * applies everything newer in order, and records each applied version in
 * _schema_version.
 *
 * @param {object} db - better-sqlite3 connection
 * @returns {{applied: string[], currentVersion: number}} applied filenames
 *   and the schema version after the run
 */
export function runMigrations(db) {
    const currentVersion = getCurrentVersion(db);
    const pending = loadMigrations().filter((m) => m.version > currentVersion);
    if (pending.length === 0) {
        console.log(`[db] Database is up to date (version ${currentVersion})`);
        return { applied: [], currentVersion };
    }
    console.log(`[db] Found ${pending.length} pending migrations. Starting update...`);
    const applied = pending.map((migration) => {
        console.log(`[db] Applying migration: ${migration.filename}`);
        // better-sqlite3 exec() runs the whole multi-statement SQL script.
        db.exec(migration.sql);
        // Record the version so this migration is never re-applied.
        db.prepare('INSERT INTO _schema_version (version) VALUES (?)').run(migration.version);
        return migration.filename;
    });
    const finalVersion = getCurrentVersion(db);
    if (applied.length > 0) {
        console.log(`[db] Migrations complete. Schema version: ${finalVersion}`);
    }
    return { applied, currentVersion: finalVersion };
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Artifact Repository - SQLite Implementation
|
|
3
|
+
*/
|
|
4
|
+
import { nowISO } from '../helpers.js';
|
|
5
|
+
/**
 * Create the SQLite-backed artifact repository.
 *
 * All statements are prepared once at construction and reused. Rows are
 * returned as-is from better-sqlite3 (plain objects) or null when absent.
 *
 * @param db - better-sqlite3 connection
 * @returns repository object with find/save/update/delete operations
 */
export function createSqliteArtifactRepository(db) {
    const stmts = {
        findById: db.prepare('SELECT * FROM artifacts WHERE id = ?'),
        findByType: db.prepare('SELECT * FROM artifacts WHERE type = ? ORDER BY created_at DESC'),
        // source_block_ids / source_context_ids are matched with LIKE '%id%':
        // simple substring containment over the stored ID list.
        findByBlock: db.prepare(`
      SELECT * FROM artifacts
      WHERE source_block_ids LIKE ?
      ORDER BY created_at DESC
    `),
        findByContext: db.prepare(`
      SELECT * FROM artifacts
      WHERE source_context_ids LIKE ?
      ORDER BY created_at DESC
    `),
        findByRecording: db.prepare(`
      SELECT * FROM artifacts
      WHERE recording_id = ?
      ORDER BY created_at DESC
    `),
        insert: db.prepare(`
      INSERT INTO artifacts (
        id, recording_id, type, content, format, source_block_ids, source_context_ids, created_at, updated_at
      )
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    `),
        update: db.prepare(`
      UPDATE artifacts SET content = ?, updated_at = ? WHERE id = ?
    `),
        delete: db.prepare('DELETE FROM artifacts WHERE id = ?'),
        deleteByRecording: db.prepare('DELETE FROM artifacts WHERE recording_id = ?'),
        // OR IGNORE makes linking idempotent on the (artifact_id, subject_id) pair.
        linkSubject: db.prepare('INSERT OR IGNORE INTO artifact_subjects (artifact_id, subject_id) VALUES (?, ?)'),
        findSubjectsByArtifact: db.prepare('SELECT * FROM artifact_subjects WHERE artifact_id = ?'),
    };
    return {
        // Fetch a single artifact row, or null when the id is unknown.
        findById(id) {
            const row = stmts.findById.get(id);
            return row ?? null;
        },
        // All artifacts of a given type, newest first.
        findByType(type) {
            return stmts.findByType.all(type);
        },
        // Artifacts whose source_block_ids contains blockId as a substring.
        findByBlock(blockId) {
            return stmts.findByBlock.all(`%${blockId}%`);
        },
        // Artifacts whose source_context_ids contains contextId as a substring.
        findByContext(contextId) {
            return stmts.findByContext.all(`%${contextId}%`);
        },
        // All artifacts for a recording, newest first.
        findByRecording(recordingId) {
            return stmts.findByRecording.all(recordingId);
        },
        // Insert a new artifact; created_at and updated_at are set to "now".
        // recording_id is optional and stored as NULL when absent.
        save(artifact) {
            const now = nowISO();
            stmts.insert.run(artifact.id, artifact.recording_id ?? null, artifact.type, artifact.content, artifact.format, artifact.source_block_ids, artifact.source_context_ids, now, now);
        },
        // Replace an artifact's content and bump updated_at.
        update(id, content) {
            stmts.update.run(content, nowISO(), id);
        },
        delete(id) {
            stmts.delete.run(id);
        },
        deleteByRecording(recordingId) {
            stmts.deleteByRecording.run(recordingId);
        },
        // Associate an artifact with each subject id; duplicates are ignored.
        linkSubjects(artifactId, subjectIds) {
            for (const subjectId of subjectIds) {
                stmts.linkSubject.run(artifactId, subjectId);
            }
        },
        // Raw artifact_subjects join rows for an artifact.
        findSubjectsByArtifact(artifactId) {
            return stmts.findSubjectsByArtifact.all(artifactId);
        },
    };
}
|