escribano 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +297 -0
- package/dist/0_types.js +279 -0
- package/dist/actions/classify-session.js +77 -0
- package/dist/actions/create-contexts.js +44 -0
- package/dist/actions/create-topic-blocks.js +68 -0
- package/dist/actions/extract-metadata.js +24 -0
- package/dist/actions/generate-artifact-v3.js +296 -0
- package/dist/actions/generate-artifact.js +61 -0
- package/dist/actions/generate-summary-v3.js +260 -0
- package/dist/actions/outline-index.js +204 -0
- package/dist/actions/process-recording-v2.js +494 -0
- package/dist/actions/process-recording-v3.js +412 -0
- package/dist/actions/process-session.js +183 -0
- package/dist/actions/publish-summary-v3.js +303 -0
- package/dist/actions/sync-to-outline.js +196 -0
- package/dist/adapters/audio.silero.adapter.js +69 -0
- package/dist/adapters/cap.adapter.js +94 -0
- package/dist/adapters/capture.cap.adapter.js +107 -0
- package/dist/adapters/capture.filesystem.adapter.js +124 -0
- package/dist/adapters/embedding.ollama.adapter.js +141 -0
- package/dist/adapters/intelligence.adapter.js +202 -0
- package/dist/adapters/intelligence.mlx.adapter.js +395 -0
- package/dist/adapters/intelligence.ollama.adapter.js +741 -0
- package/dist/adapters/publishing.outline.adapter.js +75 -0
- package/dist/adapters/storage.adapter.js +81 -0
- package/dist/adapters/storage.fs.adapter.js +83 -0
- package/dist/adapters/transcription.whisper.adapter.js +206 -0
- package/dist/adapters/video.ffmpeg.adapter.js +405 -0
- package/dist/adapters/whisper.adapter.js +168 -0
- package/dist/batch-context.js +329 -0
- package/dist/db/helpers.js +50 -0
- package/dist/db/index.js +95 -0
- package/dist/db/migrate.js +80 -0
- package/dist/db/repositories/artifact.sqlite.js +77 -0
- package/dist/db/repositories/cluster.sqlite.js +92 -0
- package/dist/db/repositories/context.sqlite.js +75 -0
- package/dist/db/repositories/index.js +10 -0
- package/dist/db/repositories/observation.sqlite.js +70 -0
- package/dist/db/repositories/recording.sqlite.js +56 -0
- package/dist/db/repositories/subject.sqlite.js +64 -0
- package/dist/db/repositories/topic-block.sqlite.js +45 -0
- package/dist/db/types.js +4 -0
- package/dist/domain/classification.js +60 -0
- package/dist/domain/context.js +97 -0
- package/dist/domain/index.js +2 -0
- package/dist/domain/observation.js +17 -0
- package/dist/domain/recording.js +41 -0
- package/dist/domain/segment.js +93 -0
- package/dist/domain/session.js +93 -0
- package/dist/domain/time-range.js +38 -0
- package/dist/domain/transcript.js +79 -0
- package/dist/index.js +173 -0
- package/dist/pipeline/context.js +162 -0
- package/dist/pipeline/events.js +2 -0
- package/dist/prerequisites.js +226 -0
- package/dist/scripts/rebuild-index.js +53 -0
- package/dist/scripts/seed-fixtures.js +290 -0
- package/dist/services/activity-segmentation.js +333 -0
- package/dist/services/activity-segmentation.test.js +191 -0
- package/dist/services/app-normalization.js +212 -0
- package/dist/services/cluster-merge.js +69 -0
- package/dist/services/clustering.js +237 -0
- package/dist/services/debug.js +58 -0
- package/dist/services/frame-sampling.js +318 -0
- package/dist/services/signal-extraction.js +106 -0
- package/dist/services/subject-grouping.js +342 -0
- package/dist/services/temporal-alignment.js +99 -0
- package/dist/services/vlm-enrichment.js +84 -0
- package/dist/services/vlm-service.js +130 -0
- package/dist/stats/index.js +3 -0
- package/dist/stats/observer.js +65 -0
- package/dist/stats/repository.js +36 -0
- package/dist/stats/resource-tracker.js +86 -0
- package/dist/stats/types.js +1 -0
- package/dist/test-classification-prompts.js +181 -0
- package/dist/tests/cap.adapter.test.js +75 -0
- package/dist/tests/capture.cap.adapter.test.js +69 -0
- package/dist/tests/classify-session.test.js +140 -0
- package/dist/tests/db/repositories.test.js +243 -0
- package/dist/tests/domain/time-range.test.js +31 -0
- package/dist/tests/integration.test.js +84 -0
- package/dist/tests/intelligence.adapter.test.js +102 -0
- package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
- package/dist/tests/process-v2.test.js +90 -0
- package/dist/tests/services/clustering.test.js +112 -0
- package/dist/tests/services/frame-sampling.test.js +152 -0
- package/dist/tests/utils/ocr.test.js +76 -0
- package/dist/tests/utils/parallel.test.js +57 -0
- package/dist/tests/visual-observer.test.js +175 -0
- package/dist/utils/id-normalization.js +15 -0
- package/dist/utils/index.js +9 -0
- package/dist/utils/model-detector.js +154 -0
- package/dist/utils/ocr.js +80 -0
- package/dist/utils/parallel.js +32 -0
- package/migrations/001_initial.sql +109 -0
- package/migrations/002_clusters.sql +41 -0
- package/migrations/003_observations_vlm_fields.sql +14 -0
- package/migrations/004_observations_unique.sql +18 -0
- package/migrations/005_processing_stats.sql +29 -0
- package/migrations/006_vlm_raw_response.sql +6 -0
- package/migrations/007_subjects.sql +23 -0
- package/migrations/008_artifacts_recording.sql +6 -0
- package/migrations/009_artifact_subjects.sql +10 -0
- package/package.json +82 -0
- package/prompts/action-items.md +55 -0
- package/prompts/blog-draft.md +54 -0
- package/prompts/blog-research.md +87 -0
- package/prompts/card.md +54 -0
- package/prompts/classify-segment.md +38 -0
- package/prompts/classify.md +37 -0
- package/prompts/code-snippets.md +163 -0
- package/prompts/extract-metadata.md +149 -0
- package/prompts/notes.md +83 -0
- package/prompts/runbook.md +123 -0
- package/prompts/standup.md +50 -0
- package/prompts/step-by-step.md +125 -0
- package/prompts/subject-grouping.md +31 -0
- package/prompts/summary-v3.md +89 -0
- package/prompts/summary.md +77 -0
- package/prompts/topic-classifier.md +24 -0
- package/prompts/topic-extract.md +13 -0
- package/prompts/vlm-batch.md +21 -0
- package/prompts/vlm-single.md +19 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
// Barrel re-exports for the utils modules (OCR cleanup + parallel helpers).
export * from './ocr.js';
export * from './parallel.js';
|
|
3
|
+
/**
 * Convert SQLite BLOB buffer to number array.
 *
 * The blob stores IEEE-754 little-endian float32 values. Buffers returned by
 * better-sqlite3 may be views into Node's shared allocation pool, so their
 * `byteOffset` is not guaranteed to be 4-byte aligned; constructing a
 * Float32Array over an unaligned offset throws a RangeError. In that case we
 * copy the bytes into a freshly allocated (and therefore aligned) buffer first.
 *
 * @param buffer - Buffer/Uint8Array holding packed float32 values.
 * @returns Plain number array of the decoded floats.
 */
export function bufferToEmbedding(buffer) {
    const floatCount = Math.floor(buffer.length / 4);
    if (buffer.byteOffset % 4 === 0) {
        // Fast path: zero-copy view over the underlying ArrayBuffer.
        const float32 = new Float32Array(buffer.buffer, buffer.byteOffset, floatCount);
        return Array.from(float32);
    }
    // Slow path: copy to an aligned buffer (new Uint8Array(view) copies at offset 0).
    const aligned = new Uint8Array(buffer.subarray(0, floatCount * 4));
    return Array.from(new Float32Array(aligned.buffer, 0, floatCount));
}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
/**
 * LLM Model Auto-Detection
 *
 * Detects the best available LLM model from installed Ollama models
 * based on system RAM and model quality tiers.
 */
import { totalmem } from 'node:os';
// Quality tiers, ordered best-first. `minRamGB` is the minimum system RAM the
// model is expected to need; `tier` is a monotonically increasing quality rank
// used when comparing candidates. Selection walks this list top-down.
export const LLM_MODEL_TIERS = [
    { model: 'qwen3.5:27b', tier: 4, minRamGB: 32, label: 'best' },
    { model: 'qwen3:14b', tier: 3, minRamGB: 20, label: 'very good' },
    { model: 'qwen3:8b', tier: 2, minRamGB: 10, label: 'good' },
    { model: 'qwen3:4b', tier: 1, minRamGB: 6, label: 'minimum' },
];
// Ollama HTTP endpoint; the OLLAMA_HOST env var overrides the local default.
const OLLAMA_ENDPOINT = process.env.OLLAMA_HOST || 'http://localhost:11434';
|
|
15
|
+
/**
 * Fetch installed models from Ollama.
 *
 * Queries the `/api/tags` endpoint and returns the model names
 * (e.g. "qwen3:8b"). Any failure — unreachable daemon, non-2xx
 * response, malformed payload — degrades to an empty list so callers
 * can treat "no models" and "Ollama down" identically.
 *
 * @returns Array of installed model name strings (possibly empty).
 */
export async function getInstalledOllamaModels() {
    try {
        const response = await fetch(`${OLLAMA_ENDPOINT}/api/tags`);
        if (!response.ok) {
            return [];
        }
        const payload = await response.json();
        const entries = payload.models ?? [];
        return entries.map((entry) => entry.name);
    }
    catch {
        // Network/parse errors are expected when the daemon isn't running.
        return [];
    }
}
|
|
31
|
+
/**
 * Check if a model matches (handles model:tag format).
 *
 * Two names match when their base names (everything before the first
 * ':') are identical, so "qwen3:8b" matches "qwen3:latest".
 */
function modelMatches(installed, target) {
    const [installedBase] = installed.split(':');
    const [targetBase] = target.split(':');
    return installedBase === targetBase;
}
|
|
39
|
+
/**
 * Find the best tier for a given model name.
 *
 * Returns the first (highest-quality) tier whose base model name matches,
 * or undefined when the model is not in the supported tier list.
 */
function findTier(model) {
    for (const candidate of LLM_MODEL_TIERS) {
        if (modelMatches(model, candidate.model)) {
            return candidate;
        }
    }
    return undefined;
}
|
|
45
|
+
/**
 * Get system RAM in GB (rounded to the nearest whole gigabyte).
 */
export function getSystemRamGB() {
    const bytesPerGB = 1024 ** 3;
    return Math.round(totalmem() / bytesPerGB);
}
|
|
51
|
+
/**
 * Select the best LLM model based on installed models and system RAM.
 *
 * If ESCRIBANO_LLM_MODEL is set, uses that but still validates and warns.
 * Otherwise, auto-selects the best available model that fits in RAM.
 *
 * @returns Selection object: { model, source, tier, label, ramGB, warning?, recommendation? }
 */
export async function selectBestLLMModel() {
    const ramGB = getSystemRamGB();
    const envModel = process.env.ESCRIBANO_LLM_MODEL;
    const installed = await getInstalledOllamaModels();
    // Explicit override: honor it, but validate and attach warnings/advice.
    if (envModel) {
        return validateEnvModel(envModel, installed, ramGB);
    }
    // Auto-select: walk tiers best-first, pick the first installed model that fits in RAM.
    for (const tier of LLM_MODEL_TIERS) {
        if (tier.minRamGB > ramGB)
            continue;
        const installedModel = installed.find((m) => modelMatches(m, tier.model));
        if (installedModel) {
            // Check if there's a better model NOT installed
            const betterTier = LLM_MODEL_TIERS.find((t) => t.tier > tier.tier && t.minRamGB <= ramGB);
            return {
                // `installedModel` matched `tier.model` via modelMatches (same base
                // name), so we always report the exact installed name — this keeps
                // the user's local tag (e.g. ":latest") intact.
                model: installedModel,
                source: 'auto',
                tier: tier.tier,
                label: tier.label,
                ramGB,
                recommendation: betterTier
                    ? `For better quality, install ${betterTier.model} (ollama pull ${betterTier.model})`
                    : undefined,
            };
        }
    }
    // Nothing found - return lowest tier with install instruction
    const lowest = LLM_MODEL_TIERS[LLM_MODEL_TIERS.length - 1];
    return {
        model: lowest.model,
        source: 'auto',
        tier: 0,
        label: 'not found',
        ramGB,
        warning: `No supported LLM model found.`,
        recommendation: `Install at least ${lowest.model}: ollama pull ${lowest.model}`,
    };
}
/**
 * Validate an explicitly configured model (ESCRIBANO_LLM_MODEL).
 * Never overrides the user's choice; only attaches warnings (not installed,
 * too large for RAM) and upgrade recommendations.
 */
function validateEnvModel(envModel, installed, ramGB) {
    const tier = findTier(envModel);
    const isInstalled = installed.some((m) => modelMatches(m, envModel));
    if (!isInstalled) {
        return {
            model: envModel,
            source: 'env',
            tier: tier?.tier ?? 0,
            label: tier?.label ?? 'unknown',
            ramGB,
            warning: `${envModel} is not installed. Run: ollama pull ${envModel}`,
        };
    }
    if (tier && tier.minRamGB > ramGB) {
        // Model is installed but likely exceeds available RAM; suggest the
        // best tier that does fit (tiers are ordered best-first).
        const recommended = LLM_MODEL_TIERS.find((t) => t.minRamGB <= ramGB);
        return {
            model: envModel,
            source: 'env',
            tier: tier.tier,
            label: tier.label,
            ramGB,
            warning: `${envModel} may be too large for your ${ramGB}GB RAM.`,
            recommendation: recommended
                ? `Consider ${recommended.model} for stability (ollama pull ${recommended.model})`
                : undefined,
        };
    }
    // Check if there's a better model available for this RAM
    const betterTier = LLM_MODEL_TIERS.find((t) => t.tier > (tier?.tier ?? 0) &&
        t.minRamGB <= ramGB &&
        installed.some((m) => modelMatches(m, t.model)));
    return {
        model: envModel,
        source: 'env',
        tier: tier?.tier ?? 0,
        label: tier?.label ?? 'unknown',
        ramGB,
        recommendation: betterTier
            ? `${betterTier.model} is available and would give better quality (ollama pull ${betterTier.model})`
            : undefined,
    };
}
|
|
138
|
+
/**
 * Format model selection for console output.
 *
 * Produces one line per piece of information: the chosen model with its
 * provenance, then optional warning and recommendation lines.
 */
export function formatModelSelection(selection) {
    const sourceLabel = selection.source === 'env' ? '(from ESCRIBANO_LLM_MODEL)' : '(auto-detected)';
    const lines = [`Using ${selection.model} ${sourceLabel}`];
    if (selection.warning) {
        lines.push(`  ⚠ ${selection.warning}`);
    }
    if (selection.recommendation) {
        lines.push(`  ℹ ${selection.recommendation}`);
    }
    return lines.join('\n');
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
 * OCR Text Cleanup Utilities
 *
 * Tesseract OCR picks up menu bar icons, status indicators, and other
 * non-textual elements. This module filters that garbage.
 *
 * TODO: Tune patterns based on real data analysis after Phase 3C
 */
// Patterns that indicate garbage (icons, symbols, status bar)
const GARBAGE_PATTERNS = [
    /^[^a-zA-Z0-9]{1,10}$/, // Only symbols (@ & © ® € etc)
    /^\d+%$/, // Percentages alone (43%)
    /^[a-z]$/i, // Single letters
    /^[\s\n]+$/, // Whitespace only
    /^(.)\1{2,}$/, // Entire line is 3+ repeated chars (eee, ---)
    /(.)\1{4,}/, // Line contains 5+ repeated chars (GPassssss, =====)
    /^© \d{2}\/\d{2}/, // System Clock: © 15/01 Thu...
    /^> [a-zA-Z ]+$/, // Window Title: > Google Chrome
    /^[|>]\s+/, // Window artifacts: | BSI, > Google
];
// Exact lines that are known UI chrome rather than user content.
const UI_STOPWORDS = new Set([
    'All Bookmarks',
    'Zoho Mail',
    'Detected Language',
    'English',
    'New Chat',
    'cid', // Common OCR artifact
    'sh Twelv', // Truncated "English Twelve" or similar
]);
const MIN_LINE_LENGTH = 3;
const MIN_WORD_LENGTH = 3; // Require at least 3-letter sequences
// Hoisted out of the per-line loop: the pattern is loop-invariant, so there is
// no reason to construct a fresh RegExp for every OCR line. No /g flag, so
// repeated .test() calls are stateless and safe to share.
const WORD_REGEX = new RegExp(`[a-zA-Z]{${MIN_WORD_LENGTH},}`);
/**
 * Clean OCR text by removing garbage lines
 * @param raw - Raw OCR text from Tesseract
 * @returns Cleaned text suitable for embedding
 */
export function cleanOcrText(raw) {
    if (!raw || raw.trim().length === 0)
        return '';
    const cleaned = [];
    let lastLine = '';
    for (const line of raw.split('\n')) {
        const trimmed = line.trim();
        // Skip short lines
        if (trimmed.length < MIN_LINE_LENGTH)
            continue;
        // Skip duplicate consecutive lines (common in static UI)
        if (trimmed === lastLine)
            continue;
        // Skip exact UI stopwords
        if (UI_STOPWORDS.has(trimmed))
            continue;
        // Skip garbage patterns
        if (GARBAGE_PATTERNS.some((p) => p.test(trimmed)))
            continue;
        // Keep if has at least one word-like sequence (3+ letters)
        if (WORD_REGEX.test(trimmed)) {
            cleaned.push(trimmed);
            lastLine = trimmed;
        }
    }
    return cleaned.join('\n');
}
|
|
66
|
+
/**
 * Check if cleaned OCR text is meaningful enough for embedding
 * @param cleanedText - Already cleaned OCR text
 * @returns true if text has substantial content
 */
export function isOcrMeaningful(cleanedText) {
    // Reject empty/undefined input and anything under 20 characters.
    if (!cleanedText || cleanedText.length < 20) {
        return false;
    }
    // Require at least 3 non-blank lines of content.
    let contentLines = 0;
    for (const line of cleanedText.split('\n')) {
        if (line.trim().length > 0) {
            contentLines += 1;
        }
    }
    return contentLines >= 3;
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
 * Parallel Map Utility
 *
 * Executes async operations with bounded concurrency.
 * Results are returned in the same order as input items.
 *
 * @param items - Input array.
 * @param fn - Async mapper, called as fn(item, index).
 * @param concurrency - Max simultaneous invocations (default 1).
 * @returns Results in input order. Rejects if any fn call rejects.
 */
export async function parallelMap(items, fn, concurrency = 1) {
    if (items.length === 0)
        return [];
    const results = new Array(items.length);
    let nextIndex = 0;
    // Guard against NaN/Infinity/fractional values. Previously a missing or
    // non-finite concurrency produced Array.from({ length: NaN }) => zero
    // workers, and the function silently resolved to an array of undefined.
    const actualConcurrency = Number.isFinite(concurrency)
        ? Math.max(1, Math.floor(concurrency))
        : 1;
    // Each worker pulls the next unclaimed index; JS is single-threaded, so
    // the read-then-increment of nextIndex is race-free between awaits.
    async function worker() {
        while (nextIndex < items.length) {
            const currentIndex = nextIndex++;
            results[currentIndex] = await fn(items[currentIndex], currentIndex);
        }
    }
    const workers = Array.from({ length: Math.min(actualConcurrency, items.length) }, worker);
    await Promise.all(workers);
    return results;
}
|
|
23
|
+
/**
 * Chunk an array into smaller arrays of specified size.
 *
 * @param items - Input array.
 * @param size - Maximum chunk length; must be a positive integer.
 * @returns Array of chunks; the last chunk may be shorter.
 * @throws {RangeError} When size is not a positive integer (a size of 0
 *         previously caused an infinite loop since `i += 0` never advances).
 */
export function chunkArray(items, size) {
    if (!Number.isInteger(size) || size <= 0) {
        throw new RangeError(`chunkArray size must be a positive integer, got ${size}`);
    }
    const chunks = [];
    for (let i = 0; i < items.length; i += size) {
        chunks.push(items.slice(i, i + size));
    }
    return chunks;
}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
-- Migration 001: initial schema — recordings, observations, contexts,
-- topic_blocks, artifacts, and the _schema_version bookkeeping table.
-- All timestamps are ISO8601 TEXT; all JSON payloads are stored as TEXT.
-- ============================================================================
-- RECORDINGS
-- ============================================================================
CREATE TABLE recordings (
  id TEXT PRIMARY KEY,
  video_path TEXT,
  audio_mic_path TEXT,
  audio_system_path TEXT,
  duration REAL NOT NULL,
  captured_at TEXT NOT NULL, -- ISO8601
  status TEXT NOT NULL DEFAULT 'raw', -- raw, processing, processed, error
  processing_step TEXT, -- extraction, clustering, context_derivation, block_formation, complete
  source_type TEXT NOT NULL, -- cap, meetily, raw
  source_metadata TEXT, -- JSON
  error_message TEXT,
  created_at TEXT NOT NULL DEFAULT (datetime('now')),
  updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);

CREATE INDEX idx_recordings_status ON recordings(status);
CREATE INDEX idx_recordings_captured_at ON recordings(captured_at);

-- ============================================================================
-- OBSERVATIONS
-- ============================================================================
-- One row per extracted signal (a video frame or an audio segment); the
-- `type` column discriminates which group of nullable fields applies.
CREATE TABLE observations (
  id TEXT PRIMARY KEY, -- UUIDv7
  recording_id TEXT NOT NULL REFERENCES recordings(id) ON DELETE CASCADE,
  type TEXT NOT NULL, -- visual, audio
  timestamp REAL NOT NULL, -- seconds from start
  end_timestamp REAL, -- for audio segments
  -- Visual fields
  image_path TEXT,
  ocr_text TEXT,
  vlm_description TEXT,
  -- Audio fields
  text TEXT,
  audio_source TEXT, -- mic, system
  audio_type TEXT, -- speech, music, silence
  -- Embedding
  embedding BLOB, -- packed float32 vector
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
);

CREATE INDEX idx_obs_recording_type ON observations(recording_id, type);
CREATE INDEX idx_obs_recording_time ON observations(recording_id, timestamp);

-- ============================================================================
-- CONTEXTS
-- ============================================================================
CREATE TABLE contexts (
  id TEXT PRIMARY KEY, -- UUIDv7
  type TEXT NOT NULL, -- project, app, url, topic, etc.
  name TEXT NOT NULL,
  metadata TEXT, -- JSON
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
);

-- A context is unique by (type, name); re-derivation reuses existing rows.
CREATE UNIQUE INDEX idx_context_type_name ON contexts(type, name);

-- ============================================================================
-- OBSERVATION_CONTEXTS (Join Table)
-- ============================================================================
CREATE TABLE observation_contexts (
  observation_id TEXT NOT NULL REFERENCES observations(id) ON DELETE CASCADE,
  context_id TEXT NOT NULL REFERENCES contexts(id) ON DELETE CASCADE,
  confidence REAL DEFAULT 1.0,
  PRIMARY KEY (observation_id, context_id)
);

CREATE INDEX idx_obs_ctx_context ON observation_contexts(context_id);

-- ============================================================================
-- TOPIC_BLOCKS
-- ============================================================================
CREATE TABLE topic_blocks (
  id TEXT PRIMARY KEY, -- UUIDv7
  recording_id TEXT NOT NULL REFERENCES recordings(id) ON DELETE CASCADE,
  context_ids TEXT NOT NULL, -- JSON array of context IDs
  classification TEXT, -- JSON: { meeting: 85, debugging: 10, ... }
  duration REAL, -- total duration in seconds
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
);

CREATE INDEX idx_topic_blocks_recording ON topic_blocks(recording_id);

-- ============================================================================
-- ARTIFACTS
-- ============================================================================
CREATE TABLE artifacts (
  id TEXT PRIMARY KEY, -- UUIDv7
  type TEXT NOT NULL, -- summary, action-items, runbook, etc.
  content TEXT NOT NULL,
  format TEXT NOT NULL DEFAULT 'markdown',
  source_block_ids TEXT, -- JSON array (single recording)
  source_context_ids TEXT, -- JSON array (cross-recording)
  created_at TEXT NOT NULL DEFAULT (datetime('now')),
  updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);

CREATE INDEX idx_artifacts_type ON artifacts(type);

-- ============================================================================
-- SCHEMA VERSION (for migrations)
-- ============================================================================
CREATE TABLE _schema_version (
  version INTEGER PRIMARY KEY,
  applied_at TEXT NOT NULL DEFAULT (datetime('now'))
);
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
-- Migration 002: clustering tables — clusters, observation_clusters join
-- table, and cluster_merges tracking audio↔visual cluster pairing.
-- ============================================================================
-- CLUSTERS
-- ============================================================================
CREATE TABLE clusters (
  id TEXT PRIMARY KEY, -- UUIDv7
  recording_id TEXT NOT NULL REFERENCES recordings(id) ON DELETE CASCADE,
  type TEXT NOT NULL, -- 'visual' | 'audio'
  start_timestamp REAL NOT NULL,
  end_timestamp REAL NOT NULL,
  observation_count INTEGER NOT NULL,
  centroid BLOB, -- Average embedding (for similarity)
  classification TEXT, -- JSON: {"topics": [...], "apps": [...], "projects": [...], "urls": [...]}
  metadata TEXT, -- JSON: extra debug info
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
);

CREATE INDEX idx_clusters_recording ON clusters(recording_id);
CREATE INDEX idx_clusters_type ON clusters(recording_id, type);

-- ============================================================================
-- OBSERVATION_CLUSTERS (Join Table)
-- ============================================================================
CREATE TABLE observation_clusters (
  observation_id TEXT NOT NULL REFERENCES observations(id) ON DELETE CASCADE,
  cluster_id TEXT NOT NULL REFERENCES clusters(id) ON DELETE CASCADE,
  distance REAL, -- Distance from centroid (0 = perfect match)
  PRIMARY KEY (observation_id, cluster_id)
);

CREATE INDEX idx_obs_cluster_cluster ON observation_clusters(cluster_id);

-- ============================================================================
-- CLUSTER_MERGES (Track audio-visual merges)
-- ============================================================================
CREATE TABLE cluster_merges (
  visual_cluster_id TEXT NOT NULL REFERENCES clusters(id) ON DELETE CASCADE,
  audio_cluster_id TEXT NOT NULL REFERENCES clusters(id) ON DELETE CASCADE,
  similarity_score REAL NOT NULL, -- Classification similarity (0-1)
  merge_reason TEXT, -- 'shared_topic' | 'shared_app' | 'centroid_similarity'
  PRIMARY KEY (visual_cluster_id, audio_cluster_id)
);
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
-- ============================================================================
-- Migration: 003_observations_vlm_fields
-- Description: Add VLM-specific fields for activity_type, apps, and topics
-- Per ADR-005 VLM-First Visual Pipeline
-- ============================================================================

-- Add activity_type column (per ADR-005 activity types)
ALTER TABLE observations ADD COLUMN activity_type TEXT;

-- Add apps column (JSON array of application names)
ALTER TABLE observations ADD COLUMN apps TEXT;

-- Add topics column (JSON array of topic/project names)
ALTER TABLE observations ADD COLUMN topics TEXT;
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
-- ============================================================================
-- Migration: 004_observations_unique
-- Description: Add unique index on observations to prevent duplicate entries
-- Cleans up existing duplicates before adding index
-- ============================================================================

-- Step 1: Delete duplicate observations (keep oldest by id per unique combo)
-- UUIDv7 ids are time-ordered, so MIN(id) is the earliest-created row.
DELETE FROM observations
WHERE id NOT IN (
  SELECT MIN(id)
  FROM observations
  GROUP BY recording_id, type, timestamp, audio_source
);

-- Step 2: Add unique index for audio observations only
-- (visual observations can have different data at same timestamp)
CREATE UNIQUE INDEX idx_obs_audio_unique ON observations(recording_id, type, timestamp, audio_source)
WHERE type = 'audio';
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
-- Migration 005: processing telemetry.
-- A processing_run is one end-to-end pipeline execution for a recording;
-- processing_stats holds one row per phase within that run.
CREATE TABLE processing_runs (
  id TEXT PRIMARY KEY,
  recording_id TEXT NOT NULL REFERENCES recordings(id),
  run_type TEXT NOT NULL,
  status TEXT NOT NULL DEFAULT 'running',
  started_at TEXT NOT NULL,
  completed_at TEXT, -- NULL while the run is still in progress
  total_duration_ms INTEGER,
  error_message TEXT,
  metadata TEXT -- JSON
);

CREATE TABLE processing_stats (
  id TEXT PRIMARY KEY,
  run_id TEXT NOT NULL REFERENCES processing_runs(id) ON DELETE CASCADE,
  phase TEXT NOT NULL,
  status TEXT NOT NULL DEFAULT 'running',
  started_at TEXT NOT NULL,
  completed_at TEXT,
  duration_ms INTEGER,
  items_total INTEGER, -- planned item count for the phase
  items_processed INTEGER, -- items actually completed
  metadata TEXT -- JSON
);

CREATE INDEX idx_processing_runs_recording ON processing_runs(recording_id);
CREATE INDEX idx_processing_runs_status ON processing_runs(status);
CREATE INDEX idx_processing_stats_run ON processing_stats(run_id);
CREATE INDEX idx_processing_stats_phase ON processing_stats(phase);
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
-- ============================================================================
-- Migration: 006_vlm_raw_response
-- Description: Store raw VLM response when parsing fails for debugging
-- ============================================================================

ALTER TABLE observations ADD COLUMN vlm_raw_response TEXT;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
-- Migration 007: Add subjects and subject_topic_blocks tables
-- Subjects are per-recording groupings of TopicBlocks into coherent work threads

CREATE TABLE IF NOT EXISTS subjects (
  id TEXT PRIMARY KEY,
  recording_id TEXT NOT NULL REFERENCES recordings(id) ON DELETE CASCADE,
  label TEXT NOT NULL,
  is_personal INTEGER DEFAULT 0, -- boolean flag (0/1)
  duration REAL DEFAULT 0, -- seconds
  activity_breakdown TEXT, -- JSON
  metadata TEXT, -- JSON
  created_at TEXT DEFAULT (datetime('now'))
);

-- Many-to-many link between subjects and the topic blocks they group.
CREATE TABLE IF NOT EXISTS subject_topic_blocks (
  subject_id TEXT NOT NULL REFERENCES subjects(id) ON DELETE CASCADE,
  topic_block_id TEXT NOT NULL REFERENCES topic_blocks(id) ON DELETE CASCADE,
  PRIMARY KEY (subject_id, topic_block_id)
);

CREATE INDEX IF NOT EXISTS idx_subjects_recording ON subjects(recording_id);
CREATE INDEX IF NOT EXISTS idx_subject_topic_blocks_subject ON subject_topic_blocks(subject_id);
CREATE INDEX IF NOT EXISTS idx_subject_topic_blocks_block ON subject_topic_blocks(topic_block_id);
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
-- Migration 008: Add recording_id to artifacts table
-- Enables 1:N relationship between recordings and their artifacts
-- (nullable: cross-recording artifacts keep recording_id NULL)

ALTER TABLE artifacts ADD COLUMN recording_id TEXT REFERENCES recordings(id) ON DELETE CASCADE;

CREATE INDEX idx_artifacts_recording ON artifacts(recording_id);
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
-- Migration 009: Link artifacts to subjects
-- Allows queries like "which subjects made this artifact" for dashboard and analysis

CREATE TABLE artifact_subjects (
  artifact_id TEXT NOT NULL REFERENCES artifacts(id) ON DELETE CASCADE,
  subject_id TEXT NOT NULL REFERENCES subjects(id) ON DELETE CASCADE,
  PRIMARY KEY (artifact_id, subject_id)
);

CREATE INDEX idx_artifact_subjects_subject ON artifact_subjects(subject_id);
|
package/package.json
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "escribano",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "AI-powered session intelligence tool — turn screen recordings into structured work summaries",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"bin": {
|
|
8
|
+
"escribano": "./dist/index.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"dist",
|
|
12
|
+
"migrations",
|
|
13
|
+
"prompts"
|
|
14
|
+
],
|
|
15
|
+
"scripts": {
|
|
16
|
+
"test": "vitest run",
|
|
17
|
+
"test:ui": "vitest --ui",
|
|
18
|
+
"typecheck": "tsc --noEmit",
|
|
19
|
+
"build": "tsc",
|
|
20
|
+
"postbuild": "node scripts/add-shebang.mjs",
|
|
21
|
+
"prepublishOnly": "pnpm build",
|
|
22
|
+
"lint": "biome check .",
|
|
23
|
+
"lint:fix": "biome check --write .",
|
|
24
|
+
"format": "biome format --write .",
|
|
25
|
+
"check": "biome ci .",
|
|
26
|
+
"escribano": "tsx --env-file=.env src/index.ts",
|
|
27
|
+
"quality-test": "tsx --env-file=.env scripts/quality-test.ts",
|
|
28
|
+
"quality-test:fast": "tsx --env-file=.env scripts/quality-test.ts --skip-summary",
|
|
29
|
+
"dashboard": "node tools/dashboard/server.js",
|
|
30
|
+
"db:reset": "rm -f ~/.escribano/escribano.db*",
|
|
31
|
+
"ollama": "OLLAMA_NUM_PARALLEL=4 OLLAMA_MAX_LOADED_MODELS=3 OLLAMA_FLASH_ATTENTION=1 OLLAMA_KEEP_ALIVE=-1 OLLAMA_CONTEXT_LENGTH=262144 ollama serve",
|
|
32
|
+
"ollama-2": "OLLAMA_NUM_PARALLEL=1 OLLAMA_HOST=127.0.0.1:11435 OLLAMA_MAX_LOADED_MODELS=1 OLLAMA_FLASH_ATTENTION=1 OLLAMA_KEEP_ALIVE=-1 OLLAMA_CONTEXT_LENGTH=262144 ollama serve",
|
|
33
|
+
"index:rebuild": "tsx --env-file=.env src/scripts/rebuild-index.ts"
|
|
34
|
+
},
|
|
35
|
+
"keywords": [
|
|
36
|
+
"ai",
|
|
37
|
+
"vlm",
|
|
38
|
+
"screen-recording",
|
|
39
|
+
"developer-tools",
|
|
40
|
+
"session-intelligence",
|
|
41
|
+
"productivity",
|
|
42
|
+
"macos",
|
|
43
|
+
"local-ai",
|
|
44
|
+
"whisper",
|
|
45
|
+
"ollama",
|
|
46
|
+
"mlx"
|
|
47
|
+
],
|
|
48
|
+
"author": "Eduardo Sanchez",
|
|
49
|
+
"license": "MIT",
|
|
50
|
+
"repository": {
|
|
51
|
+
"type": "git",
|
|
52
|
+
"url": "https://github.com/eduardosanzb/escribano"
|
|
53
|
+
},
|
|
54
|
+
"homepage": "https://escribano.work",
|
|
55
|
+
"bugs": "https://github.com/eduardosanzb/escribano/issues",
|
|
56
|
+
"packageManager": "pnpm@10.8.0",
|
|
57
|
+
"engines": {
|
|
58
|
+
"node": ">=20"
|
|
59
|
+
},
|
|
60
|
+
"publishConfig": {
|
|
61
|
+
"access": "public"
|
|
62
|
+
},
|
|
63
|
+
"dependencies": {
|
|
64
|
+
"better-sqlite3": "^12.6.2",
|
|
65
|
+
"express": "^5.0.0",
|
|
66
|
+
"pidusage": "^4.0.1",
|
|
67
|
+
"undici": "^7.22.0",
|
|
68
|
+
"uuidv7": "1.1.0",
|
|
69
|
+
"zod": "^4.3.5"
|
|
70
|
+
},
|
|
71
|
+
"devDependencies": {
|
|
72
|
+
"@biomejs/biome": "^2.3.11",
|
|
73
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
74
|
+
"@types/node": "^25.0.3",
|
|
75
|
+
"@types/pidusage": "^2.0.5",
|
|
76
|
+
"@vitest/coverage-v8": "^4.0.17",
|
|
77
|
+
"@vitest/ui": "^4.0.17",
|
|
78
|
+
"tsx": "^4.21.0",
|
|
79
|
+
"typescript": "^5.9.3",
|
|
80
|
+
"vitest": "^4.0.17"
|
|
81
|
+
}
|
|
82
|
+
}
|