escribano 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +297 -0
- package/dist/0_types.js +279 -0
- package/dist/actions/classify-session.js +77 -0
- package/dist/actions/create-contexts.js +44 -0
- package/dist/actions/create-topic-blocks.js +68 -0
- package/dist/actions/extract-metadata.js +24 -0
- package/dist/actions/generate-artifact-v3.js +296 -0
- package/dist/actions/generate-artifact.js +61 -0
- package/dist/actions/generate-summary-v3.js +260 -0
- package/dist/actions/outline-index.js +204 -0
- package/dist/actions/process-recording-v2.js +494 -0
- package/dist/actions/process-recording-v3.js +412 -0
- package/dist/actions/process-session.js +183 -0
- package/dist/actions/publish-summary-v3.js +303 -0
- package/dist/actions/sync-to-outline.js +196 -0
- package/dist/adapters/audio.silero.adapter.js +69 -0
- package/dist/adapters/cap.adapter.js +94 -0
- package/dist/adapters/capture.cap.adapter.js +107 -0
- package/dist/adapters/capture.filesystem.adapter.js +124 -0
- package/dist/adapters/embedding.ollama.adapter.js +141 -0
- package/dist/adapters/intelligence.adapter.js +202 -0
- package/dist/adapters/intelligence.mlx.adapter.js +395 -0
- package/dist/adapters/intelligence.ollama.adapter.js +741 -0
- package/dist/adapters/publishing.outline.adapter.js +75 -0
- package/dist/adapters/storage.adapter.js +81 -0
- package/dist/adapters/storage.fs.adapter.js +83 -0
- package/dist/adapters/transcription.whisper.adapter.js +206 -0
- package/dist/adapters/video.ffmpeg.adapter.js +405 -0
- package/dist/adapters/whisper.adapter.js +168 -0
- package/dist/batch-context.js +329 -0
- package/dist/db/helpers.js +50 -0
- package/dist/db/index.js +95 -0
- package/dist/db/migrate.js +80 -0
- package/dist/db/repositories/artifact.sqlite.js +77 -0
- package/dist/db/repositories/cluster.sqlite.js +92 -0
- package/dist/db/repositories/context.sqlite.js +75 -0
- package/dist/db/repositories/index.js +10 -0
- package/dist/db/repositories/observation.sqlite.js +70 -0
- package/dist/db/repositories/recording.sqlite.js +56 -0
- package/dist/db/repositories/subject.sqlite.js +64 -0
- package/dist/db/repositories/topic-block.sqlite.js +45 -0
- package/dist/db/types.js +4 -0
- package/dist/domain/classification.js +60 -0
- package/dist/domain/context.js +97 -0
- package/dist/domain/index.js +2 -0
- package/dist/domain/observation.js +17 -0
- package/dist/domain/recording.js +41 -0
- package/dist/domain/segment.js +93 -0
- package/dist/domain/session.js +93 -0
- package/dist/domain/time-range.js +38 -0
- package/dist/domain/transcript.js +79 -0
- package/dist/index.js +173 -0
- package/dist/pipeline/context.js +162 -0
- package/dist/pipeline/events.js +2 -0
- package/dist/prerequisites.js +226 -0
- package/dist/scripts/rebuild-index.js +53 -0
- package/dist/scripts/seed-fixtures.js +290 -0
- package/dist/services/activity-segmentation.js +333 -0
- package/dist/services/activity-segmentation.test.js +191 -0
- package/dist/services/app-normalization.js +212 -0
- package/dist/services/cluster-merge.js +69 -0
- package/dist/services/clustering.js +237 -0
- package/dist/services/debug.js +58 -0
- package/dist/services/frame-sampling.js +318 -0
- package/dist/services/signal-extraction.js +106 -0
- package/dist/services/subject-grouping.js +342 -0
- package/dist/services/temporal-alignment.js +99 -0
- package/dist/services/vlm-enrichment.js +84 -0
- package/dist/services/vlm-service.js +130 -0
- package/dist/stats/index.js +3 -0
- package/dist/stats/observer.js +65 -0
- package/dist/stats/repository.js +36 -0
- package/dist/stats/resource-tracker.js +86 -0
- package/dist/stats/types.js +1 -0
- package/dist/test-classification-prompts.js +181 -0
- package/dist/tests/cap.adapter.test.js +75 -0
- package/dist/tests/capture.cap.adapter.test.js +69 -0
- package/dist/tests/classify-session.test.js +140 -0
- package/dist/tests/db/repositories.test.js +243 -0
- package/dist/tests/domain/time-range.test.js +31 -0
- package/dist/tests/integration.test.js +84 -0
- package/dist/tests/intelligence.adapter.test.js +102 -0
- package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
- package/dist/tests/process-v2.test.js +90 -0
- package/dist/tests/services/clustering.test.js +112 -0
- package/dist/tests/services/frame-sampling.test.js +152 -0
- package/dist/tests/utils/ocr.test.js +76 -0
- package/dist/tests/utils/parallel.test.js +57 -0
- package/dist/tests/visual-observer.test.js +175 -0
- package/dist/utils/id-normalization.js +15 -0
- package/dist/utils/index.js +9 -0
- package/dist/utils/model-detector.js +154 -0
- package/dist/utils/ocr.js +80 -0
- package/dist/utils/parallel.js +32 -0
- package/migrations/001_initial.sql +109 -0
- package/migrations/002_clusters.sql +41 -0
- package/migrations/003_observations_vlm_fields.sql +14 -0
- package/migrations/004_observations_unique.sql +18 -0
- package/migrations/005_processing_stats.sql +29 -0
- package/migrations/006_vlm_raw_response.sql +6 -0
- package/migrations/007_subjects.sql +23 -0
- package/migrations/008_artifacts_recording.sql +6 -0
- package/migrations/009_artifact_subjects.sql +10 -0
- package/package.json +82 -0
- package/prompts/action-items.md +55 -0
- package/prompts/blog-draft.md +54 -0
- package/prompts/blog-research.md +87 -0
- package/prompts/card.md +54 -0
- package/prompts/classify-segment.md +38 -0
- package/prompts/classify.md +37 -0
- package/prompts/code-snippets.md +163 -0
- package/prompts/extract-metadata.md +149 -0
- package/prompts/notes.md +83 -0
- package/prompts/runbook.md +123 -0
- package/prompts/standup.md +50 -0
- package/prompts/step-by-step.md +125 -0
- package/prompts/subject-grouping.md +31 -0
- package/prompts/summary-v3.md +89 -0
- package/prompts/summary.md +77 -0
- package/prompts/topic-classifier.md +24 -0
- package/prompts/topic-extract.md +13 -0
- package/prompts/vlm-batch.md +21 -0
- package/prompts/vlm-single.md +19 -0
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { readdir, readFile, stat } from 'node:fs/promises';
|
|
2
|
+
import { homedir } from 'node:os';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { capConfigSchema } from '../0_types.js';
|
|
5
|
+
import { normalizeSessionId } from '../utils/id-normalization.js';
|
|
6
|
+
/**
 * Resolve a leading "~/" to the current user's home directory.
 * Any other path is returned untouched (including a bare "~").
 * @param {string} path - Possibly tilde-prefixed path.
 * @returns {string} Absolute or original path.
 */
function expandPath(path) {
    const HOME_PREFIX = '~/';
    if (!path.startsWith(HOME_PREFIX)) {
        return path;
    }
    return join(homedir(), path.slice(HOME_PREFIX.length));
}
|
|
12
|
+
/**
 * Parse a single Cap `*.cap` recording directory into a recording descriptor.
 *
 * Reads `recording-meta.json`, resolves the first segment's video/mic/system
 * audio paths, and derives `capturedAt` from the audio file's mtime.
 *
 * @param {string} capDirPath - Absolute path to the `.cap` directory.
 * @param {object} videoService - Service exposing `getMetadata(path)`; used to
 *   read the video duration (best-effort — failures are only logged).
 * @returns {Promise<object|null>} Recording descriptor, or null when the
 *   recording has no audio track at all.
 * @throws {Error} on missing files, invalid JSON, or malformed metadata.
 */
async function parseCapRecording(capDirPath, videoService) {
    try {
        const metaPath = join(capDirPath, 'recording-meta.json');
        const metaContent = await readFile(metaPath, 'utf-8');
        const meta = JSON.parse(metaContent);
        if (!meta.segments ||
            !Array.isArray(meta.segments) ||
            meta.segments.length === 0) {
            throw new Error(`Invalid metadata in ${capDirPath}: missing or empty segments array`);
        }
        // Only the first segment is used; multi-segment recordings are not handled here.
        const firstSegment = meta.segments[0];
        const videoPath = firstSegment.display?.path
            ? join(capDirPath, firstSegment.display.path)
            : null;
        // A recording may carry a mic track, a system_audio track, or both.
        const micAudio = firstSegment.mic?.path
            ? join(capDirPath, firstSegment.mic.path)
            : null;
        const systemAudio = firstSegment.system_audio?.path
            ? join(capDirPath, firstSegment.system_audio.path)
            : null;
        // ?? (not ||): both values are string-or-null, we only want null fallback.
        const audioToStat = micAudio ?? systemAudio;
        if (!audioToStat) {
            console.log(`Skipping ${capDirPath}: no audio track found`);
            return null;
        }
        const stats = await stat(audioToStat);
        const capturedAt = stats.mtime;
        // Duration comes from the video container when present; 0 otherwise.
        let duration = 0;
        if (videoPath) {
            try {
                const metadata = await videoService.getMetadata(videoPath);
                duration = metadata.duration;
            }
            catch (e) {
                console.warn(`Failed to get video duration for ${capDirPath}: ${e.message}`);
            }
        }
        // NOTE(review): POSIX separator only — fine on macOS where Cap runs,
        // but path.basename would be safer cross-platform. Confirm target OS.
        const rawId = capDirPath.split('/').pop() || 'unknown';
        const recordingId = normalizeSessionId(rawId);
        return {
            id: recordingId,
            source: {
                type: 'cap',
                originalPath: capDirPath,
                metadata: meta,
            },
            videoPath,
            audioMicPath: micAudio,
            audioSystemPath: systemAudio,
            duration,
            capturedAt,
        };
    }
    catch (error) {
        if (error.code === 'ENOENT') {
            throw new Error(`Recording directory or files not found: ${capDirPath}`);
        }
        if (error.name === 'SyntaxError') {
            throw new Error(`Invalid JSON in recording-meta.json at ${capDirPath}`);
        }
        throw new Error(`Failed to parse recording at ${capDirPath}: ${error.message}`);
    }
}
|
|
77
|
+
/**
 * Create a CaptureSource over a directory of Cap `*.cap` recordings.
 *
 * @param {object} [config] - Validated against capConfigSchema; supplies
 *   `recordingsPath` (supports `~/` expansion).
 * @param {object} videoService - Passed through to parseCapRecording for
 *   duration extraction.
 * @returns {{getLatestRecording: Function, listRecordings: Function}}
 */
export function createCapCaptureSource(config = {}, videoService) {
    const parsedConfig = capConfigSchema.parse(config);
    const recordingsPath = expandPath(parsedConfig.recordingsPath);
    /**
     * List up to `limit` recordings, newest (by capturedAt) first.
     * Directory-level failures return []; per-recording parse failures
     * are logged individually and skipped.
     */
    const innerList = async (limit = 10) => {
        try {
            const entries = await readdir(recordingsPath, { withFileTypes: true });
            const capDirs = entries.filter((entry) => entry.isDirectory() && entry.name.endsWith('.cap'));
            const settled = await Promise.allSettled(capDirs.map((dir) => parseCapRecording(join(recordingsPath, dir.name), videoService)));
            // Report each failed parse on its own line; the original dumped the
            // whole array via console.log, printing `[]` even with no failures.
            for (const result of settled) {
                if (result.status === 'rejected') {
                    console.warn(`[Cap] ${result.reason}`);
                }
            }
            return settled
                .filter((p) => p.status === 'fulfilled')
                .map((p) => p.value)
                .filter((r) => r !== null)
                .sort((a, b) => b.capturedAt.getTime() - a.capturedAt.getTime())
                .slice(0, limit);
        }
        catch (error) {
            console.error('Failed to list Cap recordings:', error);
            return [];
        }
    };
    return {
        getLatestRecording: () => innerList(1).then((recordings) => recordings[0] ?? null),
        listRecordings: innerList,
    };
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Filesystem Capture Adapter
|
|
3
|
+
*
|
|
4
|
+
* Allows processing arbitrary video files from the filesystem.
|
|
5
|
+
* Useful for QuickTime screen recordings, downloaded videos, etc.
|
|
6
|
+
*/
|
|
7
|
+
import { exec } from 'node:child_process';
|
|
8
|
+
import { mkdir, stat } from 'node:fs/promises';
|
|
9
|
+
import os from 'node:os';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
import { promisify } from 'node:util';
|
|
12
|
+
const execAsync = promisify(exec);
|
|
13
|
+
/**
 * Expand a leading "~/" to the user's home directory.
 * Uses os.homedir() — which already handles the platform-specific
 * HOME/USERPROFILE lookup — instead of reading env vars by hand
 * (the original silently expanded to '' when neither var was set).
 * @param {string} inputPath - Possibly tilde-prefixed path.
 * @returns {string} Expanded or original path.
 */
function expandPath(inputPath) {
    if (inputPath.startsWith('~/')) {
        return path.join(os.homedir(), inputPath.slice(2));
    }
    return inputPath;
}
|
|
20
|
+
/**
 * Derive an ID-safe slug from a filename: drop the extension, lowercase,
 * collapse every run of non-alphanumerics into a single hyphen, trim edge
 * hyphens, and cap the result at 100 characters.
 * @param {string} filename - Original file name (with or without extension).
 * @returns {string} Sanitized identifier.
 */
function sanitizeFilenameToId(filename) {
    const withoutExtension = filename.replace(/\.[^/.]+$/, '');
    const slug = withoutExtension
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, '-')
        .replace(/^-+|-+$/g, '');
    return slug.slice(0, 100);
}
|
|
30
|
+
/**
 * Probe a video file for the presence of at least one audio stream.
 * @param {string} videoPath - Path to the video file.
 * @returns {Promise<boolean>} true when ffprobe reports an audio stream.
 * @throws when ffprobe is not installed or fails.
 */
async function checkHasAudio(videoPath) {
    // Security fix: the original interpolated videoPath into a shell string,
    // so `"`, `$`, or backticks in a filename could run arbitrary commands.
    // execFile passes the path as an argv element — no shell involved.
    const { execFile } = await import('node:child_process');
    const execFileAsync = promisify(execFile);
    const { stdout } = await execFileAsync('ffprobe', [
        '-v', 'error',
        '-select_streams', 'a',
        '-show_entries', 'stream=codec_type',
        '-of', 'csv=p=0',
        videoPath,
    ]);
    return stdout.trim().length > 0;
}
|
|
35
|
+
/**
 * Extract the audio track from a video as 16-bit PCM WAV at 16 kHz
 * (pcm_s16le / -ar 16000), creating the output directory first.
 * @param {string} videoPath - Source video file.
 * @param {string} outputPath - Destination .wav path (overwritten if present).
 * @throws when ffmpeg is missing or the extraction fails.
 */
async function extractAudio(videoPath, outputPath) {
    await mkdir(path.dirname(outputPath), { recursive: true });
    // Security fix: avoid building a shell string from file paths (shell
    // metacharacters in a filename could inject commands). execFile passes
    // each path as a plain argv entry.
    const { execFile } = await import('node:child_process');
    const execFileAsync = promisify(execFile);
    await execFileAsync('ffmpeg', [
        '-i', videoPath,
        '-vn',
        '-acodec', 'pcm_s16le',
        '-ar', '16000',
        '-y',
        outputPath,
    ]);
}
|
|
40
|
+
/**
 * Build a recording descriptor for an arbitrary video file on disk.
 *
 * Reads file stats and video metadata, derives a slug ID from the filename,
 * and resolves audio: explicit overrides win; otherwise, if the video has an
 * embedded audio stream, it is extracted to a temp WAV under os.tmpdir().
 *
 * @param {string} videoPath - Resolved path to the video file.
 * @param {object} videoService - Exposes `getMetadata(path)` returning at
 *   least { duration, width, height } — shape assumed from usage; confirm.
 * @param {{micAudioPath?: string, systemAudioPath?: string}} [audioOverrides]
 *   Optional explicit audio files (support `~/` expansion).
 * @returns {Promise<object>} Recording descriptor with source type 'raw'.
 * @throws {Error} when the file is missing or metadata extraction fails.
 */
async function parseFilesystemRecording(videoPath, videoService, audioOverrides) {
    try {
        const stats = await stat(videoPath);
        // capturedAt approximated by file mtime — best signal available here.
        const capturedAt = stats.mtime;
        // Get video metadata (duration, dimensions)
        const metadata = await videoService.getMetadata(videoPath);
        // Generate recording ID from filename
        const fileName = path.basename(videoPath);
        const recordingId = sanitizeFilenameToId(fileName);
        // Audio handling - check for overrides first
        let audioMicPath = null;
        let audioSystemPath = null;
        if (audioOverrides?.micAudioPath) {
            audioMicPath = expandPath(audioOverrides.micAudioPath);
        }
        else {
            // No override: extract the embedded track (if any) to a temp WAV.
            // NOTE(review): the temp file is never cleaned up by this module —
            // presumably downstream processing owns it; confirm.
            const hasAudio = await checkHasAudio(videoPath);
            if (hasAudio) {
                const tempAudioPath = path.join(os.tmpdir(), 'escribano', recordingId, 'audio.wav');
                await extractAudio(videoPath, tempAudioPath);
                audioMicPath = tempAudioPath;
            }
        }
        if (audioOverrides?.systemAudioPath) {
            audioSystemPath = expandPath(audioOverrides.systemAudioPath);
        }
        return {
            id: recordingId,
            source: {
                type: 'raw',
                originalPath: videoPath,
                metadata: {
                    filename: fileName,
                    size: stats.size,
                    duration: metadata.duration,
                    width: metadata.width,
                    height: metadata.height,
                },
            },
            videoPath: videoPath,
            audioMicPath: audioMicPath,
            audioSystemPath: audioSystemPath,
            duration: metadata.duration,
            capturedAt,
        };
    }
    catch (error) {
        // Map the common not-found case to a clearer message; wrap the rest.
        if (error.code === 'ENOENT') {
            throw new Error(`Video file not found: ${videoPath}`);
        }
        throw new Error(`Failed to parse video at ${videoPath}: ${error.message}`);
    }
}
|
|
93
|
+
/**
|
|
94
|
+
* Creates a CaptureSource that reads a single video file from the filesystem.
|
|
95
|
+
*
|
|
96
|
+
* @param config - Configuration with video file path
|
|
97
|
+
* @param videoService - Video service for metadata extraction
|
|
98
|
+
* @returns CaptureSource that treats the file as the "latest" recording
|
|
99
|
+
*/
|
|
100
|
+
/**
 * Creates a CaptureSource that reads a single video file from the filesystem.
 *
 * @param config - Configuration with video file path and optional
 *   micAudioPath / systemAudioPath overrides.
 * @param videoService - Video service for metadata extraction.
 * @returns CaptureSource that treats the file as the "latest" recording.
 */
export function createFilesystemCaptureSource(config, videoService) {
    const resolvedPath = expandPath(config.videoPath);
    // Single shared loader so both entry points resolve the file identically
    // (the original duplicated this call site).
    const load = () => parseFilesystemRecording(resolvedPath, videoService, {
        micAudioPath: config.micAudioPath,
        systemAudioPath: config.systemAudioPath,
    });
    return {
        getLatestRecording: async () => {
            try {
                return await load();
            }
            catch (error) {
                console.error('Failed to load filesystem recording:', error);
                return null;
            }
        },
        listRecordings: async (_limit = 1) => {
            try {
                const recording = await load();
                return recording ? [recording] : [];
            }
            catch (error) {
                // Consistency fix: the original let listRecordings reject while
                // getLatestRecording swallowed the same error; mirror the Cap
                // adapter and report + return an empty list instead.
                console.error('Failed to load filesystem recording:', error);
                return [];
            }
        },
    };
}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escribano - Embedding Adapter (Ollama)
|
|
3
|
+
*
|
|
4
|
+
* Simplified atomic worker for Ollama REST /api/embed API.
|
|
5
|
+
* Batching and parallelism are handled by the pipeline, not here.
|
|
6
|
+
*/
|
|
7
|
+
const MIN_TEXT_LENGTH = 5;
const DEFAULT_TIMEOUT_MS = 600_000; // 10 minutes
const MAX_RETRIES = 3;
/**
 * Create an EmbeddingService backed by Ollama's /api/embed endpoint.
 * Batching/parallelism belong to the pipeline; this worker does one
 * request per call (with timeout + retry) plus pure vector math helpers.
 *
 * @param {object} config - { endpoint, embedding?: { model } }; the chat
 *   endpoint is rewritten to the server base URL.
 * @returns {{embed: Function, embedBatch: Function, similarity: Function, centroid: Function}}
 */
export function createOllamaEmbeddingService(config) {
    const baseUrl = config.endpoint.replace('/api/chat', '');
    const model = process.env.ESCRIBANO_EMBED_MODEL ||
        config.embedding?.model ||
        'qwen3-embedding:8b';
    // Instruction prefix prepended for clustering-task embeddings.
    const CLUSTERING_PREFIX = 'Instruct: Cluster screen recording observations for semantic similarity\n';
    const prefixFor = (taskType) => (taskType === 'clustering' ? CLUSTERING_PREFIX : '');
    /**
     * Call Ollama /api/embed with timeout, external-cancel support, and
     * exponential-backoff retry on transient failures.
     * @param {string[]} texts - Inputs to embed (already prefixed).
     * @param {AbortSignal} [externalSignal] - Caller-side cancellation.
     * @returns {Promise<number[][]>} one embedding per input.
     */
    async function callEmbedAPI(texts, externalSignal) {
        let lastError = null;
        for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
            const controller = new AbortController();
            const timeoutId = setTimeout(() => controller.abort(), DEFAULT_TIMEOUT_MS);
            const onExternalAbort = () => controller.abort();
            // Fix: register with { once } and remove in finally — the original
            // piled a fresh listener onto the caller's signal every attempt.
            externalSignal?.addEventListener('abort', onExternalAbort, { once: true });
            try {
                const response = await fetch(`${baseUrl}/api/embed`, {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({
                        model,
                        input: texts,
                        truncate: true,
                        options: {
                            num_ctx: 40000,
                        },
                    }),
                    signal: controller.signal,
                });
                if (!response.ok) {
                    const errorText = await response.text();
                    throw new Error(`Ollama embed error: ${response.status} ${response.statusText} - ${errorText.substring(0, 200)}`);
                }
                const data = await response.json();
                return data.embeddings;
            }
            catch (error) {
                lastError = error;
                // Fix: never retry a caller-requested cancellation.
                if (externalSignal?.aborted) {
                    break;
                }
                const isRetryable = lastError.message.includes('abort') ||
                    lastError.message.includes('500') ||
                    lastError.message.includes('ECONNRESET');
                if (isRetryable && attempt < MAX_RETRIES) {
                    const delay = 2 ** attempt * 1000; // Exponential backoff
                    console.warn(`[Embedding] Attempt ${attempt}/${MAX_RETRIES} failed, retrying in ${delay / 1000}s...`);
                    await new Promise((r) => setTimeout(r, delay));
                }
                else {
                    // Not retryable or out of attempts — fall through to throw.
                    break;
                }
            }
            finally {
                clearTimeout(timeoutId);
                externalSignal?.removeEventListener('abort', onExternalAbort);
            }
        }
        throw lastError || new Error('Embedding failed after retries');
    }
    return {
        /**
         * Embed one text. Texts shorter than MIN_TEXT_LENGTH (after trim)
         * return [] without hitting the server.
         */
        embed: async (text, taskType) => {
            if (!text || text.trim().length < MIN_TEXT_LENGTH) {
                return [];
            }
            const results = await callEmbedAPI([prefixFor(taskType) + text]);
            return results[0] || [];
        },
        /**
         * Embed a batch in one API call, preserving input order; invalid
         * (too-short/empty) slots come back as [].
         */
        embedBatch: async (texts, taskType, options) => {
            const prefix = prefixFor(taskType);
            // Keep only embeddable texts, remembering original positions.
            const validItems = [];
            for (let i = 0; i < texts.length; i++) {
                if (texts[i] && texts[i].trim().length >= MIN_TEXT_LENGTH) {
                    validItems.push({ index: i, text: prefix + texts[i] });
                }
            }
            // Fix: fresh array per slot — the original's .fill([]) made every
            // empty slot alias ONE shared mutable array.
            const finalEmbeddings = Array.from({ length: texts.length }, () => []);
            if (validItems.length === 0) {
                return finalEmbeddings;
            }
            const embeddings = await callEmbedAPI(validItems.map((v) => v.text), options?.signal);
            for (let i = 0; i < validItems.length; i++) {
                finalEmbeddings[validItems[i].index] = embeddings[i] || [];
            }
            return finalEmbeddings;
        },
        /**
         * Cosine similarity. Empty or zero-norm vectors yield 0; mismatched
         * dimensions throw.
         */
        similarity: (a, b) => {
            if (a.length === 0 || b.length === 0)
                return 0;
            if (a.length !== b.length) {
                throw new Error(`Embedding dimensions mismatch: ${a.length} vs ${b.length}`);
            }
            let dotProduct = 0;
            let normA = 0;
            let normB = 0;
            for (let i = 0; i < a.length; i++) {
                dotProduct += a[i] * b[i];
                normA += a[i] * a[i];
                normB += b[i] * b[i];
            }
            if (normA === 0 || normB === 0)
                return 0;
            return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
        },
        /**
         * Element-wise mean of the non-empty vectors; [] when none remain.
         */
        centroid: (embeddings) => {
            const valid = embeddings.filter((e) => e.length > 0);
            if (valid.length === 0)
                return [];
            if (valid.length === 1)
                return valid[0];
            const dim = valid[0].length;
            const result = new Array(dim).fill(0);
            for (const emb of valid) {
                for (let i = 0; i < dim; i++) {
                    result[i] += emb[i];
                }
            }
            for (let i = 0; i < dim; i++) {
                result[i] /= valid.length;
            }
            return result;
        },
    };
}
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escribano - Intelligence Adapter (Ollama)
|
|
3
|
+
*
|
|
4
|
+
* Implements IntelligenceService using Ollama REST API
|
|
5
|
+
*/
|
|
6
|
+
import { readFileSync } from 'node:fs';
|
|
7
|
+
import { join } from 'node:path';
|
|
8
|
+
/**
 * Build an IntelligenceService facade over the Ollama helpers, binding the
 * shared config into each operation.
 * @param {object} config - Ollama settings ({ endpoint, model, maxRetries, timeout }).
 * @returns {{classify: Function, extractMetadata: Function, generate: Function}}
 */
export function createIntelligenceService(config) {
    const classify = (transcript) => classifyWithOllama(transcript, config);
    const extract = (transcript, classification) => extractMetadata(transcript, classification, config);
    const generateFn = (artifactType, context) => generateArtifact(artifactType, context, config);
    return {
        classify,
        extractMetadata: extract,
        generate: generateFn,
    };
}
|
|
15
|
+
/**
 * Verify the Ollama server is reachable, printing install/start guidance
 * when it is not.
 * @param {string} [baseUrl] - Ollama server base URL; defaults to the local
 *   install (the original hard-coded this, making remote configs untestable).
 * @throws {Error} 'Ollama service required for classification' (with the
 *   underlying failure attached as `cause`) when unreachable or non-OK.
 */
async function checkOllamaHealth(baseUrl = 'http://localhost:11434') {
    try {
        const response = await fetch(`${baseUrl}/api/tags`);
        if (!response.ok) {
            throw new Error('Ollama API not accessible');
        }
        const data = await response.json();
        console.log('✓ Ollama is running and accessible');
        console.log(` Available models: ${data.models?.length || 0}`);
    }
    catch (error) {
        console.error('✗ Ollama is not running or not accessible');
        console.error(' Error:', error.message);
        console.error('');
        console.error('Please start Ollama:');
        console.error(' brew install ollama');
        console.error(' ollama pull qwen3:32b');
        console.error(' ollama serve');
        console.error('');
        // Keep the original failure for callers that inspect `cause`.
        throw new Error('Ollama service required for classification', { cause: error });
    }
}
|
|
37
|
+
/**
 * Classify a transcript into activity-type scores (meeting/debugging/
 * tutorial/learning/working), each normalized to the 0-100 range.
 * @param {object} transcript - { fullText, segments } consumed by the prompt.
 * @param {object} config - Ollama call settings (endpoint, model, retries, timeout).
 * @returns {Promise<object>} score per category; missing/invalid values become 0.
 */
async function classifyWithOllama(transcript, config) {
    console.log('Classifying transcript with Ollama...');
    // Heartbeat dots so a long LLM call doesn't look hung.
    const tick = setInterval(() => {
        process.stdout.write('.');
    }, 1000);
    let raw;
    try {
        await checkOllamaHealth();
        const prompt = loadClassifyPrompt(transcript);
        raw = await callOllama(prompt, config, { expectJson: true });
    }
    finally {
        // Bug fix: the original only cleared the interval on success — a
        // failed health check or LLM call leaked the timer and kept the
        // process alive indefinitely.
        clearInterval(tick);
    }
    console.log('\nClassification completed.');
    // The model sometimes answers with 0..1 probabilities; scale those to
    // 0..100. Non-numeric/missing values collapse to 0 via `|| 0`.
    const toPercent = (value) => value * (value <= 1 ? 100 : 1) || 0;
    return {
        meeting: toPercent(raw.meeting),
        debugging: toPercent(raw.debugging),
        tutorial: toPercent(raw.tutorial),
        learning: toPercent(raw.learning),
        working: toPercent(raw.working),
    };
}
|
|
56
|
+
/**
 * Build the classification prompt from prompts/classify.md (resolved
 * relative to process.cwd()).
 *
 * Bug fix: String.replace with a string replacement only substituted the
 * FIRST occurrence of each placeholder and interpreted `$`-sequences
 * (`$&`, `$'`, ...) appearing in transcript text. replaceAll with a
 * function replacer inserts the text literally at every occurrence.
 *
 * @param {object} transcript - { fullText, segments: [{id, start, end, text}] }.
 * @returns {string} Prompt with {{TRANSCRIPT_ALL}} / {{TRANSCRIPT_SEGMENTS}} filled.
 */
function loadClassifyPrompt(transcript) {
    const promptPath = join(process.cwd(), 'prompts', 'classify.md');
    let prompt = readFileSync(promptPath, 'utf-8');
    const segmentsText = transcript.segments
        .map((seg) => `[seg-${seg.id}] [${seg.start}s - ${seg.end}s] ${seg.text}`)
        .join('\n');
    prompt = prompt.replaceAll('{{TRANSCRIPT_ALL}}', () => transcript.fullText);
    prompt = prompt.replaceAll('{{TRANSCRIPT_SEGMENTS}}', () => segmentsText);
    return prompt;
}
|
|
66
|
+
/**
 * Call the Ollama chat endpoint with per-attempt timeout and linear-backoff
 * retry.
 * @param {string} prompt - User message content.
 * @param {object} config - { endpoint, model, maxRetries, timeout }.
 * @param {{expectJson: boolean}} [options] - When expectJson, a JSON-only
 *   system prompt and Ollama's `format: 'json'` are used and the first
 *   `{...}` span in the reply is parsed; otherwise the raw text is returned.
 * @returns {Promise<object|string>} parsed JSON object or raw content.
 * @throws {Error} after maxRetries failed attempts (last failure in message).
 */
async function callOllama(prompt, config, options = { expectJson: true }) {
    const { endpoint, model, maxRetries, timeout } = config;
    let lastError = null;
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
        const controller = new AbortController();
        const timeoutId = setTimeout(() => controller.abort(), timeout);
        try {
            const response = await fetch(endpoint, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({
                    model,
                    messages: [
                        {
                            role: 'system',
                            content: options.expectJson
                                ? 'You are a JSON-only output system. Output ONLY valid JSON, no other text.'
                                : 'You are a helpful assistant that generates high-quality markdown documentation.',
                        },
                        {
                            role: 'user',
                            content: prompt,
                        },
                    ],
                    stream: false,
                    ...(options.expectJson && { format: 'json' }),
                }),
                signal: controller.signal,
            });
            if (!response.ok) {
                throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
            }
            const data = await response.json();
            // Reject truncated generations (e.g. context-length cutoffs).
            if (!data.done || data.done_reason !== 'stop') {
                throw new Error(`Incomplete response: done=${data.done}, reason=${data.done_reason}`);
            }
            const content = data.message.content;
            if (options.expectJson) {
                const jsonMatch = content.match(/\{[\s\S]*\}/);
                if (!jsonMatch)
                    throw new Error('No JSON found in response');
                return JSON.parse(jsonMatch[0]);
            }
            return content;
        }
        catch (error) {
            lastError = error;
            if (error instanceof Error && error.name === 'AbortError') {
                console.log(`Attempt ${attempt}/${maxRetries}: Request timed out, retrying...`);
            }
            else {
                console.log(`Attempt ${attempt}/${maxRetries}: Request failed, retrying...`);
            }
            if (attempt < maxRetries) {
                await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
            }
        }
        finally {
            // Bug fix: the original cleared the timer only on the success path,
            // so every failed attempt left a pending timeout that kept the
            // event loop (and the process) alive until `timeout` elapsed.
            clearTimeout(timeoutId);
        }
    }
    throw new Error(`Request failed after ${maxRetries} retries: ${lastError?.message}`);
}
|
|
129
|
+
/**
 * Extract structured metadata (speakers, key moments, action items,
 * technical terms, code snippets) from a transcript via an Ollama JSON call.
 * Each missing/falsy field defaults to an empty list.
 * @param {object} transcript - { fullText, segments }.
 * @param {object} classification - Activity scores used to focus the prompt.
 * @param {object} config - Ollama call settings.
 * @returns {Promise<object>} metadata with all five list fields present.
 */
async function extractMetadata(transcript, classification, config) {
    const prompt = loadMetadataPrompt(transcript, classification);
    const raw = await callOllama(prompt, config, { expectJson: true });
    const listFields = ['speakers', 'keyMoments', 'actionItems', 'technicalTerms', 'codeSnippets'];
    const metadata = {};
    for (const field of listFields) {
        metadata[field] = raw[field] || [];
    }
    return metadata;
}
|
|
140
|
+
/**
 * Build the metadata-extraction prompt from prompts/extract-metadata.md
 * (resolved relative to process.cwd()).
 *
 * Bug fix: String.replace with a string replacement substituted only the
 * first occurrence of each placeholder and interpreted `$`-sequences in
 * transcript text; replaceAll with a function replacer inserts literally
 * at every occurrence.
 *
 * @param {object} transcript - { fullText, segments: [{start, end, text}] }.
 * @param {object} classification - Category scores; only those >= 25 are listed.
 * @returns {string} Fully substituted prompt.
 */
function loadMetadataPrompt(transcript, classification) {
    const promptPath = join(process.cwd(), 'prompts', 'extract-metadata.md');
    let prompt = readFileSync(promptPath, 'utf-8');
    const classificationSummary = Object.entries(classification)
        .filter(([_, score]) => score >= 25)
        .map(([type, score]) => `${type}: ${score}%`)
        .join(', ');
    const segmentsText = transcript.segments
        .map((seg) => `[${seg.start}s - ${seg.end}s] ${seg.text}`)
        .join('\n');
    prompt = prompt.replaceAll('{{CLASSIFICATION_SUMMARY}}', () => classificationSummary);
    prompt = prompt.replaceAll('{{TRANSCRIPT_SEGMENTS}}', () => segmentsText);
    prompt = prompt.replaceAll('{{TRANSCRIPT_ALL}}', () => transcript.fullText);
    return prompt;
}
|
|
155
|
+
/**
 * Pull the outermost JSON object out of a possibly-noisy LLM response and
 * normalize it into the metadata shape, defaulting each missing list to [].
 * @param {string} content - Raw model output.
 * @returns {object} metadata with all five list fields present.
 * @throws {Error} when no `{...}` span exists in the content.
 */
function parseMetadataJson(content) {
    const jsonMatch = content.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
        throw new Error('No JSON object found in metadata extraction response');
    }
    const parsed = JSON.parse(jsonMatch[0]);
    const listFields = ['speakers', 'keyMoments', 'actionItems', 'technicalTerms', 'codeSnippets'];
    const metadata = {};
    for (const field of listFields) {
        metadata[field] = parsed[field] || [];
    }
    return metadata;
}
|
|
169
|
+
/**
 * Generate a markdown artifact of the given type from the session context.
 * Artifacts are free-form text, so Ollama's JSON mode is disabled.
 * @param {string} artifactType - Prompt name under prompts/ (e.g. 'notes').
 * @param {object} context - { transcript, classification, metadata? }.
 * @param {object} config - Ollama call settings.
 * @returns {Promise<string>} the generated markdown.
 */
async function generateArtifact(artifactType, context, config) {
    return callOllama(loadArtifactPrompt(artifactType, context), config, { expectJson: false });
}
|
|
174
|
+
/**
 * Build an artifact-generation prompt from prompts/<artifactType>.md
 * (resolved relative to process.cwd()), substituting transcript,
 * classification, and metadata placeholders.
 *
 * Bug fix: String.replace with a string replacement substituted only the
 * first occurrence of each placeholder and expanded `$`-sequences found in
 * transcript/metadata content; replaceAll with a function replacer inserts
 * literally at every occurrence.
 *
 * @param {string} artifactType - Prompt file name (without .md).
 * @param {object} context - { transcript, classification, metadata? };
 *   metadata placeholders become 'N/A' when metadata is absent.
 * @returns {string} Fully substituted prompt.
 */
function loadArtifactPrompt(artifactType, context) {
    const promptPath = join(process.cwd(), 'prompts', `${artifactType}.md`);
    let prompt = readFileSync(promptPath, 'utf-8');
    const fill = (template, token, value) => template.replaceAll(token, () => value);
    const segmentsText = context.transcript.segments
        .map((seg) => `[${seg.start}s - ${seg.end}s] ${seg.text}`)
        .join('\n');
    const classificationSummary = Object.entries(context.classification)
        .filter(([_, score]) => score >= 25)
        .map(([type, score]) => `${type}: ${score}%`)
        .join(', ');
    prompt = fill(prompt, '{{TRANSCRIPT_ALL}}', context.transcript.fullText);
    prompt = fill(prompt, '{{TRANSCRIPT_SEGMENTS}}', segmentsText);
    prompt = fill(prompt, '{{CLASSIFICATION_SUMMARY}}', classificationSummary);
    // Table-driven metadata substitution replaces the original's two
    // parallel 5-line branches.
    const metadataTokens = {
        '{{SPEAKERS}}': 'speakers',
        '{{KEY_MOMENTS}}': 'keyMoments',
        '{{ACTION_ITEMS}}': 'actionItems',
        '{{TECHNICAL_TERMS}}': 'technicalTerms',
        '{{CODE_SNIPPETS}}': 'codeSnippets',
    };
    for (const [token, key] of Object.entries(metadataTokens)) {
        const value = context.metadata
            ? JSON.stringify(context.metadata[key] || [], null, 2)
            : 'N/A';
        prompt = fill(prompt, token, value);
    }
    return prompt;
}
|