npm - escribano - Versions diffs - 0.1.0 - Mend

escribano 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124) hide show

package/LICENSE +21 -0
package/README.md +297 -0
package/dist/0_types.js +279 -0
package/dist/actions/classify-session.js +77 -0
package/dist/actions/create-contexts.js +44 -0
package/dist/actions/create-topic-blocks.js +68 -0
package/dist/actions/extract-metadata.js +24 -0
package/dist/actions/generate-artifact-v3.js +296 -0
package/dist/actions/generate-artifact.js +61 -0
package/dist/actions/generate-summary-v3.js +260 -0
package/dist/actions/outline-index.js +204 -0
package/dist/actions/process-recording-v2.js +494 -0
package/dist/actions/process-recording-v3.js +412 -0
package/dist/actions/process-session.js +183 -0
package/dist/actions/publish-summary-v3.js +303 -0
package/dist/actions/sync-to-outline.js +196 -0
package/dist/adapters/audio.silero.adapter.js +69 -0
package/dist/adapters/cap.adapter.js +94 -0
package/dist/adapters/capture.cap.adapter.js +107 -0
package/dist/adapters/capture.filesystem.adapter.js +124 -0
package/dist/adapters/embedding.ollama.adapter.js +141 -0
package/dist/adapters/intelligence.adapter.js +202 -0
package/dist/adapters/intelligence.mlx.adapter.js +395 -0
package/dist/adapters/intelligence.ollama.adapter.js +741 -0
package/dist/adapters/publishing.outline.adapter.js +75 -0
package/dist/adapters/storage.adapter.js +81 -0
package/dist/adapters/storage.fs.adapter.js +83 -0
package/dist/adapters/transcription.whisper.adapter.js +206 -0
package/dist/adapters/video.ffmpeg.adapter.js +405 -0
package/dist/adapters/whisper.adapter.js +168 -0
package/dist/batch-context.js +329 -0
package/dist/db/helpers.js +50 -0
package/dist/db/index.js +95 -0
package/dist/db/migrate.js +80 -0
package/dist/db/repositories/artifact.sqlite.js +77 -0
package/dist/db/repositories/cluster.sqlite.js +92 -0
package/dist/db/repositories/context.sqlite.js +75 -0
package/dist/db/repositories/index.js +10 -0
package/dist/db/repositories/observation.sqlite.js +70 -0
package/dist/db/repositories/recording.sqlite.js +56 -0
package/dist/db/repositories/subject.sqlite.js +64 -0
package/dist/db/repositories/topic-block.sqlite.js +45 -0
package/dist/db/types.js +4 -0
package/dist/domain/classification.js +60 -0
package/dist/domain/context.js +97 -0
package/dist/domain/index.js +2 -0
package/dist/domain/observation.js +17 -0
package/dist/domain/recording.js +41 -0
package/dist/domain/segment.js +93 -0
package/dist/domain/session.js +93 -0
package/dist/domain/time-range.js +38 -0
package/dist/domain/transcript.js +79 -0
package/dist/index.js +173 -0
package/dist/pipeline/context.js +162 -0
package/dist/pipeline/events.js +2 -0
package/dist/prerequisites.js +226 -0
package/dist/scripts/rebuild-index.js +53 -0
package/dist/scripts/seed-fixtures.js +290 -0
package/dist/services/activity-segmentation.js +333 -0
package/dist/services/activity-segmentation.test.js +191 -0
package/dist/services/app-normalization.js +212 -0
package/dist/services/cluster-merge.js +69 -0
package/dist/services/clustering.js +237 -0
package/dist/services/debug.js +58 -0
package/dist/services/frame-sampling.js +318 -0
package/dist/services/signal-extraction.js +106 -0
package/dist/services/subject-grouping.js +342 -0
package/dist/services/temporal-alignment.js +99 -0
package/dist/services/vlm-enrichment.js +84 -0
package/dist/services/vlm-service.js +130 -0
package/dist/stats/index.js +3 -0
package/dist/stats/observer.js +65 -0
package/dist/stats/repository.js +36 -0
package/dist/stats/resource-tracker.js +86 -0
package/dist/stats/types.js +1 -0
package/dist/test-classification-prompts.js +181 -0
package/dist/tests/cap.adapter.test.js +75 -0
package/dist/tests/capture.cap.adapter.test.js +69 -0
package/dist/tests/classify-session.test.js +140 -0
package/dist/tests/db/repositories.test.js +243 -0
package/dist/tests/domain/time-range.test.js +31 -0
package/dist/tests/integration.test.js +84 -0
package/dist/tests/intelligence.adapter.test.js +102 -0
package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
package/dist/tests/process-v2.test.js +90 -0
package/dist/tests/services/clustering.test.js +112 -0
package/dist/tests/services/frame-sampling.test.js +152 -0
package/dist/tests/utils/ocr.test.js +76 -0
package/dist/tests/utils/parallel.test.js +57 -0
package/dist/tests/visual-observer.test.js +175 -0
package/dist/utils/id-normalization.js +15 -0
package/dist/utils/index.js +9 -0
package/dist/utils/model-detector.js +154 -0
package/dist/utils/ocr.js +80 -0
package/dist/utils/parallel.js +32 -0
package/migrations/001_initial.sql +109 -0
package/migrations/002_clusters.sql +41 -0
package/migrations/003_observations_vlm_fields.sql +14 -0
package/migrations/004_observations_unique.sql +18 -0
package/migrations/005_processing_stats.sql +29 -0
package/migrations/006_vlm_raw_response.sql +6 -0
package/migrations/007_subjects.sql +23 -0
package/migrations/008_artifacts_recording.sql +6 -0
package/migrations/009_artifact_subjects.sql +10 -0
package/package.json +82 -0
package/prompts/action-items.md +55 -0
package/prompts/blog-draft.md +54 -0
package/prompts/blog-research.md +87 -0
package/prompts/card.md +54 -0
package/prompts/classify-segment.md +38 -0
package/prompts/classify.md +37 -0
package/prompts/code-snippets.md +163 -0
package/prompts/extract-metadata.md +149 -0
package/prompts/notes.md +83 -0
package/prompts/runbook.md +123 -0
package/prompts/standup.md +50 -0
package/prompts/step-by-step.md +125 -0
package/prompts/subject-grouping.md +31 -0
package/prompts/summary-v3.md +89 -0
package/prompts/summary.md +77 -0
package/prompts/topic-classifier.md +24 -0
package/prompts/topic-extract.md +13 -0
package/prompts/vlm-batch.md +21 -0
package/prompts/vlm-single.md +19 -0

package/dist/actions/publish-summary-v3.js ADDED Viewed

@@ -0,0 +1,303 @@
+/**
+ * Escribano - Publish Summary V3
+ *
+ * Publishes V3 session summaries to Outline wiki.
+ */
+import { log } from '../pipeline/context.js';
+/**
+ * Publish a V3 session summary to Outline.
+ *
+ * Creates a single document per recording with full summary content.
+ *
+ * @param recordingId - Recording ID
+ * @param content - Summary markdown content
+ * @param topicBlocks - V3 topic blocks for metadata
+ * @param repos - Database repositories
+ * @param publishing - Outline publishing service
+ * @param options - Publishing options
+ * @returns Published document info
+ */
+export async function publishSummaryV3(recordingId, content, topicBlocks, repos, publishing, options = {}) {
+    const collectionName = options.collectionName ?? 'Escribano Sessions';
+    const indexTitle = '📋 Session Summaries Index';
+    log('info', `[Publish V3] Publishing summary for ${recordingId}...`);
+    // 1. Get recording info
+    const recording = repos.recordings.findById(recordingId);
+    if (!recording) {
+        throw new Error(`Recording ${recordingId} not found`);
+    }
+    // 2. Ensure collection exists
+    const collection = await publishing.ensureCollection(collectionName);
+    log('info', `[Publish V3] Using collection: ${collectionName} (${collection.id})`);
+    // Find index document to use as parent (if it exists)
+    const indexDoc = await publishing.findDocumentByTitle(collection.id, indexTitle);
+    if (indexDoc) {
+        log('info', `[Publish V3] Nesting under index: ${indexTitle}`);
+    }
+    // 3. Build document title and content
+    const title = buildDocumentTitle(recording, topicBlocks, options.format);
+    const documentContent = buildDocumentContent(recording, content, topicBlocks);
+    // 4. Check for existing document (by title)
+    const existing = await publishing.findDocumentByTitle(collection.id, title);
+    // 5. Create or update document
+    let document;
+    if (existing) {
+        log('info', `[Publish V3] Updating existing document: ${title}`);
+        await publishing.updateDocument(existing.id, {
+            title,
+            content: documentContent,
+        });
+        document = existing;
+    }
+    else {
+        log('info', `[Publish V3] Creating new document: ${title}`);
+        document = await publishing.createDocument({
+            collectionId: collection.id,
+            parentDocumentId: indexDoc?.id, // Nest under index if it exists
+            title,
+            content: documentContent,
+            publish: options.publish ?? true,
+        });
+    }
+    // 6. Build sync state and content hash
+    const contentHash = hashContent(content);
+    const syncState = {
+        collectionId: collection.id,
+        sessionDocumentId: document.id,
+        sessionDocumentUrl: document.url,
+        artifacts: [], // V3 doesn't use artifacts as children
+        lastSyncedAt: new Date(),
+    };
+    log('info', `[Publish V3] Published to: ${document.url}`);
+    return {
+        url: document.url,
+        documentId: document.id,
+        collectionId: collection.id,
+        syncState,
+        contentHash,
+    };
+}
+/**
+ * Build a descriptive document title from recording and topic blocks.
+ */
+function buildDocumentTitle(recording, topicBlocks, format) {
+    const date = new Date(recording.captured_at);
+    const dateStr = date.toISOString().split('T')[0];
+    const timeStr = date.toTimeString().split(' ')[0].substring(0, 5);
+    // Try to extract primary activity from blocks
+    const activities = extractActivities(topicBlocks);
+    const primaryActivity = activities[0] ?? 'Session';
+    // Append format if provided
+    const formatSuffix = format ? ` [${format}]` : '';
+    return `[${dateStr} ${timeStr}] ${primaryActivity} (${formatDuration(recording.duration)})${formatSuffix}`;
+}
+/**
+ * Extract unique activities from topic blocks, sorted by frequency.
+ */
+function extractActivities(topicBlocks) {
+    const activityCounts = new Map();
+    for (const block of topicBlocks) {
+        try {
+            const classification = JSON.parse(block.classification || '{}');
+            const activity = classification.activity_type;
+            if (activity) {
+                activityCounts.set(activity, (activityCounts.get(activity) ?? 0) + 1);
+            }
+        }
+        catch {
+            // Ignore invalid JSON
+        }
+    }
+    // Sort by count descending
+    return Array.from(activityCounts.entries())
+        .sort((a, b) => b[1] - a[1])
+        .map(([activity]) => activity.charAt(0).toUpperCase() + activity.slice(1));
+}
+/**
+ * Format duration in human-readable form.
+ */
+function formatDuration(seconds) {
+    const minutes = Math.round(seconds / 60);
+    if (minutes < 60) {
+        return `${minutes}m`;
+    }
+    const hours = Math.floor(minutes / 60);
+    const remainingMinutes = minutes % 60;
+    if (remainingMinutes === 0) {
+        return `${hours}h`;
+    }
+    return `${hours}h ${remainingMinutes}m`;
+}
+/**
+ * Build the full document content with metadata and summary.
+ */
+function buildDocumentContent(recording, summary, topicBlocks) {
+    const date = new Date(recording.captured_at);
+    const activities = extractActivities(topicBlocks);
+    // Build metadata section
+    let metadata = `---\n`;
+    metadata += `**Date:** ${date.toLocaleString()}\n\n`;
+    metadata += `**Duration:** ${formatDuration(recording.duration)}\n\n`;
+    metadata += `**Activities:** ${activities.join(', ') || 'Unknown'}\n\n`;
+    metadata += `**Recording ID:** \`${recording.id}\`\n\n`;
+    if (recording.source_type) {
+        metadata += `**Source:** ${recording.source_type}\n\n`;
+    }
+    metadata += `---\n\n`;
+    // Append timeline of blocks if available
+    const timeline = buildTimeline(topicBlocks);
+    if (timeline) {
+        metadata += `## Timeline\n\n`;
+        metadata += timeline;
+        metadata += `\n---\n\n`;
+    }
+    // Summary header
+    const summaryHeader = `# Session Summary\n\n`;
+    // Combine all parts
+    return metadata + summaryHeader + summary;
+}
+/**
+ * Build a brief timeline from topic blocks.
+ */
+function buildTimeline(topicBlocks) {
+    if (topicBlocks.length === 0)
+        return '';
+    // Sort by start time
+    const sortedBlocks = [...topicBlocks].sort((a, b) => {
+        const aStart = JSON.parse(a.classification || '{}').start_time ?? 0;
+        const bStart = JSON.parse(b.classification || '{}').start_time ?? 0;
+        return aStart - bStart;
+    });
+    let timeline = '';
+    for (const block of sortedBlocks) {
+        try {
+            const classification = JSON.parse(block.classification || '{}');
+            const activity = classification.activity_type ?? 'unknown';
+            const startTime = classification.start_time ?? 0;
+            const endTime = classification.end_time ?? 0;
+            const duration = endTime - startTime;
+            const apps = (classification.apps ?? []).join(', ') || 'none';
+            const timeStr = formatTime(startTime);
+            const durationStr = formatDuration(duration);
+            timeline += `- **${timeStr}** (${durationStr}): ${activity}`;
+            if (apps !== 'none') {
+                timeline += ` — ${apps}`;
+            }
+            timeline += `\n`;
+        }
+        catch {
+            // Skip invalid blocks
+        }
+    }
+    return timeline;
+}
+/**
+ * Format seconds as MM:SS.
+ */
+function formatTime(seconds) {
+    const mins = Math.floor(seconds / 60);
+    const secs = Math.floor(seconds % 60);
+    return `${mins}:${secs.toString().padStart(2, '0')}`;
+}
+/**
+ * Simple content hashing for change detection.
+ */
+function hashContent(content) {
+    let hash = 0;
+    for (let i = 0; i < content.length; i++) {
+        const char = content.charCodeAt(i);
+        hash = (hash << 5) - hash + char;
+        hash |= 0;
+    }
+    return hash.toString(16);
+}
+/**
+ * Update recording metadata with Outline publishing info.
+ * This should be called after successful publish.
+ */
+export function updateRecordingOutlineMetadata(recordingId, outlineInfo, repos, format) {
+    const recording = repos.recordings.findById(recordingId);
+    if (!recording) {
+        throw new Error(`Recording ${recordingId} not found`);
+    }
+    // Parse existing metadata
+    const currentMetadata = recording.source_metadata
+        ? JSON.parse(recording.source_metadata)
+        : {};
+    // Store format-specific metadata
+    if (format) {
+        // Initialize formats array if needed
+        if (!currentMetadata.outline_formats) {
+            currentMetadata.outline_formats = [];
+        }
+        // Remove any existing entry for this format and add the new one
+        currentMetadata.outline_formats = currentMetadata.outline_formats.filter((f) => f.format !== format);
+        currentMetadata.outline_formats.push({
+            format,
+            ...outlineInfo,
+        });
+    }
+    else {
+        // Backward compatibility: store as single outline object if no format specified
+        currentMetadata.outline = outlineInfo;
+    }
+    repos.recordings.updateMetadata(recordingId, JSON.stringify(currentMetadata));
+    log('info', `[Publish V3] Updated metadata for ${recordingId}${format ? ` (${format})` : ''}`);
+}
+/**
+ * Get current Outline metadata from recording if it exists (legacy single-outline format).
+ */
+export function getOutlineMetadata(recording) {
+    try {
+        const metadata = recording.source_metadata
+            ? JSON.parse(recording.source_metadata)
+            : {};
+        return metadata.outline ?? null;
+    }
+    catch {
+        return null;
+    }
+}
+/**
+ * Get Outline metadata for a specific format.
+ * Checks outline_formats[] first, falls back to legacy outline for backward compat.
+ */
+export function getOutlineMetadataForFormat(recording, format) {
+    try {
+        const metadata = recording.source_metadata
+            ? JSON.parse(recording.source_metadata)
+            : {};
+        // Check multi-format structure first
+        if (format &&
+            metadata.outline_formats &&
+            Array.isArray(metadata.outline_formats)) {
+            const formatEntry = metadata.outline_formats.find((f) => f.format === format);
+            if (formatEntry) {
+                return {
+                    url: formatEntry.url,
+                    documentId: formatEntry.documentId,
+                    collectionId: formatEntry.collectionId,
+                    publishedAt: formatEntry.publishedAt,
+                    contentHash: formatEntry.contentHash,
+                    error: formatEntry.error,
+                    failedAt: formatEntry.failedAt,
+                };
+            }
+        }
+        // Fallback to legacy single-outline
+        return metadata.outline ?? null;
+    }
+    catch {
+        return null;
+    }
+}
+/**
+ * Check if content has changed since last publish for a specific format.
+ */
+export function hasContentChanged(recording, currentContent, format) {
+    const outlineMeta = getOutlineMetadataForFormat(recording, format);
+    if (!outlineMeta)
+        return true;
+    const currentHash = hashContent(currentContent);
+    return currentHash !== outlineMeta.contentHash;
+}

package/dist/actions/sync-to-outline.js ADDED Viewed

@@ -0,0 +1,196 @@
+/**
+ * Sync Session to Outline Action
+ *
+ * Orchestrates publishing a session and its artifacts to Outline.
+ */
+/**
+ * Syncs a session and all its artifacts to Outline.
+ */
+export async function syncSessionToOutline(session, publishing, storage, collectionName = 'Escribano Sessions') {
+    // 1. Ensure collection exists
+    const collection = await publishing.ensureCollection(collectionName);
+    // 2. Create or update session parent document
+    const sessionTitle = formatSessionTitle(session);
+    const sessionContent = generateSessionDocument(session);
+    const existingSession = await publishing.findDocumentByTitle(collection.id, sessionTitle);
+    const sessionDoc = await upsertDocument(publishing, collection.id, sessionTitle, sessionContent, existingSession?.id);
+    // 3. Sync each artifact as child document
+    const syncedArtifacts = [];
+    for (const artifact of session.artifacts) {
+        const artifactTitle = formatArtifactType(artifact.type);
+        const existingArtifact = await findChildDocumentByTitle(publishing, collection.id, sessionDoc.id, artifactTitle);
+        const artifactDoc = await upsertDocument(publishing, collection.id, artifactTitle, artifact.content, existingArtifact?.id, sessionDoc.id);
+        syncedArtifacts.push({
+            type: artifact.type,
+            documentId: artifactDoc.id,
+            documentUrl: artifactDoc.url,
+            syncedAt: new Date(),
+            contentHash: hashContent(artifact.content),
+        });
+    }
+    // 4. Update sync state
+    session.outlineSyncState = {
+        collectionId: collection.id,
+        sessionDocumentId: sessionDoc.id,
+        sessionDocumentUrl: sessionDoc.url,
+        artifacts: syncedArtifacts,
+        lastSyncedAt: new Date(),
+    };
+    await storage.saveSession(session);
+    // 5. Update global index
+    await updateGlobalIndex(publishing, storage, collection.id);
+    return { url: sessionDoc.url };
+}
+/**
+ * Creates or updates a document in Outline
+ */
+async function upsertDocument(publishing, collectionId, title, content, existingId, parentDocumentId) {
+    if (existingId) {
+        await publishing.updateDocument(existingId, { content });
+        // We need the URL, but updateDocument doesn't return it.
+        // Most publishing services will have the URL stable if the title/ID don't change.
+        // For now we re-fetch or assume findDocumentByTitle was sufficient.
+        const updated = await publishing.findDocumentByTitle(collectionId, title);
+        if (!updated)
+            throw new Error(`Failed to find updated document: ${title}`);
+        return updated;
+    }
+    return publishing.createDocument({
+        collectionId,
+        title,
+        content,
+        parentDocumentId,
+        publish: true,
+    });
+}
+/**
+ * Format session title for Outline
+ */
+function formatSessionTitle(session) {
+    const date = new Date(session.createdAt);
+    const dateStr = date.toISOString().split('T')[0];
+    const timeStr = date.toTimeString().split(' ')[0].substring(0, 5);
+    const primaryType = getPrimaryType(session);
+    const typeLabel = primaryType ? `[${primaryType.toUpperCase()}] ` : '';
+    return `${typeLabel}${dateStr} ${timeStr} - ${session.id}`;
+}
+function getPrimaryType(session) {
+    if (!session.classification)
+        return null;
+    const top = Object.entries(session.classification).sort(([, a], [, b]) => b - a)[0];
+    return top[1] >= 25 ? top[0] : null;
+}
+/**
+ * Format artifact type for display
+ */
+function formatArtifactType(type) {
+    return type
+        .split('-')
+        .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
+        .join(' ');
+}
+/**
+ * Find a child document by title under a specific parent
+ */
+async function findChildDocumentByTitle(publishing, collectionId, parentId, title) {
+    const docs = await publishing.listDocuments(collectionId);
+    return (docs.find((d) => d.parentDocumentId === parentId && d.title === title) ||
+        null);
+}
+/**
+ * Generate parent session document content
+ */
+function generateSessionDocument(session) {
+    const date = new Date(session.createdAt).toLocaleString();
+    const types = session.classification
+        ? Object.entries(session.classification)
+            .filter(([, s]) => s >= 25)
+            .sort(([, a], [, b]) => b - a)
+            .map(([t, s]) => `${t} (${s}%)`)
+            .join(' | ')
+        : 'Not classified';
+    let content = `# Session: ${session.id}\n\n`;
+    content += `**Date:** ${date}\n`;
+    content += `**Classification:** ${types}\n\n`;
+    if (session.artifacts.length > 0) {
+        content += `## Artifacts\n\n`;
+        for (const artifact of session.artifacts) {
+            content += `- ${formatArtifactType(artifact.type)}\n`;
+        }
+        content += `\n`;
+    }
+    if (session.metadata) {
+        content += `## Metadata\n\n`;
+        if (session.metadata.speakers?.length) {
+            content += `### Speakers\n`;
+            for (const s of session.metadata.speakers) {
+                content += `- ${s.name}${s.role ? ` (${s.role})` : ''}\n`;
+            }
+            content += `\n`;
+        }
+        if (session.metadata.keyMoments?.length) {
+            content += `### Key Moments\n`;
+            for (const m of session.metadata.keyMoments) {
+                content += `- [${formatTime(m.timestamp)}] ${m.description}\n`;
+            }
+            content += `\n`;
+        }
+    }
+    return content;
+}
+function formatTime(seconds) {
+    const mins = Math.floor(seconds / 60);
+    const secs = Math.floor(seconds % 60);
+    return `${mins}:${secs.toString().padStart(2, '0')}`;
+}
+/**
+ * Simple content hashing (placeholder)
+ */
+function hashContent(content) {
+    let hash = 0;
+    for (let i = 0; i < content.length; i++) {
+        const char = content.charCodeAt(i);
+        hash = (hash << 5) - hash + char;
+        hash |= 0;
+    }
+    return hash.toString();
+}
+/**
+ * Updates the global session index document in Outline
+ */
+async function updateGlobalIndex(publishing, storage, collectionId) {
+    const sessions = await storage.listSessions();
+    const title = '📋 Session Index';
+    let content = `# 📋 Escribano Session Index\n\n`;
+    content += `*Last updated: ${new Date().toLocaleString()}*\n\n`;
+    // Group by month
+    const grouped = {};
+    for (const s of sessions) {
+        const month = new Date(s.createdAt).toLocaleString('default', {
+            month: 'long',
+            year: 'numeric',
+        });
+        if (!grouped[month])
+            grouped[month] = [];
+        grouped[month].push(s);
+    }
+    for (const [month, monthSessions] of Object.entries(grouped)) {
+        content += `## ${month}\n\n`;
+        content += `| Date | Type | Artifacts | Link |\n`;
+        content += `|------|------|-----------|------|\n`;
+        for (const s of monthSessions) {
+            const date = new Date(s.createdAt).toLocaleString();
+            const type = getPrimaryType(s) || 'Unknown';
+            const artifacts = s.artifacts
+                .map((a) => formatArtifactType(a.type))
+                .join(', ');
+            const link = s.outlineSyncState?.sessionDocumentUrl
+                ? `[View](${s.outlineSyncState.sessionDocumentUrl})`
+                : 'N/A';
+            content += `| ${date} | ${type} | ${artifacts} | ${link} |\n`;
+        }
+        content += `\n`;
+    }
+    const existing = await publishing.findDocumentByTitle(collectionId, title);
+    await upsertDocument(publishing, collectionId, title, content, existing?.id);
+}

package/dist/adapters/audio.silero.adapter.js ADDED Viewed

@@ -0,0 +1,69 @@
+import { exec, spawn } from 'node:child_process';
+import { mkdir, readFile, rm, rmdir } from 'node:fs/promises';
+import os from 'node:os';
+import path from 'node:path';
+import { promisify } from 'node:util';
+const execAsync = promisify(exec);
+export function createSileroPreprocessor() {
+    let currentProcess = null;
+    return {
+        extractSpeechSegments: async (audioPath, recordingId) => {
+            const tempDir = path.join(os.tmpdir(), 'escribano', recordingId, 'segments');
+            const manifestPath = path.join(tempDir, 'manifest.json');
+            await mkdir(tempDir, { recursive: true });
+            const inputWavPath = path.join(tempDir, 'input_16k.wav');
+            try {
+                console.log(`Converting ${audioPath} to 16kHz mono WAV...`);
+                await execAsync(`ffmpeg -i "${audioPath}" -ar 16000 -ac 1 "${inputWavPath}" -y`);
+            }
+            catch (error) {
+                throw new Error(`Failed to pre-convert audio for VAD: ${error.message}`);
+            }
+            const scriptPath = path.join(process.cwd(), 'src', 'scripts', 'audio_preprocessor.py');
+            const command = `uv run "${scriptPath}" --audio "${inputWavPath}" --output-dir "${tempDir}" --output-json "${manifestPath}"`;
+            try {
+                console.log(`Running Silero VAD on ${inputWavPath}...`);
+                currentProcess = spawn('sh', ['-c', command]);
+                await new Promise((resolve, reject) => {
+                    currentProcess?.on('close', (code) => {
+                        currentProcess = null;
+                        if (code === 0) {
+                            resolve();
+                        }
+                        else {
+                            reject(new Error(`Silero VAD failed with code ${code}`));
+                        }
+                    });
+                    currentProcess?.on('error', (err) => {
+                        currentProcess = null;
+                        reject(err);
+                    });
+                });
+                const manifestContent = await readFile(manifestPath, 'utf-8');
+                const segments = JSON.parse(manifestContent);
+                return { segments, tempDir };
+            }
+            catch (error) {
+                currentProcess = null;
+                console.error(`Silero VAD failed: ${error.message}`);
+                throw new Error(`Failed to extract speech segments: ${error.message}`);
+            }
+        },
+        cleanup: async (tempDir) => {
+            try {
+                await rm(tempDir, { recursive: true, force: true });
+                const recordingDir = path.dirname(tempDir);
+                await rm(recordingDir).catch(() => { });
+            }
+            catch (error) {
+                console.warn(`Failed to cleanup temp segments: ${error.message}`);
+            }
+        },
+        getResourceName() {
+            return 'silero-python';
+        },
+        getPid() {
+            return currentProcess?.pid ?? null;
+        },
+    };
+}

package/dist/adapters/cap.adapter.js ADDED Viewed

@@ -0,0 +1,94 @@
+import { readdir, readFile, stat } from 'node:fs/promises';
+import { homedir } from 'node:os';
+import { join } from 'node:path';
+import { capConfigSchema } from '../0_types.js';
+function expandPath(path) {
+    if (path.startsWith('~/')) {
+        return join(homedir(), path.slice(2));
+    }
+    return path;
+}
+async function parseCapRecording(capDirPath) {
+    try {
+        const metaPath = join(capDirPath, 'recording-meta.json');
+        const metaContent = await readFile(metaPath, 'utf-8');
+        const meta = JSON.parse(metaContent);
+        if (!meta.segments ||
+            !Array.isArray(meta.segments) ||
+            meta.segments.length === 0) {
+            throw new Error(`Invalid metadata in ${capDirPath}: missing or empty segments array`);
+        }
+        const firstSegment = meta.segments[0];
+        const videoPath = firstSegment.display?.path
+            ? join(capDirPath, firstSegment.display.path)
+            : null;
+        // we fked up cuz we have mic but also system_audio.ogg
+        const micAudio = firstSegment.mic?.path
+            ? join(capDirPath, firstSegment.mic.path)
+            : null;
+        const systemAudio = firstSegment.system_audio?.path
+            ? join(capDirPath, firstSegment.system_audio.path)
+            : null;
+        const audioToStat = micAudio || systemAudio;
+        if (!audioToStat) {
+            console.log(`Skipping ${capDirPath}: none audio track found`);
+            return null;
+        }
+        const stats = await stat(audioToStat);
+        const capturedAt = stats.mtime;
+        const recordingId = capDirPath.split('/').pop() || 'unknown';
+        return {
+            id: recordingId,
+            source: {
+                type: 'cap',
+                originalPath: capDirPath,
+                metadata: meta,
+            },
+            videoPath,
+            audioMicPath: micAudio ? micAudio : null,
+            audioSystemPath: systemAudio ? systemAudio : null,
+            duration: 0,
+            capturedAt,
+        };
+    }
+    catch (error) {
+        if (error.code === 'ENOENT') {
+            throw new Error(`Recording directory or files not found: ${capDirPath}`);
+        }
+        if (error.name === 'SyntaxError') {
+            throw new Error(`Invalid JSON in recording-meta.json at ${capDirPath}`);
+        }
+        throw new Error(`Failed to parse recording at ${capDirPath}: ${error.message}`);
+    }
+}
+export function createCapSource(config = {}) {
+    const parsedConfig = capConfigSchema.parse(config);
+    const recordingsPath = expandPath(parsedConfig.recordingsPath);
+    const innerList = async (limit = 10) => {
+        try {
+            //
+            // 7 directories, 5 files
+            const entries = await readdir(recordingsPath, { withFileTypes: true });
+            const capDirs = entries.filter((entry) => entry.isDirectory() && entry.name.endsWith('.cap'));
+            const recordings = await Promise.allSettled(capDirs.map(async (dir) => parseCapRecording(join(recordingsPath, dir.name))));
+            // logging errors
+            console.log(recordings
+                .filter((p) => p.status === 'rejected')
+                .map((p) => p.reason + '\n'));
+            return recordings
+                .filter((p) => p.status === 'fulfilled')
+                .map((x) => x.value)
+                .filter((r) => r !== null)
+                .sort((a, b) => b.capturedAt.getTime() - a.capturedAt.getTime())
+                .slice(0, limit);
+        }
+        catch (error) {
+            console.error('Failed to list Cap recordings:', error);
+            return [];
+        }
+    };
+    return {
+        getLatestRecording: () => innerList(1).then((recordings) => recordings[0] ?? null),
+        listRecordings: innerList,
+    };
+}