escribano 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +297 -0
- package/dist/0_types.js +279 -0
- package/dist/actions/classify-session.js +77 -0
- package/dist/actions/create-contexts.js +44 -0
- package/dist/actions/create-topic-blocks.js +68 -0
- package/dist/actions/extract-metadata.js +24 -0
- package/dist/actions/generate-artifact-v3.js +296 -0
- package/dist/actions/generate-artifact.js +61 -0
- package/dist/actions/generate-summary-v3.js +260 -0
- package/dist/actions/outline-index.js +204 -0
- package/dist/actions/process-recording-v2.js +494 -0
- package/dist/actions/process-recording-v3.js +412 -0
- package/dist/actions/process-session.js +183 -0
- package/dist/actions/publish-summary-v3.js +303 -0
- package/dist/actions/sync-to-outline.js +196 -0
- package/dist/adapters/audio.silero.adapter.js +69 -0
- package/dist/adapters/cap.adapter.js +94 -0
- package/dist/adapters/capture.cap.adapter.js +107 -0
- package/dist/adapters/capture.filesystem.adapter.js +124 -0
- package/dist/adapters/embedding.ollama.adapter.js +141 -0
- package/dist/adapters/intelligence.adapter.js +202 -0
- package/dist/adapters/intelligence.mlx.adapter.js +395 -0
- package/dist/adapters/intelligence.ollama.adapter.js +741 -0
- package/dist/adapters/publishing.outline.adapter.js +75 -0
- package/dist/adapters/storage.adapter.js +81 -0
- package/dist/adapters/storage.fs.adapter.js +83 -0
- package/dist/adapters/transcription.whisper.adapter.js +206 -0
- package/dist/adapters/video.ffmpeg.adapter.js +405 -0
- package/dist/adapters/whisper.adapter.js +168 -0
- package/dist/batch-context.js +329 -0
- package/dist/db/helpers.js +50 -0
- package/dist/db/index.js +95 -0
- package/dist/db/migrate.js +80 -0
- package/dist/db/repositories/artifact.sqlite.js +77 -0
- package/dist/db/repositories/cluster.sqlite.js +92 -0
- package/dist/db/repositories/context.sqlite.js +75 -0
- package/dist/db/repositories/index.js +10 -0
- package/dist/db/repositories/observation.sqlite.js +70 -0
- package/dist/db/repositories/recording.sqlite.js +56 -0
- package/dist/db/repositories/subject.sqlite.js +64 -0
- package/dist/db/repositories/topic-block.sqlite.js +45 -0
- package/dist/db/types.js +4 -0
- package/dist/domain/classification.js +60 -0
- package/dist/domain/context.js +97 -0
- package/dist/domain/index.js +2 -0
- package/dist/domain/observation.js +17 -0
- package/dist/domain/recording.js +41 -0
- package/dist/domain/segment.js +93 -0
- package/dist/domain/session.js +93 -0
- package/dist/domain/time-range.js +38 -0
- package/dist/domain/transcript.js +79 -0
- package/dist/index.js +173 -0
- package/dist/pipeline/context.js +162 -0
- package/dist/pipeline/events.js +2 -0
- package/dist/prerequisites.js +226 -0
- package/dist/scripts/rebuild-index.js +53 -0
- package/dist/scripts/seed-fixtures.js +290 -0
- package/dist/services/activity-segmentation.js +333 -0
- package/dist/services/activity-segmentation.test.js +191 -0
- package/dist/services/app-normalization.js +212 -0
- package/dist/services/cluster-merge.js +69 -0
- package/dist/services/clustering.js +237 -0
- package/dist/services/debug.js +58 -0
- package/dist/services/frame-sampling.js +318 -0
- package/dist/services/signal-extraction.js +106 -0
- package/dist/services/subject-grouping.js +342 -0
- package/dist/services/temporal-alignment.js +99 -0
- package/dist/services/vlm-enrichment.js +84 -0
- package/dist/services/vlm-service.js +130 -0
- package/dist/stats/index.js +3 -0
- package/dist/stats/observer.js +65 -0
- package/dist/stats/repository.js +36 -0
- package/dist/stats/resource-tracker.js +86 -0
- package/dist/stats/types.js +1 -0
- package/dist/test-classification-prompts.js +181 -0
- package/dist/tests/cap.adapter.test.js +75 -0
- package/dist/tests/capture.cap.adapter.test.js +69 -0
- package/dist/tests/classify-session.test.js +140 -0
- package/dist/tests/db/repositories.test.js +243 -0
- package/dist/tests/domain/time-range.test.js +31 -0
- package/dist/tests/integration.test.js +84 -0
- package/dist/tests/intelligence.adapter.test.js +102 -0
- package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
- package/dist/tests/process-v2.test.js +90 -0
- package/dist/tests/services/clustering.test.js +112 -0
- package/dist/tests/services/frame-sampling.test.js +152 -0
- package/dist/tests/utils/ocr.test.js +76 -0
- package/dist/tests/utils/parallel.test.js +57 -0
- package/dist/tests/visual-observer.test.js +175 -0
- package/dist/utils/id-normalization.js +15 -0
- package/dist/utils/index.js +9 -0
- package/dist/utils/model-detector.js +154 -0
- package/dist/utils/ocr.js +80 -0
- package/dist/utils/parallel.js +32 -0
- package/migrations/001_initial.sql +109 -0
- package/migrations/002_clusters.sql +41 -0
- package/migrations/003_observations_vlm_fields.sql +14 -0
- package/migrations/004_observations_unique.sql +18 -0
- package/migrations/005_processing_stats.sql +29 -0
- package/migrations/006_vlm_raw_response.sql +6 -0
- package/migrations/007_subjects.sql +23 -0
- package/migrations/008_artifacts_recording.sql +6 -0
- package/migrations/009_artifact_subjects.sql +10 -0
- package/package.json +82 -0
- package/prompts/action-items.md +55 -0
- package/prompts/blog-draft.md +54 -0
- package/prompts/blog-research.md +87 -0
- package/prompts/card.md +54 -0
- package/prompts/classify-segment.md +38 -0
- package/prompts/classify.md +37 -0
- package/prompts/code-snippets.md +163 -0
- package/prompts/extract-metadata.md +149 -0
- package/prompts/notes.md +83 -0
- package/prompts/runbook.md +123 -0
- package/prompts/standup.md +50 -0
- package/prompts/step-by-step.md +125 -0
- package/prompts/subject-grouping.md +31 -0
- package/prompts/summary-v3.md +89 -0
- package/prompts/summary.md +77 -0
- package/prompts/topic-classifier.md +24 -0
- package/prompts/topic-extract.md +13 -0
- package/prompts/vlm-batch.md +21 -0
- package/prompts/vlm-single.md +19 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cluster Repository - SQLite Implementation
|
|
3
|
+
*/
|
|
4
|
+
import { nowISO } from '../helpers.js';
|
|
5
|
+
/**
 * Create the cluster repository on top of a SQLite handle.
 *
 * Every statement is prepared once, when this factory runs, and then reused
 * by the returned methods.
 *
 * @param {object} db - Handle exposing `prepare()` and `transaction()`
 *   (presumably a better-sqlite3 `Database` - confirm against the caller).
 * @returns {object} Repository with lookup, insert, link, update, merge and
 *   delete operations for clusters.
 */
export function createSqliteClusterRepository(db) {
    const selectById = db.prepare('SELECT * FROM clusters WHERE id = ?');
    const selectByRecording = db.prepare('SELECT * FROM clusters WHERE recording_id = ? ORDER BY start_timestamp ASC');
    const selectByRecordingAndType = db.prepare('SELECT * FROM clusters WHERE recording_id = ? AND type = ? ORDER BY start_timestamp ASC');
    const insertStmt = db.prepare(`
INSERT INTO clusters (id, recording_id, type, start_timestamp, end_timestamp, observation_count, centroid, classification, metadata, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
    const linkStmt = db.prepare(`
INSERT OR REPLACE INTO observation_clusters (observation_id, cluster_id, distance)
VALUES (?, ?, ?)
`);
    const selectObservations = db.prepare(`
SELECT o.* FROM observations o
JOIN observation_clusters oc ON o.id = oc.observation_id
WHERE oc.cluster_id = ?
ORDER BY o.timestamp ASC
`);
    const setClassification = db.prepare('UPDATE clusters SET classification = ? WHERE id = ?');
    const setCentroid = db.prepare('UPDATE clusters SET centroid = ? WHERE id = ?');
    const mergeStmt = db.prepare(`
INSERT OR REPLACE INTO cluster_merges (visual_cluster_id, audio_cluster_id, similarity_score, merge_reason)
VALUES (?, ?, ?, ?)
`);
    const selectMergedAudio = db.prepare(`
SELECT c.* FROM clusters c
JOIN cluster_merges cm ON c.id = cm.audio_cluster_id
WHERE cm.visual_cluster_id = ?
`);
    const deleteById = db.prepare('DELETE FROM clusters WHERE id = ?');
    const deleteForRecording = db.prepare('DELETE FROM clusters WHERE recording_id = ?');

    // Writes one cluster row; created_at is taken from nowISO() at insert time.
    const insertCluster = (row) => {
        insertStmt.run(row.id, row.recording_id, row.type, row.start_timestamp, row.end_timestamp, row.observation_count, row.centroid, row.classification, row.metadata, nowISO());
    };
    // Upserts one observation/cluster link; a missing distance is stored as NULL.
    const attachObservation = (observationId, clusterId, distance) => {
        linkStmt.run(observationId, clusterId, distance ?? null);
    };
    const insertClusters = db.transaction((rows) => {
        for (const row of rows) {
            insertCluster(row);
        }
    });
    const attachObservations = db.transaction((pairs) => {
        for (const pair of pairs) {
            attachObservation(pair.observationId, pair.clusterId, pair.distance);
        }
    });

    return {
        /** Returns the cluster row with this id, or null when none exists. */
        findById: (id) => selectById.get(id) ?? null,
        /** Returns the recording's clusters ordered by start_timestamp. */
        findByRecording: (recordingId) => selectByRecording.all(recordingId),
        /** Same as findByRecording, restricted to one cluster type. */
        findByRecordingAndType: (recordingId, type) => selectByRecordingAndType.all(recordingId, type),
        /** Inserts a single cluster row. */
        save: (cluster) => {
            insertCluster(cluster);
        },
        /** Inserts every cluster inside one transaction. */
        saveBatch: (clusters) => {
            insertClusters(clusters);
        },
        /** Links (or re-links) one observation to a cluster. */
        linkObservation: (obsId, clusterId, distance) => {
            attachObservation(obsId, clusterId, distance);
        },
        /** Links many `{ observationId, clusterId, distance? }` pairs in one transaction. */
        linkObservationsBatch: (links) => {
            attachObservations(links);
        },
        /** Returns the observations linked to a cluster, ordered by timestamp. */
        getObservations: (clusterId) => selectObservations.all(clusterId),
        /** Overwrites the stored classification of a cluster. */
        updateClassification: (id, classification) => {
            setClassification.run(classification, id);
        },
        /** Stores the centroid as the raw bytes of a Float32Array. */
        updateCentroid: (id, centroid) => {
            const bytes = Buffer.from(new Float32Array(centroid).buffer);
            setCentroid.run(bytes, id);
        },
        /** Records (or replaces) a visual/audio cluster merge. */
        saveMerge: (visualId, audioId, similarity, reason) => {
            mergeStmt.run(visualId, audioId, similarity, reason);
        },
        /** Returns the audio clusters merged into the given visual cluster. */
        getMergedAudioClusters: (visualId) => selectMergedAudio.all(visualId),
        /** Deletes one cluster by id. */
        delete: (id) => {
            deleteById.run(id);
        },
        /** Deletes every cluster of a recording. */
        deleteByRecording: (recordingId) => {
            deleteForRecording.run(recordingId);
        },
    };
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context Repository - SQLite Implementation
|
|
3
|
+
*/
|
|
4
|
+
import { nowISO } from '../helpers.js';
|
|
5
|
+
/**
 * Create the context repository on top of a SQLite handle.
 *
 * Statements are prepared once when the factory runs; the returned methods
 * only bind parameters and execute them.
 *
 * @param {object} db - Handle exposing `prepare()` (presumably a
 *   better-sqlite3 `Database` - confirm against the caller).
 * @returns {object} Repository for contexts and their observation links.
 */
export function createSqliteContextRepository(db) {
    const selectById = db.prepare('SELECT * FROM contexts WHERE id = ?');
    const selectByTypeAndName = db.prepare('SELECT * FROM contexts WHERE type = ? AND name = ?');
    const selectAll = db.prepare('SELECT * FROM contexts ORDER BY created_at DESC');
    const insertStmt = db.prepare(`
INSERT INTO contexts (id, type, name, metadata, created_at)
VALUES (?, ?, ?, ?, ?)
`);
    const insertIgnoreStmt = db.prepare(`
INSERT OR IGNORE INTO contexts (id, type, name, metadata, created_at)
VALUES (?, ?, ?, ?, ?)
`);
    const linkStmt = db.prepare(`
INSERT OR REPLACE INTO observation_contexts (observation_id, context_id, confidence)
VALUES (?, ?, ?)
`);
    const unlinkStmt = db.prepare(`
DELETE FROM observation_contexts WHERE observation_id = ? AND context_id = ?
`);
    const selectLinksByContext = db.prepare(`
SELECT * FROM observation_contexts WHERE context_id = ?
`);
    const selectLinksByObservation = db.prepare(`
SELECT * FROM observation_contexts WHERE observation_id = ?
`);
    const selectLinksByRecording = db.prepare(`
SELECT oc.* FROM observation_contexts oc
JOIN observations o ON oc.observation_id = o.id
WHERE o.recording_id = ?
`);
    const deleteById = db.prepare('DELETE FROM contexts WHERE id = ?');

    // Runs one of the two insert statements with the context's columns plus a fresh created_at.
    const writeContext = (statement, context) => {
        statement.run(context.id, context.type, context.name, context.metadata, nowISO());
    };

    return {
        /** Returns the context with this id, or null when none exists. */
        findById: (id) => selectById.get(id) ?? null,
        /** Returns the context matching type and name, or null. */
        findByTypeAndName: (type, name) => selectByTypeAndName.get(type, name) ?? null,
        /** Returns all contexts, newest created_at first. */
        findAll: () => selectAll.all(),
        /** Inserts a context with a plain INSERT. */
        save: (context) => {
            writeContext(insertStmt, context);
        },
        /** Inserts a context with INSERT OR IGNORE. */
        saveOrIgnore: (context) => {
            writeContext(insertIgnoreStmt, context);
        },
        /** Links (or re-links) an observation to a context; confidence defaults to 1.0. */
        linkObservation: (observationId, contextId, confidence = 1.0) => {
            linkStmt.run(observationId, contextId, confidence);
        },
        /** Removes the link between an observation and a context. */
        unlinkObservation: (observationId, contextId) => {
            unlinkStmt.run(observationId, contextId);
        },
        /** Returns the link rows for one context. */
        getObservationLinks: (contextId) => selectLinksByContext.all(contextId),
        /** Returns the link rows for one observation. */
        getObservationLinksByObservation: (observationId) => selectLinksByObservation.all(observationId),
        /** Returns the link rows of every observation in a recording. */
        getLinksByRecording: (recordingId) => selectLinksByRecording.all(recordingId),
        /** Deletes one context by id. */
        delete: (id) => {
            deleteById.run(id);
        },
    };
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Repository exports
|
|
3
|
+
*/
|
|
4
|
+
export { createSqliteArtifactRepository } from './artifact.sqlite.js';
|
|
5
|
+
export { createSqliteClusterRepository } from './cluster.sqlite.js';
|
|
6
|
+
export { createSqliteContextRepository } from './context.sqlite.js';
|
|
7
|
+
export { createSqliteObservationRepository } from './observation.sqlite.js';
|
|
8
|
+
export { createSqliteRecordingRepository } from './recording.sqlite.js';
|
|
9
|
+
export { createSqliteSubjectRepository } from './subject.sqlite.js';
|
|
10
|
+
export { createSqliteTopicBlockRepository } from './topic-block.sqlite.js';
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Observation Repository - SQLite Implementation
|
|
3
|
+
*/
|
|
4
|
+
import { nowISO } from '../helpers.js';
|
|
5
|
+
/**
 * Create the observation repository on top of a SQLite handle.
 *
 * All statements, including the two UPDATE statements, and the batch-insert
 * transaction are created once when the factory runs, so the returned
 * methods never re-prepare SQL.
 *
 * @param {object} db - Handle exposing `prepare()` and `transaction()`
 *   (presumably a better-sqlite3 `Database` - confirm against the caller).
 * @returns {object} Repository with lookup, insert, update and delete
 *   operations for observations.
 */
export function createSqliteObservationRepository(db) {
    const stmts = {
        findById: db.prepare('SELECT * FROM observations WHERE id = ?'),
        findByRecording: db.prepare('SELECT * FROM observations WHERE recording_id = ? ORDER BY timestamp ASC'),
        findByRecordingAndType: db.prepare('SELECT * FROM observations WHERE recording_id = ? AND type = ? ORDER BY timestamp ASC'),
        findByContext: db.prepare(`
SELECT o.* FROM observations o
JOIN observation_contexts oc ON o.id = oc.observation_id
WHERE oc.context_id = ?
ORDER BY o.created_at DESC
`),
        insert: db.prepare(`
INSERT INTO observations (
id, recording_id, type, timestamp, end_timestamp,
image_path, ocr_text, vlm_description, vlm_raw_response, activity_type, apps, topics,
text, audio_source, audio_type, embedding, created_at
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`),
        updateEmbedding: db.prepare('UPDATE observations SET embedding = ? WHERE id = ?'),
        updateVLMDescription: db.prepare('UPDATE observations SET vlm_description = ? WHERE id = ?'),
        delete: db.prepare('DELETE FROM observations WHERE id = ?'),
        deleteByRecording: db.prepare('DELETE FROM observations WHERE recording_id = ?'),
    };

    // Inserts one row. The optional VLM-derived columns fall back to NULL when absent.
    const insertOne = (obs, createdAt) => {
        stmts.insert.run(obs.id, obs.recording_id, obs.type, obs.timestamp, obs.end_timestamp, obs.image_path, obs.ocr_text, obs.vlm_description, obs.vlm_raw_response ?? null, obs.activity_type ?? null, obs.apps ?? null, obs.topics ?? null, obs.text, obs.audio_source, obs.audio_type, obs.embedding, createdAt);
    };

    // The timestamp is passed in so every row of a batch shares one created_at.
    const insertMany = db.transaction((obsList, createdAt) => {
        for (const obs of obsList) {
            insertOne(obs, createdAt);
        }
    });

    return {
        /** Returns the observation with this id, or null when none exists. */
        findById(id) {
            const row = stmts.findById.get(id);
            return row ?? null;
        },
        /** Returns a recording's observations ordered by timestamp. */
        findByRecording(recordingId) {
            return stmts.findByRecording.all(recordingId);
        },
        /** Same as findByRecording, restricted to one observation type. */
        findByRecordingAndType(recordingId, type) {
            return stmts.findByRecordingAndType.all(recordingId, type);
        },
        /** Returns observations linked to a context, newest created_at first. */
        findByContext(contextId) {
            return stmts.findByContext.all(contextId);
        },
        /** Inserts a single observation. */
        save(observation) {
            insertOne(observation, nowISO());
        },
        /** Inserts all observations in one transaction with a shared created_at. */
        saveBatch(observations) {
            insertMany(observations, nowISO());
        },
        /** Stores the embedding as the raw bytes of a Float32Array. */
        updateEmbedding(id, embedding) {
            const buffer = Buffer.from(new Float32Array(embedding).buffer);
            stmts.updateEmbedding.run(buffer, id);
        },
        /** Overwrites the stored VLM description. */
        updateVLMDescription(id, description) {
            stmts.updateVLMDescription.run(description, id);
        },
        /** Deletes one observation by id. */
        delete(id) {
            stmts.delete.run(id);
        },
        /** Deletes every observation of a recording. */
        deleteByRecording(recordingId) {
            stmts.deleteByRecording.run(recordingId);
        },
    };
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recording Repository - SQLite Implementation
|
|
3
|
+
*/
|
|
4
|
+
import { nowISO } from '../helpers.js';
|
|
5
|
+
/**
 * Create the recording repository on top of a SQLite handle.
 *
 * @param {object} db - Handle exposing `prepare()` (presumably a
 *   better-sqlite3 `Database` - confirm against the caller).
 * @returns {object} Repository with lookup, insert, status/metadata update
 *   and delete operations for recordings.
 */
export function createSqliteRecordingRepository(db) {
    // Statements are prepared a single time and shared by all methods below.
    const selectById = db.prepare('SELECT * FROM recordings WHERE id = ?');
    const selectByStatus = db.prepare('SELECT * FROM recordings WHERE status = ? ORDER BY captured_at DESC');
    const selectPending = db.prepare("SELECT * FROM recordings WHERE status IN ('raw', 'processing') ORDER BY captured_at ASC");
    const insertStmt = db.prepare(`
INSERT INTO recordings (
id, video_path, audio_mic_path, audio_system_path, duration,
captured_at, status, processing_step, source_type,
source_metadata, error_message, created_at, updated_at
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
    const statusStmt = db.prepare(`
UPDATE recordings
SET status = ?, processing_step = ?, error_message = ?, updated_at = ?
WHERE id = ?
`);
    const metadataStmt = db.prepare(`
UPDATE recordings
SET source_metadata = ?, updated_at = ?
WHERE id = ?
`);
    const deleteStmt = db.prepare('DELETE FROM recordings WHERE id = ?');

    return {
        /** Returns the recording with this id, or null when none exists. */
        findById: (id) => selectById.get(id) ?? null,
        /** Returns recordings in the given status, newest captured_at first. */
        findByStatus: (status) => selectByStatus.all(status),
        /** Returns recordings whose status is 'raw' or 'processing', oldest first. */
        findPending: () => selectPending.all(),
        /** Inserts a recording; created_at and updated_at get the same timestamp. */
        save: (recording) => {
            const stamp = nowISO();
            insertStmt.run(recording.id, recording.video_path, recording.audio_mic_path, recording.audio_system_path, recording.duration, recording.captured_at, recording.status, recording.processing_step, recording.source_type, recording.source_metadata, recording.error_message, stamp, stamp);
        },
        /** Updates status, step and error (missing step/error become NULL) and bumps updated_at. */
        updateStatus: (id, status, step, error) => {
            const stepValue = step ?? null;
            const errorValue = error ?? null;
            statusStmt.run(status, stepValue, errorValue, nowISO(), id);
        },
        /** Replaces source_metadata and bumps updated_at. */
        updateMetadata: (id, metadata) => {
            metadataStmt.run(metadata, nowISO(), id);
        },
        /** Deletes one recording by id. */
        delete: (id) => {
            deleteStmt.run(id);
        },
    };
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Subject Repository - SQLite Implementation
|
|
3
|
+
*/
|
|
4
|
+
import { nowISO } from '../helpers.js';
|
|
5
|
+
/**
 * Create the subject repository on top of a SQLite handle.
 *
 * @param {object} db - Handle exposing `prepare()` and `transaction()`
 *   (presumably a better-sqlite3 `Database` - confirm against the caller).
 * @returns {object} Repository with lookup, insert, topic-block linking and
 *   delete operations for subjects.
 */
export function createSqliteSubjectRepository(db) {
    const stmts = {
        findById: db.prepare('SELECT * FROM subjects WHERE id = ?'),
        findByRecording: db.prepare('SELECT * FROM subjects WHERE recording_id = ? ORDER BY created_at ASC'),
        insert: db.prepare(`
INSERT INTO subjects (id, recording_id, label, is_personal, duration, activity_breakdown, metadata, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
`),
        insertLink: db.prepare(`
INSERT OR IGNORE INTO subject_topic_blocks (subject_id, topic_block_id)
VALUES (?, ?)
`),
        getTopicBlocks: db.prepare(`
SELECT tb.* FROM topic_blocks tb
INNER JOIN subject_topic_blocks stb ON tb.id = stb.topic_block_id
WHERE stb.subject_id = ?
ORDER BY tb.created_at ASC
`),
        deleteByRecording: db.prepare('DELETE FROM subjects WHERE recording_id = ?'),
        deleteLinksByRecording: db.prepare(`
DELETE FROM subject_topic_blocks
WHERE subject_id IN (SELECT id FROM subjects WHERE recording_id = ?)
`),
    };

    // Inserts one subject: is_personal is stored as 1/0, duration defaults to 0,
    // and the optional JSON columns default to NULL.
    const insertOne = (subject) => {
        stmts.insert.run(subject.id, subject.recording_id, subject.label, subject.is_personal ? 1 : 0, subject.duration ?? 0, subject.activity_breakdown ?? null, subject.metadata ?? null, nowISO());
    };
    const insertMany = db.transaction((subjects) => {
        for (const subject of subjects) {
            insertOne(subject);
        }
    });
    const linkMany = db.transaction((links) => {
        for (const link of links) {
            stmts.insertLink.run(link.subjectId, link.topicBlockId);
        }
    });
    // Both deletes run in one transaction so a failure cannot leave subjects
    // behind without their links. Links go first because the link delete
    // finds its rows through a subquery on `subjects`.
    const removeByRecording = db.transaction((recordingId) => {
        stmts.deleteLinksByRecording.run(recordingId);
        stmts.deleteByRecording.run(recordingId);
    });

    return {
        /** Returns the subject with this id, or null when none exists. */
        findById(id) {
            const row = stmts.findById.get(id);
            return row ?? null;
        },
        /** Returns a recording's subjects ordered by created_at. */
        findByRecording(recordingId) {
            return stmts.findByRecording.all(recordingId);
        },
        /** Inserts a single subject. */
        save(subject) {
            insertOne(subject);
        },
        /** Inserts all subjects in one transaction. */
        saveBatch(subjects) {
            insertMany(subjects);
        },
        /** Inserts `{ subjectId, topicBlockId }` links in one transaction, ignoring duplicates. */
        linkTopicBlocksBatch(links) {
            linkMany(links);
        },
        /** Returns the topic blocks linked to a subject, ordered by created_at. */
        getTopicBlocks(subjectId) {
            return stmts.getTopicBlocks.all(subjectId);
        },
        /** Atomically deletes a recording's subject links and then its subjects. */
        deleteByRecording(recordingId) {
            removeByRecording(recordingId);
        },
    };
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Topic Block Repository - SQLite Implementation
|
|
3
|
+
*/
|
|
4
|
+
import { nowISO } from '../helpers.js';
|
|
5
|
+
/**
 * Create the topic-block repository on top of a SQLite handle.
 *
 * @param {object} db - Handle exposing `prepare()` (presumably a
 *   better-sqlite3 `Database` - confirm against the caller).
 * @returns {object} Repository with lookup, insert and delete operations for
 *   topic blocks.
 */
export function createSqliteTopicBlockRepository(db) {
    const stmts = {
        findById: db.prepare('SELECT * FROM topic_blocks WHERE id = ?'),
        findByRecording: db.prepare('SELECT * FROM topic_blocks WHERE recording_id = ? ORDER BY created_at ASC'),
        // ESCAPE lets findByContext neutralise LIKE wildcards inside the id.
        findByContext: db.prepare(`
SELECT * FROM topic_blocks
WHERE context_ids LIKE ? ESCAPE '\\'
ORDER BY created_at DESC
`),
        insert: db.prepare(`
INSERT INTO topic_blocks (id, recording_id, context_ids, classification, duration, created_at)
VALUES (?, ?, ?, ?, ?, ?)
`),
        delete: db.prepare('DELETE FROM topic_blocks WHERE id = ?'),
        deleteByRecording: db.prepare('DELETE FROM topic_blocks WHERE recording_id = ?'),
    };

    // Prefixes `\`, `%` and `_` with the escape character so they match literally.
    const escapeLike = (value) => String(value).replace(/[\\%_]/g, '\\$&');

    return {
        /** Returns the topic block with this id, or null when none exists. */
        findById(id) {
            const row = stmts.findById.get(id);
            return row ?? null;
        },
        /** Returns a recording's topic blocks ordered by created_at. */
        findByRecording(recordingId) {
            return stmts.findByRecording.all(recordingId);
        },
        /**
         * Returns topic blocks whose `context_ids` text contains the given id,
         * newest created_at first.
         *
         * This is still a substring search over the serialized id list, so an
         * id that is a substring of another id also matches. NOTE: SQLite's
         * json_each would be more robust if it is available.
         */
        findByContext(contextId) {
            return stmts.findByContext.all(`%${escapeLike(contextId)}%`);
        },
        /** Inserts a single topic block. */
        save(block) {
            stmts.insert.run(block.id, block.recording_id, block.context_ids, block.classification, block.duration, nowISO());
        },
        /** Deletes one topic block by id. */
        delete(id) {
            stmts.delete.run(id);
        },
        /** Deletes every topic block of a recording. */
        deleteByRecording(recordingId) {
            stmts.deleteByRecording.run(recordingId);
        },
    };
}
|
package/dist/db/types.js
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
 * Escribano - Classification Value Object
 *
 * Helpers over a classification: a plain object mapping session-type names to
 * numeric scores.
 */
export const Classification = {
    /**
     * Get the primary session type based on scores.
     *
     * @param {object} c - Map of type name to score.
     * @param {number} [threshold=25] - Minimum score the top type must reach.
     * @returns {string|null} The highest-scoring type (the first one listed
     *   wins a tie), or null when it is below the threshold or when `c` has
     *   no entries.
     */
    getPrimary: (c, threshold = 25) => {
        const sorted = Object.entries(c ?? {}).sort(([, a], [, b]) => b - a);
        // An empty classification has no primary type; without this guard the
        // destructuring below would throw.
        if (sorted.length === 0) {
            return null;
        }
        const [type, score] = sorted[0];
        return score >= threshold ? type : null;
    },
    /**
     * Check if a specific type is present with a score at or above threshold.
     *
     * @param {object} c - Map of type name to score.
     * @param {string} type - Type name to look up.
     * @param {number} [threshold=50] - Minimum score.
     * @returns {boolean} True when the type's score (0 if missing) meets the threshold.
     */
    hasType: (c, type, threshold = 50) => {
        return (c[type] || 0) >= threshold;
    },
    /**
     * Get all types that meet a significance threshold.
     *
     * @param {object} c - Map of type name to score.
     * @param {number} [threshold=25] - Minimum score.
     * @returns {string[]} Type names whose score meets the threshold, in the
     *   object's own key order.
     */
    getSignificantTypes: (c, threshold = 25) => {
        return Object.entries(c)
            .filter(([, score]) => score >= threshold)
            .map(([type]) => type);
    },
    /**
     * Aggregate multiple classifications (e.g., from segments to session)
     * into one rounded weighted average per type.
     *
     * @param {Array<{classification: object, weight?: number}>} classifications
     *   Entries to combine. An omitted `weight` counts as 1, which gives a
     *   simple average.
     * @returns {{meeting: number, debugging: number, tutorial: number, learning: number, working: number}}
     *   Averaged scores; all zeros for empty input or a total weight of zero.
     */
    aggregate: (classifications) => {
        const types = ['meeting', 'debugging', 'tutorial', 'learning', 'working'];
        const result = {
            meeting: 0,
            debugging: 0,
            tutorial: 0,
            learning: 0,
            working: 0,
        };
        if (classifications.length === 0) {
            return result;
        }
        let totalWeight = 0;
        // Defaulting the weight keeps an unweighted entry from turning every sum into NaN.
        for (const { classification, weight = 1 } of classifications) {
            totalWeight += weight;
            for (const type of types) {
                result[type] += (classification[type] || 0) * weight;
            }
        }
        if (totalWeight > 0) {
            for (const type of types) {
                result[type] = Math.round(result[type] / totalWeight);
            }
        }
        return result;
    },
};
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
 * Escribano - Context Extraction Domain Module
 */
export const Context = {
    /**
     * Extract semantic contexts from raw OCR text using regex patterns.
     * This is a fast path for common applications, URLs and file paths.
     *
     * @param {string|null|undefined} ocrText - Raw OCR text; null, undefined
     *   and blank strings yield an empty result.
     * @returns {Array<{type: string, value: string, confidence: number}>}
     *   Detected contexts in detection order: apps, URLs, known domains, file
     *   paths. Duplicates are not removed here (see `unique`).
     */
    extractFromOCR: (ocrText) => {
        const contexts = [];
        const text = (ocrText ?? '').trim();
        if (!text) {
            return contexts;
        }
        // 1. App Detection
        const apps = [
            { name: 'Ghostty', pattern: /Ghostty/i },
            { name: 'VS Code', pattern: /Visual Studio Code|VS Code/i },
            { name: 'Chrome', pattern: /Google Chrome/i },
            // Word boundaries stop "Arc" from matching inside ordinary words
            // such as "search" or "architecture".
            { name: 'Arc', pattern: /\bArc\b/i },
            { name: 'Cursor', pattern: /Cursor/i },
            { name: 'TablePlus', pattern: /TablePlus/i },
            { name: 'Slack', pattern: /Slack/i },
            { name: 'Spotify', pattern: /Spotify/i },
            { name: 'YouTube Music', pattern: /YouTube Music/i },
        ];
        for (const app of apps) {
            if (app.pattern.test(text)) {
                contexts.push({
                    type: 'app',
                    value: app.name,
                    confidence: 0.9,
                });
            }
        }
        // 2. URL Detection
        const urlPattern = /https?:\/\/[^\s]+/g;
        const urls = text.match(urlPattern);
        if (urls) {
            for (const url of urls) {
                contexts.push({
                    type: 'url',
                    // Strip the whole run of trailing punctuation, e.g. the ")." in "(https://x.y)."
                    value: url.replace(/[,.)}>]+$/, ''),
                    confidence: 1.0,
                });
            }
        }
        // 3. Domain Detection (Specific known domains)
        const domains = [
            { name: 'github.com', pattern: /github\.com/i },
            { name: 'linkedin.com', pattern: /linkedin\.com/i },
            { name: 'stackoverflow.com', pattern: /stackoverflow\.com/i },
            { name: 'docs.rs', pattern: /docs\.rs/i },
            { name: 'ollama.com', pattern: /ollama\.com/i },
        ];
        for (const domain of domains) {
            if (!domain.pattern.test(text)) {
                continue;
            }
            // Only add if not already covered by a full URL. The comparison is
            // lower-cased because the domain patterns are case-insensitive.
            const coveredByUrl = contexts.some((c) => c.type === 'url' && c.value.toLowerCase().includes(domain.name));
            if (!coveredByUrl) {
                contexts.push({
                    type: 'url',
                    value: domain.name,
                    confidence: 0.8,
                });
            }
        }
        // 4. File Path Detection
        // The trailing \b makes the extension match in full, so ".json" is
        // not cut short to ".js".
        const pathPattern = /(?:~\/|\/Users\/)[^\s]+\.(?:ts|js|py|rs|md|go|json|yml|yaml)\b/g;
        const paths = text.match(pathPattern);
        if (paths) {
            for (const path of paths) {
                contexts.push({
                    type: 'file',
                    value: path,
                    confidence: 0.9,
                });
            }
        }
        // TODO: Implement Step 2 - Embedding clustering for topic grouping
        // This will be used when regex patterns don't yield high-confidence results
        // or when we want to group related segments together.
        return contexts;
    },
    /**
     * Remove duplicate contexts, keeping the first occurrence of each
     * `type:value` pair.
     *
     * @param {Array<{type: string, value: string}>} contexts - Contexts to de-duplicate.
     * @returns {Array} New array with duplicates removed, original order preserved.
     */
    unique: (contexts) => {
        const seen = new Set();
        return contexts.filter((c) => {
            const key = `${c.type}:${c.value}`;
            if (seen.has(key)) {
                return false;
            }
            seen.add(key);
            return true;
        });
    },
};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { generateId } from '../db/helpers.js';
|
|
2
|
+
/**
 * Build an audio observation record from the given parameters.
 *
 * @param {object} params - Source values: `recordingId`, `timestamp`,
 *   `endTimestamp`, `text`, `audioSource` and an optional `confidence`.
 * @returns {object} Observation with a freshly generated `id`, `type` fixed
 *   to 'audio', `audioType` fixed to 'speech', and `confidence` set to null
 *   when none was supplied.
 */
export function createAudioObservation(params) {
    const id = generateId();
    const { recordingId, timestamp, endTimestamp, text, audioSource } = params;
    const confidence = params.confidence ?? null;
    return {
        id,
        recordingId,
        type: 'audio',
        timestamp,
        endTimestamp,
        text,
        audioSource,
        audioType: 'speech',
        confidence,
    };
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
 * Move a recording into the processing state.
 *
 * @param {object} recording - Recording to transition; it is not mutated.
 * @returns {object} Copy with status 'processing', processingStep 'vad' and
 *   the error message cleared.
 */
export function startProcessing(recording) {
    const changes = {
        status: 'processing',
        processingStep: 'vad',
        errorMessage: null,
    };
    return Object.assign({}, recording, changes);
}
|
|
12
|
+
/**
 * Move a recording on to another processing step.
 *
 * @param {object} recording - Recording to update; it is not mutated.
 * @param {*} step - Value stored as the new `processingStep`.
 * @returns {object} Copy of the recording with `processingStep` replaced.
 */
export function advanceStep(recording, step) {
    const next = { ...recording };
    next.processingStep = step;
    return next;
}
|
|
21
|
+
/**
 * Mark a recording as successfully processed.
 *
 * @param {object} recording - Recording to update; it is not mutated.
 * @returns {object} Copy with status 'processed' and processingStep cleared.
 */
export function completeProcessing(recording) {
    const changes = { status: 'processed', processingStep: null };
    return Object.assign({}, recording, changes);
}
|
|
31
|
+
/**
 * Mark a recording as failed.
 *
 * @param {object} recording - Recording to update; it is not mutated.
 * @param {*} error - Value stored as `errorMessage`.
 * @returns {object} Copy with status 'error' and the error recorded.
 */
export function failProcessing(recording, error) {
    // processingStep is deliberately left as-is so a resume knows where the failure happened.
    const changes = { status: 'error', errorMessage: error };
    return Object.assign({}, recording, changes);
}
|