@redaksjon/protokoll 0.0.8 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cursor/rules/definition-of-done.md +89 -0
- package/.cursor/rules/no-emoticons.md +43 -0
- package/LICENSE +1 -1
- package/README.md +928 -35
- package/dist/agentic/executor.js +315 -0
- package/dist/agentic/executor.js.map +1 -0
- package/dist/agentic/index.js +19 -0
- package/dist/agentic/index.js.map +1 -0
- package/dist/agentic/registry.js +41 -0
- package/dist/agentic/registry.js.map +1 -0
- package/dist/agentic/tools/lookup-person.js +66 -0
- package/dist/agentic/tools/lookup-person.js.map +1 -0
- package/dist/agentic/tools/lookup-project.js +93 -0
- package/dist/agentic/tools/lookup-project.js.map +1 -0
- package/dist/agentic/tools/route-note.js +45 -0
- package/dist/agentic/tools/route-note.js.map +1 -0
- package/dist/agentic/tools/store-context.js +51 -0
- package/dist/agentic/tools/store-context.js.map +1 -0
- package/dist/agentic/tools/verify-spelling.js +57 -0
- package/dist/agentic/tools/verify-spelling.js.map +1 -0
- package/dist/arguments.js +23 -6
- package/dist/arguments.js.map +1 -1
- package/dist/constants.js +13 -11
- package/dist/constants.js.map +1 -1
- package/dist/context/discovery.js +114 -0
- package/dist/context/discovery.js.map +1 -0
- package/dist/context/index.js +58 -0
- package/dist/context/index.js.map +1 -0
- package/dist/context/storage.js +131 -0
- package/dist/context/storage.js.map +1 -0
- package/dist/interactive/handler.js +223 -0
- package/dist/interactive/handler.js.map +1 -0
- package/dist/interactive/index.js +18 -0
- package/dist/interactive/index.js.map +1 -0
- package/dist/interactive/onboarding.js +28 -0
- package/dist/interactive/onboarding.js.map +1 -0
- package/dist/main.js +0 -0
- package/dist/output/index.js +8 -0
- package/dist/output/index.js.map +1 -0
- package/dist/output/manager.js +105 -0
- package/dist/output/manager.js.map +1 -0
- package/dist/phases/complete.js +107 -0
- package/dist/phases/complete.js.map +1 -0
- package/dist/phases/locate.js +14 -5
- package/dist/phases/locate.js.map +1 -1
- package/dist/pipeline/index.js +8 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/orchestrator.js +281 -0
- package/dist/pipeline/orchestrator.js.map +1 -0
- package/dist/prompt/instructions/transcribe.md +6 -6
- package/dist/prompt/personas/transcriber.md +5 -5
- package/dist/protokoll.js +38 -5
- package/dist/protokoll.js.map +1 -1
- package/dist/reasoning/client.js +150 -0
- package/dist/reasoning/client.js.map +1 -0
- package/dist/reasoning/index.js +36 -0
- package/dist/reasoning/index.js.map +1 -0
- package/dist/reasoning/strategy.js +60 -0
- package/dist/reasoning/strategy.js.map +1 -0
- package/dist/reflection/collector.js +124 -0
- package/dist/reflection/collector.js.map +1 -0
- package/dist/reflection/index.js +16 -0
- package/dist/reflection/index.js.map +1 -0
- package/dist/reflection/reporter.js +238 -0
- package/dist/reflection/reporter.js.map +1 -0
- package/dist/routing/classifier.js +201 -0
- package/dist/routing/classifier.js.map +1 -0
- package/dist/routing/index.js +27 -0
- package/dist/routing/index.js.map +1 -0
- package/dist/routing/router.js +153 -0
- package/dist/routing/router.js.map +1 -0
- package/dist/transcription/index.js +41 -0
- package/dist/transcription/index.js.map +1 -0
- package/dist/transcription/service.js +64 -0
- package/dist/transcription/service.js.map +1 -0
- package/dist/transcription/types.js +31 -0
- package/dist/transcription/types.js.map +1 -0
- package/dist/util/media.js +4 -4
- package/dist/util/media.js.map +1 -1
- package/dist/util/metadata.js +95 -0
- package/dist/util/metadata.js.map +1 -0
- package/dist/util/storage.js +2 -2
- package/dist/util/storage.js.map +1 -1
- package/docs/examples.md +224 -0
- package/docs/index.html +5 -3
- package/docs/package-lock.json +639 -332
- package/docs/package.json +5 -4
- package/docs/troubleshooting.md +257 -0
- package/docs/vite.config.js +9 -3
- package/eslint.config.mjs +1 -0
- package/guide/architecture.md +217 -0
- package/guide/configuration.md +199 -0
- package/guide/context-system.md +215 -0
- package/guide/development.md +273 -0
- package/guide/index.md +91 -0
- package/guide/interactive.md +199 -0
- package/guide/quickstart.md +138 -0
- package/guide/reasoning.md +193 -0
- package/guide/routing.md +222 -0
- package/package.json +10 -7
- package/tsconfig.tsbuildinfo +1 -1
- package/vitest.config.ts +27 -5
- package/dist/phases/transcribe.js +0 -149
- package/dist/phases/transcribe.js.map +0 -1
- package/dist/processor.js +0 -35
- package/dist/processor.js.map +0 -1
- package/dist/prompt/transcribe.js +0 -41
- package/dist/prompt/transcribe.js.map +0 -1
- package/dist/util/general.js +0 -39
- package/dist/util/general.js.map +0 -1
- package/dist/util/openai.js +0 -92
- package/dist/util/openai.js.map +0 -1
package/guide/index.md
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# Protokoll AI Guide
|
|
2
|
+
|
|
3
|
+
This directory contains comprehensive documentation designed to help developers and AI assistants understand, integrate, debug, and extend Protokoll - an intelligent audio transcription system.
|
|
4
|
+
|
|
5
|
+
## What is Protokoll?
|
|
6
|
+
|
|
7
|
+
Protokoll transforms audio recordings into intelligent, context-enhanced transcriptions. It uses reasoning models to understand names, route notes to appropriate destinations, and build knowledge over time.
|
|
8
|
+
|
|
9
|
+
**Core Value**: Solves the "context problem" in transcription - when Whisper mishears "Priya" as "pre a", Protokoll recognizes and corrects it based on learned context.
|
|
10
|
+
|
|
11
|
+
## Guide Contents
|
|
12
|
+
|
|
13
|
+
### Getting Started
|
|
14
|
+
- [**Quick Start**](./quickstart.md): Get Protokoll working in 5 minutes
|
|
15
|
+
- [**Configuration**](./configuration.md): All configuration options
|
|
16
|
+
|
|
17
|
+
### Understanding Protokoll
|
|
18
|
+
- [**Architecture**](./architecture.md): System design and data flow
|
|
19
|
+
- [**Context System**](./context-system.md): How context storage works
|
|
20
|
+
- [**Routing**](./routing.md): Intelligent note routing
|
|
21
|
+
- [**Reasoning**](./reasoning.md): Reasoning model integration
|
|
22
|
+
|
|
23
|
+
### Development
|
|
24
|
+
- [**Development**](./development.md): Building and testing
|
|
25
|
+
- [**Interactive Mode**](./interactive.md): User interaction system
|
|
26
|
+
|
|
27
|
+
## Quick Reference
|
|
28
|
+
|
|
29
|
+
### Essential Commands
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# Basic transcription (self-reflection enabled by default)
|
|
33
|
+
protokoll --input-directory ./recordings
|
|
34
|
+
|
|
35
|
+
# Interactive mode for learning
|
|
36
|
+
protokoll --input-directory ./recordings --interactive
|
|
37
|
+
|
|
38
|
+
# Disable self-reflection
|
|
39
|
+
protokoll --input-directory ./recordings --no-self-reflection
|
|
40
|
+
|
|
41
|
+
# Full debug mode
|
|
42
|
+
protokoll --input-directory ./recordings --debug --verbose
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Key Directories
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
~/.protokoll/ # Configuration
|
|
49
|
+
├── config.yaml # Main config
|
|
50
|
+
├── people/ # People context
|
|
51
|
+
├── projects/ # Project context
|
|
52
|
+
├── companies/ # Company context
|
|
53
|
+
└── terms/ # Terminology
|
|
54
|
+
|
|
55
|
+
./output/protokoll/ # Intermediate files
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Environment Variables
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
export OPENAI_API_KEY="sk-..."
|
|
62
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## For AI Assistants
|
|
66
|
+
|
|
67
|
+
If you're an AI helping someone use Protokoll:
|
|
68
|
+
|
|
69
|
+
1. **Start with** [`quickstart.md`](./quickstart.md) for basics
|
|
70
|
+
2. **Read** [`architecture.md`](./architecture.md) for system understanding
|
|
71
|
+
3. **Reference** [`configuration.md`](./configuration.md) for settings
|
|
72
|
+
4. **Check** [`context-system.md`](./context-system.md) for knowledge base questions
|
|
73
|
+
|
|
74
|
+
## Key Capabilities
|
|
75
|
+
|
|
76
|
+
1. **Context-Aware Transcription**: Corrects names based on learned context
|
|
77
|
+
2. **Intelligent Routing**: Sends notes to the right directories
|
|
78
|
+
3. **Interactive Learning**: Asks questions, remembers answers
|
|
79
|
+
4. **Self-Reflection**: Reports on tool effectiveness (enabled by default)
|
|
80
|
+
5. **Full Preservation**: Not a summarizer - keeps all content
|
|
81
|
+
|
|
82
|
+
## Current Defaults
|
|
83
|
+
|
|
84
|
+
| Setting | Default Value |
|
|
85
|
+
|---------|---------------|
|
|
86
|
+
| Reasoning Model | `gpt-5.2` |
|
|
87
|
+
| Transcription Model | `whisper-1` |
|
|
88
|
+
| Self-Reflection | `true` (enabled) |
|
|
89
|
+
| Interactive Mode | `false` (disabled) |
|
|
90
|
+
| Output Structure | `month` |
|
|
91
|
+
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# Interactive Mode
|
|
2
|
+
|
|
3
|
+
Interactive mode allows Protokoll to learn from you as it processes transcripts.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
When enabled, Protokoll will:
|
|
8
|
+
|
|
9
|
+
1. Pause when encountering unknown names
|
|
10
|
+
2. Ask for correct spellings
|
|
11
|
+
3. Offer to remember new entities
|
|
12
|
+
4. Request routing clarification
|
|
13
|
+
|
|
14
|
+
**Note**: Interactive mode is disabled by default. Use `--interactive` to enable it.
|
|
15
|
+
|
|
16
|
+
## Enabling Interactive Mode
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
protokoll --interactive --input-directory ./recordings
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Or in config:
|
|
23
|
+
|
|
24
|
+
```yaml
|
|
25
|
+
# ~/.protokoll/config.yaml
|
|
26
|
+
features:
|
|
27
|
+
interactive: true
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Clarification Types
|
|
31
|
+
|
|
32
|
+
### Name Spelling
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
Name Clarification Needed
|
|
36
|
+
|
|
37
|
+
Context: "...meeting with pre a about..."
|
|
38
|
+
Detected: "pre a"
|
|
39
|
+
Suggested: "Priya"
|
|
40
|
+
|
|
41
|
+
? Enter correct spelling: Priya Sharma
|
|
42
|
+
? Remember this for future? Yes
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### New Person
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
New Person Detected
|
|
49
|
+
|
|
50
|
+
Name: Priya Sharma
|
|
51
|
+
|
|
52
|
+
? Company (optional): Acme Corp
|
|
53
|
+
? Role (optional): Engineering Manager
|
|
54
|
+
? Add to context? Yes
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Routing Decision
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
Routing Clarification
|
|
61
|
+
|
|
62
|
+
Content mentions: "quarterly planning"
|
|
63
|
+
|
|
64
|
+
? Which project should this go to?
|
|
65
|
+
> work
|
|
66
|
+
personal
|
|
67
|
+
quarterly-planning
|
|
68
|
+
(default)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Session Recording
|
|
72
|
+
|
|
73
|
+
All clarifications are recorded in the session file:
|
|
74
|
+
|
|
75
|
+
```jsonc
|
|
76
|
+
// output/protokoll/260111-1245-abc123-session.json
|
|
77
|
+
{
|
|
78
|
+
"requests": [
|
|
79
|
+
{
|
|
80
|
+
"type": "name_spelling",
|
|
81
|
+
"term": "pre a",
|
|
82
|
+
"suggestion": "Priya"
|
|
83
|
+
}
|
|
84
|
+
],
|
|
85
|
+
"responses": [
|
|
86
|
+
{
|
|
87
|
+
"type": "name_spelling",
|
|
88
|
+
"term": "pre a",
|
|
89
|
+
"response": "Priya Sharma",
|
|
90
|
+
"shouldRemember": true
|
|
91
|
+
}
|
|
92
|
+
]
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Non-Interactive Mode (Default)
|
|
97
|
+
|
|
98
|
+
By default, Protokoll runs without prompts:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
protokoll --input-directory ./recordings
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
In non-interactive mode, Protokoll:
|
|
105
|
+
- Uses suggestions when available
|
|
106
|
+
- Skips unknown entities
|
|
107
|
+
- Uses default routing
|
|
108
|
+
- Still generates self-reflection reports
|
|
109
|
+
|
|
110
|
+
## First-Run Onboarding
|
|
111
|
+
|
|
112
|
+
On first run with `--interactive` and no existing config:
|
|
113
|
+
|
|
114
|
+
```
|
|
115
|
+
Welcome to Protokoll!
|
|
116
|
+
|
|
117
|
+
It looks like this is your first time using Protokoll.
|
|
118
|
+
Let's set up some basics.
|
|
119
|
+
|
|
120
|
+
? Default notes directory: ~/notes
|
|
121
|
+
? Default structure: month
|
|
122
|
+
? Add any projects now? Yes
|
|
123
|
+
|
|
124
|
+
Project Setup
|
|
125
|
+
|
|
126
|
+
? Project name: Work
|
|
127
|
+
? Destination: ~/work/notes
|
|
128
|
+
? Trigger phrases: work, office, meeting
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## API
|
|
132
|
+
|
|
133
|
+
### InteractiveInstance
|
|
134
|
+
|
|
135
|
+
```typescript
|
|
136
|
+
interface InteractiveInstance {
|
|
137
|
+
// Session management
|
|
138
|
+
startSession(): void;
|
|
139
|
+
endSession(): InteractiveSession;
|
|
140
|
+
getSession(): InteractiveSession | null;
|
|
141
|
+
|
|
142
|
+
// Clarification handling
|
|
143
|
+
handleClarification(request: ClarificationRequest): Promise<ClarificationResponse>;
|
|
144
|
+
|
|
145
|
+
// State
|
|
146
|
+
isEnabled(): boolean;
|
|
147
|
+
|
|
148
|
+
// Onboarding
|
|
149
|
+
checkNeedsOnboarding(): OnboardingState;
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### ClarificationRequest
|
|
154
|
+
|
|
155
|
+
```typescript
|
|
156
|
+
interface ClarificationRequest {
|
|
157
|
+
type: ClarificationType;
|
|
158
|
+
context: string;
|
|
159
|
+
term: string;
|
|
160
|
+
suggestion?: string;
|
|
161
|
+
options?: string[];
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
type ClarificationType =
|
|
165
|
+
| 'name_spelling'
|
|
166
|
+
| 'new_person'
|
|
167
|
+
| 'new_project'
|
|
168
|
+
| 'new_company'
|
|
169
|
+
| 'routing_decision'
|
|
170
|
+
| 'first_run_onboarding'
|
|
171
|
+
| 'general';
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Best Practices
|
|
175
|
+
|
|
176
|
+
1. **Start with interactive mode**: Build context quickly
|
|
177
|
+
2. **Review session files**: See what was learned
|
|
178
|
+
3. **Switch to default (non-interactive) mode**: Once context is established
|
|
179
|
+
4. **Periodic interactive runs**: Catch new names
|
|
180
|
+
|
|
181
|
+
## Troubleshooting
|
|
182
|
+
|
|
183
|
+
### No Prompts Appearing
|
|
184
|
+
|
|
185
|
+
1. Check that the `--interactive` flag is set
|
|
186
|
+
2. Verify terminal supports prompts
|
|
187
|
+
3. Check if running in a non-TTY environment
|
|
188
|
+
|
|
189
|
+
### Too Many Prompts
|
|
190
|
+
|
|
191
|
+
1. Add more context entries
|
|
192
|
+
2. Run without `--interactive` for known content
|
|
193
|
+
3. Add `sounds_like` mappings
|
|
194
|
+
|
|
195
|
+
### Prompts Timing Out
|
|
196
|
+
|
|
197
|
+
1. Increase timeout in config
|
|
198
|
+
2. Run without interactive mode and review manually
|
|
199
|
+
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# Quick Start Guide
|
|
2
|
+
|
|
3
|
+
Get Protokoll working in 5 minutes.
|
|
4
|
+
|
|
5
|
+
## Prerequisites
|
|
6
|
+
|
|
7
|
+
- Node.js 18+
|
|
8
|
+
- npm 9+
|
|
9
|
+
- OpenAI API key
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
npm install -g @redaksjon/protokoll
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Or from source:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
git clone https://github.com/redaksjon/protokoll.git
|
|
21
|
+
cd protokoll
|
|
22
|
+
npm install
|
|
23
|
+
npm run build
|
|
24
|
+
npm link
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Setup
|
|
28
|
+
|
|
29
|
+
### 1. Set API Key
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
export OPENAI_API_KEY='sk-...'
|
|
33
|
+
# or for Anthropic models
|
|
34
|
+
export ANTHROPIC_API_KEY='sk-ant-...'
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### 2. Create Config (optional)
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
mkdir -p ~/.protokoll
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
```yaml
|
|
44
|
+
# ~/.protokoll/config.yaml
|
|
45
|
+
model: "gpt-5.2"
|
|
46
|
+
routing:
|
|
47
|
+
default:
|
|
48
|
+
path: "~/notes"
|
|
49
|
+
structure: "month"
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## First Transcription
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# Transcribe all audio files in a directory
|
|
56
|
+
protokoll --input-directory ~/recordings
|
|
57
|
+
|
|
58
|
+
# Output goes to ~/notes/2026/01/<subject>.md
|
|
59
|
+
# Self-reflection report is generated by default
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Interactive Mode
|
|
63
|
+
|
|
64
|
+
Learn names and projects as you go:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
protokoll --input-directory ~/recordings --interactive
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Protokoll will ask:
|
|
71
|
+
- "Is 'pre a' spelled 'Priya'?"
|
|
72
|
+
- "Should I remember this person?"
|
|
73
|
+
- "Which project should this note go to?"
|
|
74
|
+
|
|
75
|
+
## Add Context
|
|
76
|
+
|
|
77
|
+
Create context files to improve accuracy:
|
|
78
|
+
|
|
79
|
+
```yaml
|
|
80
|
+
# ~/.protokoll/people/priya-sharma.yaml
|
|
81
|
+
id: priya-sharma
|
|
82
|
+
name: Priya Sharma
|
|
83
|
+
sounds_like:
|
|
84
|
+
- "pre a"
|
|
85
|
+
- "pria"
|
|
86
|
+
context: "Colleague from engineering"
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Check Results
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
# View the transcript
|
|
93
|
+
cat ~/notes/2026/01/meeting-notes.md
|
|
94
|
+
|
|
95
|
+
# View intermediate files (kept by default)
|
|
96
|
+
ls output/protokoll/
|
|
97
|
+
|
|
98
|
+
# View self-reflection report
|
|
99
|
+
cat output/protokoll/*-reflection.md
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Common Options
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
# Verbose output
|
|
106
|
+
protokoll --input-directory ~/recordings --verbose
|
|
107
|
+
|
|
108
|
+
# Debug mode (more detailed intermediate files)
|
|
109
|
+
protokoll --input-directory ~/recordings --debug
|
|
110
|
+
|
|
111
|
+
# Disable self-reflection (enabled by default)
|
|
112
|
+
protokoll --input-directory ~/recordings --no-self-reflection
|
|
113
|
+
|
|
114
|
+
# Dry run (show what would happen)
|
|
115
|
+
protokoll --input-directory ~/recordings --dry-run
|
|
116
|
+
|
|
117
|
+
# Use a different model
|
|
118
|
+
protokoll --input-directory ~/recordings --model claude-3-5-sonnet
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Default Settings
|
|
122
|
+
|
|
123
|
+
| Option | Default |
|
|
124
|
+
|--------|---------|
|
|
125
|
+
| `--model` | `gpt-5.2` |
|
|
126
|
+
| `--transcription-model` | `whisper-1` |
|
|
127
|
+
| `--self-reflection` | `true` (enabled) |
|
|
128
|
+
| `--interactive` | `false` (disabled) |
|
|
129
|
+
| Output structure | `month` |
|
|
130
|
+
| Filename options | `date`, `time`, `subject` |
|
|
131
|
+
|
|
132
|
+
## Next Steps
|
|
133
|
+
|
|
134
|
+
- [Configure routing](./routing.md) for different projects
|
|
135
|
+
- [Add context](./context-system.md) for known names
|
|
136
|
+
- [Read about reasoning](./reasoning.md) for model options
|
|
137
|
+
- [Read architecture](./architecture.md) for system understanding
|
|
138
|
+
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# Reasoning Integration
|
|
2
|
+
|
|
3
|
+
Protokoll uses reasoning models to enhance transcriptions.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The reasoning system:
|
|
8
|
+
|
|
9
|
+
1. Takes raw Whisper transcript
|
|
10
|
+
2. Uses context to identify corrections
|
|
11
|
+
3. Applies tools to look up and verify
|
|
12
|
+
4. Produces enhanced transcript
|
|
13
|
+
|
|
14
|
+
## Supported Models
|
|
15
|
+
|
|
16
|
+
### OpenAI Models
|
|
17
|
+
|
|
18
|
+
| Model | Best For |
|
|
19
|
+
|-------|----------|
|
|
20
|
+
| `gpt-5.2` | **Default** - High reasoning capability |
|
|
21
|
+
| `gpt-5.1` | High reasoning, balanced |
|
|
22
|
+
| `gpt-5` | Fast and capable |
|
|
23
|
+
| `gpt-4o` | Previous gen, still capable |
|
|
24
|
+
| `gpt-4o-mini` | Fast, lower cost |
|
|
25
|
+
| `o1` | Complex reasoning |
|
|
26
|
+
| `o1-mini` | Reasoning, faster |
|
|
27
|
+
|
|
28
|
+
### Anthropic Models
|
|
29
|
+
|
|
30
|
+
| Model | Best For |
|
|
31
|
+
|-------|----------|
|
|
32
|
+
| `claude-3-5-sonnet` | Recommended for complex transcripts |
|
|
33
|
+
| `claude-3-opus` | Highest quality |
|
|
34
|
+
| `claude-3-haiku` | Fast, cost-effective |
|
|
35
|
+
|
|
36
|
+
### Google Models
|
|
37
|
+
|
|
38
|
+
| Model | Best For |
|
|
39
|
+
|-------|----------|
|
|
40
|
+
| `gemini-1.5-pro` | High quality |
|
|
41
|
+
| `gemini-1.5-flash` | Fast processing |
|
|
42
|
+
|
|
43
|
+
## Configuration
|
|
44
|
+
|
|
45
|
+
```yaml
|
|
46
|
+
# ~/.protokoll/config.yaml
|
|
47
|
+
model: "gpt-5.2"
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Or via command line:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
protokoll --model claude-3-5-sonnet --input-directory ./recordings
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Reasoning Strategies
|
|
57
|
+
|
|
58
|
+
### Simple
|
|
59
|
+
|
|
60
|
+
Direct completion without iteration:
|
|
61
|
+
|
|
62
|
+
```typescript
|
|
63
|
+
strategy: "simple"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Best for: Short transcripts, fast processing
|
|
67
|
+
|
|
68
|
+
### Investigate-Then-Respond
|
|
69
|
+
|
|
70
|
+
Two-phase approach:
|
|
71
|
+
|
|
72
|
+
1. Investigation: Use tools to gather context
|
|
73
|
+
2. Response: Generate enhanced transcript
|
|
74
|
+
|
|
75
|
+
```typescript
|
|
76
|
+
strategy: "investigate-then-respond"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Best for: Transcripts with unknown names
|
|
80
|
+
|
|
81
|
+
### Multi-Pass
|
|
82
|
+
|
|
83
|
+
Multiple iterations refining output:
|
|
84
|
+
|
|
85
|
+
```typescript
|
|
86
|
+
strategy: "multi-pass"
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Best for: Complex transcripts needing multiple corrections
|
|
90
|
+
|
|
91
|
+
### Adaptive
|
|
92
|
+
|
|
93
|
+
Automatically selects strategy based on content:
|
|
94
|
+
|
|
95
|
+
```typescript
|
|
96
|
+
strategy: "adaptive" // default
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Best for: General use
|
|
100
|
+
|
|
101
|
+
## Self-Reflection
|
|
102
|
+
|
|
103
|
+
Self-reflection is **enabled by default**. It generates reports showing:
|
|
104
|
+
|
|
105
|
+
- Processing duration
|
|
106
|
+
- Tool call counts
|
|
107
|
+
- Success rates
|
|
108
|
+
- Quality assessment
|
|
109
|
+
- Recommendations
|
|
110
|
+
|
|
111
|
+
### Disable Self-Reflection
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
protokoll --no-self-reflection --input-directory ./recordings
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Report Example
|
|
118
|
+
|
|
119
|
+
```markdown
|
|
120
|
+
# Protokoll - Self-Reflection Report
|
|
121
|
+
|
|
122
|
+
## Summary
|
|
123
|
+
- Duration: 8.3s
|
|
124
|
+
- Iterations: 12
|
|
125
|
+
- Tool Calls: 7
|
|
126
|
+
- Confidence: 92.5%
|
|
127
|
+
|
|
128
|
+
## Tool Effectiveness
|
|
129
|
+
| Tool | Calls | Success Rate |
|
|
130
|
+
|------|-------|--------------|
|
|
131
|
+
| lookup_person | 3 | 100% |
|
|
132
|
+
| route_note | 1 | 100% |
|
|
133
|
+
|
|
134
|
+
## Recommendations
|
|
135
|
+
- Consider adding more context for faster processing
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## API
|
|
139
|
+
|
|
140
|
+
### ReasoningInstance
|
|
141
|
+
|
|
142
|
+
```typescript
|
|
143
|
+
interface ReasoningInstance {
|
|
144
|
+
complete(request: ReasoningRequest): Promise<ReasoningResponse>;
|
|
145
|
+
getRecommendedStrategy(model: string): ReasoningStrategy;
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
interface ReasoningRequest {
|
|
149
|
+
model: string;
|
|
150
|
+
messages: Message[];
|
|
151
|
+
tools?: Tool[];
|
|
152
|
+
temperature?: number;
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
interface ReasoningResponse {
|
|
156
|
+
content: string;
|
|
157
|
+
model: string;
|
|
158
|
+
usage: {
|
|
159
|
+
promptTokens: number;
|
|
160
|
+
completionTokens: number;
|
|
161
|
+
totalTokens: number;
|
|
162
|
+
};
|
|
163
|
+
}
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Best Practices
|
|
167
|
+
|
|
168
|
+
1. **Start with gpt-5.2**: Default model with high reasoning capability
|
|
169
|
+
2. **Use claude-3-5-sonnet for quality**: Better name handling
|
|
170
|
+
3. **Review self-reflection reports**: Track performance over time
|
|
171
|
+
4. **Add context**: More context = fewer iterations
|
|
172
|
+
5. **Use adaptive strategy**: Let Protokoll choose
|
|
173
|
+
|
|
174
|
+
## Troubleshooting
|
|
175
|
+
|
|
176
|
+
### Slow Processing
|
|
177
|
+
|
|
178
|
+
1. Use a faster model: `--model gpt-4o-mini`
|
|
179
|
+
2. Check self-reflection for bottlenecks
|
|
180
|
+
3. Add more context to reduce iterations
|
|
181
|
+
|
|
182
|
+
### Poor Quality
|
|
183
|
+
|
|
184
|
+
1. Use a higher-quality model: `--model claude-3-5-sonnet`
|
|
185
|
+
2. Add more context entries
|
|
186
|
+
3. Check `sounds_like` mappings
|
|
187
|
+
|
|
188
|
+
### API Errors
|
|
189
|
+
|
|
190
|
+
1. Verify API key is set
|
|
191
|
+
2. Check rate limits
|
|
192
|
+
3. Use batch processing for many files
|
|
193
|
+
|