@houtini/voice-analyser 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +262 -265
- package/dist/analyzers/phrase-extraction.d.ts +30 -0
- package/dist/analyzers/phrase-extraction.d.ts.map +1 -0
- package/dist/analyzers/phrase-extraction.js +175 -0
- package/dist/analyzers/phrase-extraction.js.map +1 -0
- package/dist/analyzers/vocabulary-tiers.d.ts +27 -0
- package/dist/analyzers/vocabulary-tiers.d.ts.map +1 -0
- package/dist/analyzers/vocabulary-tiers.js +187 -0
- package/dist/analyzers/vocabulary-tiers.js.map +1 -0
- package/dist/index.js +16 -7
- package/dist/index.js.map +1 -1
- package/dist/tools/analyze-corpus.d.ts.map +1 -1
- package/dist/tools/analyze-corpus.js +8 -0
- package/dist/tools/analyze-corpus.js.map +1 -1
- package/dist/tools/generate-narrative-guide-v4.d.ts +11 -0
- package/dist/tools/generate-narrative-guide-v4.d.ts.map +1 -0
- package/dist/tools/generate-narrative-guide-v4.js +252 -0
- package/dist/tools/generate-narrative-guide-v4.js.map +1 -0
- package/package.json +6 -5
- package/dist/tools/generate-narrative-guide-v3.d.ts +0 -29
- package/dist/tools/generate-narrative-guide-v3.d.ts.map +0 -1
- package/dist/tools/generate-narrative-guide-v3.js +0 -621
- package/dist/tools/generate-narrative-guide-v3.js.map +0 -1
- package/dist/tools/generate-narrative-guide.d.ts +0 -25
- package/dist/tools/generate-narrative-guide.d.ts.map +0 -1
- package/dist/tools/generate-narrative-guide.js +0 -942
- package/dist/tools/generate-narrative-guide.js.map +0 -1
package/README.md
CHANGED
|
@@ -1,265 +1,262 @@
|
|
|
1
|
-
# Voice Analyser
|
|
2
|
-
|
|
3
|
-
[](https://www.npmjs.com/package/@houtini/voice-analyser)
|
|
4
|
-
[
|
|
1
|
+
# Voice Analyser MCP
|
|
2
|
+
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@houtini/voice-analyser)
|
|
4
|
+
[Snyk security scan](https://snyk.io/test/github/houtini-ai/voice-analyser-mcp)
|
|
5
|
+
[License: Apache 2.0](https://opensource.org/licenses/Apache-2.0)
|
|
6
|
+
|
|
7
|
+
> MCP server that analyses your published writing and generates executable style guides for voice-matched content creation.
|
|
8
|
+
|
|
9
|
+
## What This Does
|
|
10
|
+
|
|
11
|
+
I built this because traditional style guides don't work. They tell you "use short sentences" and "vary paragraph length" - rules that sound helpful but produce robotic output when you try to follow them.
|
|
12
|
+
|
|
13
|
+
This tool extracts statistical patterns from your published writing and generates a style guide that teaches through **zero tolerance rules, phrase libraries, and validation checklists** rather than vague principles.
|
|
14
|
+
|
|
15
|
+
Version 1.4.0 focuses on executable instructions: forbidden word lists with alternatives, 50+ actual phrases from your corpus, and checkbox validation that catches AI slop before you publish.
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
### Claude Desktop
|
|
20
|
+
|
|
21
|
+
Add to your `claude_desktop_config.json`:
|
|
22
|
+
|
|
23
|
+
```json
|
|
24
|
+
{
|
|
25
|
+
"mcpServers": {
|
|
26
|
+
"voice-analysis": {
|
|
27
|
+
"command": "npx",
|
|
28
|
+
"args": ["-y", "@houtini/voice-analyser@latest"]
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
**Config file locations:**
|
|
35
|
+
- Windows: `%APPDATA%\Claude\claude_desktop_config.json`
|
|
36
|
+
- macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
37
|
+
- Linux: `~/.config/Claude/claude_desktop_config.json`
|
|
38
|
+
|
|
39
|
+
Restart Claude Desktop after saving.
|
|
40
|
+
|
|
41
|
+
**Requirements:** Node.js 20+
|
|
42
|
+
|
|
43
|
+
## Quick Start
|
|
44
|
+
|
|
45
|
+
### 1. Create Output Directory
|
|
46
|
+
|
|
47
|
+
Pick a directory for corpus storage and analysis:
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
C:\writing\voice-models\ (Windows)
|
|
51
|
+
~/writing/voice-models/ (Mac/Linux)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
This directory holds collected articles, analysis JSON files, and generated style guides.
|
|
55
|
+
|
|
56
|
+
### 2. Collect Writing Corpus
|
|
57
|
+
|
|
58
|
+
In Claude Desktop:
|
|
59
|
+
```
|
|
60
|
+
Collect corpus from https://yoursite.com/post-sitemap.xml
|
|
61
|
+
Save as "my-voice" in "C:\writing\voice-models"
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Parameters:
|
|
65
|
+
- `sitemap_url` - XML sitemap URL
|
|
66
|
+
- `output_name` - Corpus identifier (e.g., "my-voice")
|
|
67
|
+
- `output_dir` - Directory you created above
|
|
68
|
+
- `max_articles` - Optional limit (default: 100)
|
|
69
|
+
|
|
70
|
+
The tool crawls your sitemap, extracts clean content, and saves markdown files.
|
|
71
|
+
|
|
72
|
+
### 3. Run Analysis
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
Analyse corpus "my-voice" in directory "C:\writing\voice-models"
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
This runs 15+ analysers covering:
|
|
79
|
+
- Vocabulary tiers (AI slop detection, formality scoring)
|
|
80
|
+
- Phrase extraction (opening patterns, transitions, caveats)
|
|
81
|
+
- Sentence structure and rhythm
|
|
82
|
+
- Voice markers and conversational devices
|
|
83
|
+
- Punctuation habits
|
|
84
|
+
|
|
85
|
+
### 4. Generate Style Guide v4
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
Generate style guide for "my-voice" in directory "C:\writing\voice-models"
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Creates an example-first guide at:
|
|
92
|
+
`C:\writing\voice-models\my-voice\writing_style_my-voice.md`
|
|
93
|
+
|
|
94
|
+
## What v1.4.0 Changed
|
|
95
|
+
|
|
96
|
+
Previous versions generated statistical analysis that was accurate but not useful for writing. v1.4.0 restructures the output:
|
|
97
|
+
|
|
98
|
+
**Before:** 60% statistics, 40% guidance
|
|
99
|
+
**After:** 70% examples, 30% statistics
|
|
100
|
+
|
|
101
|
+
### New Style Guide Structure
|
|
102
|
+
|
|
103
|
+
**Part 1: Zero Tolerance Rules**
|
|
104
|
+
- Forbidden vocabulary (AI slop) with alternatives
|
|
105
|
+
- Formal words flagged with casual replacements
|
|
106
|
+
- Punctuation rules (em-dash detection)
|
|
107
|
+
|
|
108
|
+
**Part 2: Phrase Library (50+ Examples)**
|
|
109
|
+
- Opening patterns (personal story, direct action, protective warnings)
|
|
110
|
+
- Equipment references (possessive vs generic)
|
|
111
|
+
- Caveat phrases (honesty markers)
|
|
112
|
+
- Transition patterns
|
|
113
|
+
|
|
114
|
+
**Part 3: Sentence Patterns**
|
|
115
|
+
- Rhythm variation targets with corpus examples
|
|
116
|
+
- First-person usage frequency
|
|
117
|
+
- Natural sentence flow demonstrations
|
|
118
|
+
|
|
119
|
+
**Part 4: Validation Checklist**
|
|
120
|
+
- Critical rules (must pass)
|
|
121
|
+
- Voice match rules (should pass)
|
|
122
|
+
- Actionable checkbox format
|
|
123
|
+
|
|
124
|
+
**Part 5: Quick Reference**
|
|
125
|
+
- Top phrases by frequency
|
|
126
|
+
- Statistics summary
|
|
127
|
+
|
|
128
|
+
## Using the Style Guide
|
|
129
|
+
|
|
130
|
+
Load the generated guide into Claude conversations:
|
|
131
|
+
|
|
132
|
+
```
|
|
133
|
+
Load C:\writing\voice-models\my-voice\writing_style_my-voice.md
|
|
134
|
+
and use it to write [content type] about [topic]
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
The guide includes validation checklists. After Claude writes, run:
|
|
138
|
+
|
|
139
|
+
```
|
|
140
|
+
Check what you just wrote against the style guide validation checklist.
|
|
141
|
+
Report any violations.
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Critical Validation Rules
|
|
145
|
+
|
|
146
|
+
The guide flags these as must-pass:
|
|
147
|
+
- Zero AI slop words (delve, leverage, unlock, seamless, robust)
|
|
148
|
+
- Zero em-dashes if corpus doesn't use them
|
|
149
|
+
- British/American spelling consistency
|
|
150
|
+
- Equipment named specifically (not "the product")
|
|
151
|
+
|
|
152
|
+
### Voice Match Validation
|
|
153
|
+
|
|
154
|
+
The guide checks these as should-pass:
|
|
155
|
+
- First-person frequency matches target (~0.8 per 100 words typical)
|
|
156
|
+
- Sentence length varies wildly (5-word to 40-word sentences)
|
|
157
|
+
- Honest caveats present ("It's not perfect", "I wish I'd...")
|
|
158
|
+
- Opening follows corpus patterns
|
|
159
|
+
|
|
160
|
+
## Analysis Output
|
|
161
|
+
|
|
162
|
+
The tool generates these JSON files in `corpus-name/analysis/`:
|
|
163
|
+
|
|
164
|
+
**Core Analysis:**
|
|
165
|
+
- `vocabulary.json` - Word choice, domain terms, British/American markers
|
|
166
|
+
- `sentence.json` - Length distribution, complexity patterns
|
|
167
|
+
- `voice.json` - First-person usage, hedging language, conversational markers
|
|
168
|
+
- `paragraph.json` - Structure and transition patterns
|
|
169
|
+
- `punctuation.json` - Dash types, comma density, parenthetical frequency
|
|
170
|
+
|
|
171
|
+
**v1.4.0 Additions:**
|
|
172
|
+
- `vocabulary-tiers.json` - AI slop detection, formality scoring with alternatives
|
|
173
|
+
- `phrase-library.json` - 50+ extracted phrases organised by type
|
|
174
|
+
|
|
175
|
+
**Advanced Analysis:**
|
|
176
|
+
- `function-words.json` - Z-scores for style fingerprinting
|
|
177
|
+
- `anti-mechanical.json` - Naturalness scoring
|
|
178
|
+
- `argument-flow.json` - How arguments open, build, close
|
|
179
|
+
- `paragraph-transitions.json` - Cross-paragraph connection patterns
|
|
180
|
+
- `specificity-patterns.json` - Possessive vs generic references
|
|
181
|
+
|
|
182
|
+
## Minimum Corpus Requirements
|
|
183
|
+
|
|
184
|
+
- **Minimum:** 15,000 words (~20 articles)
|
|
185
|
+
- **Recommended:** 30,000 words (~40 articles)
|
|
186
|
+
- **Ideal:** 50,000+ words
|
|
187
|
+
|
|
188
|
+
Below 15k words, statistical patterns become unreliable. The phrase library needs volume to find frequently-used patterns.
|
|
189
|
+
|
|
190
|
+
## MCP Tools Reference
|
|
191
|
+
|
|
192
|
+
### collect_corpus
|
|
193
|
+
|
|
194
|
+
Crawls sitemap and collects clean writing corpus.
|
|
195
|
+
|
|
196
|
+
**Parameters:**
|
|
197
|
+
- `sitemap_url` (required) - XML sitemap URL
|
|
198
|
+
- `output_name` (required) - Corpus identifier
|
|
199
|
+
- `output_dir` (required) - Storage directory
|
|
200
|
+
- `max_articles` (optional) - Limit, default 100
|
|
201
|
+
- `article_pattern` (optional) - Regex URL filter
|
|
202
|
+
|
|
203
|
+
### analyze_corpus
|
|
204
|
+
|
|
205
|
+
Runs linguistic analysis on collected corpus.
|
|
206
|
+
|
|
207
|
+
**Parameters:**
|
|
208
|
+
- `corpus_name` (required) - Name from collect_corpus
|
|
209
|
+
- `corpus_dir` (required) - Directory containing corpus
|
|
210
|
+
- `analysis_type` (optional) - full, quick, vocabulary, syntax (default: full)
|
|
211
|
+
|
|
212
|
+
### generate_style_guide
|
|
213
|
+
|
|
214
|
+
Generates v4 executable style guide.
|
|
215
|
+
|
|
216
|
+
**Parameters:**
|
|
217
|
+
- `corpus_name` (required) - Name of the corpus already processed by analyze_corpus
|
|
218
|
+
- `corpus_dir` (required) - Directory containing analysis
|
|
219
|
+
|
|
220
|
+
## Development
|
|
221
|
+
|
|
222
|
+
```bash
|
|
223
|
+
git clone https://github.com/houtini-ai/mcp-server-voice-analysis.git
|
|
224
|
+
cd mcp-server-voice-analysis
|
|
225
|
+
npm install
|
|
226
|
+
npm run build
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
For local development, point the Claude Desktop config at your local build:
|
|
230
|
+
|
|
231
|
+
```json
|
|
232
|
+
{
|
|
233
|
+
"mcpServers": {
|
|
234
|
+
"voice-analysis": {
|
|
235
|
+
"command": "node",
|
|
236
|
+
"args": ["C:\\path\\to\\mcp-server-voice-analysis\\dist\\index.js"]
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## Known Limitations
|
|
243
|
+
|
|
244
|
+
- Needs XML sitemap (RSS feeds not supported)
|
|
245
|
+
- Works best with single-author content
|
|
246
|
+
- Mixed authorship weakens statistical signals
|
|
247
|
+
- Heavily edited content produces less distinct voice patterns
|
|
248
|
+
- Transition phrase detection currently returns sparse results (being improved)
|
|
249
|
+
|
|
250
|
+
## What's Next
|
|
251
|
+
|
|
252
|
+
v1.5.0 planned features:
|
|
253
|
+
- Automated text validation against corpus
|
|
254
|
+
- Real-time writing feedback
|
|
255
|
+
- Custom forbidden vocabulary per corpus
|
|
256
|
+
- Improved transition phrase detection
|
|
257
|
+
|
|
258
|
+
---
|
|
259
|
+
|
|
260
|
+
**License:** Apache 2.0
|
|
261
|
+
**Author:** [Houtini](https://houtini.ai)
|
|
262
|
+
**Version:** 1.4.0
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phrase Extraction Analysis
|
|
3
|
+
*
|
|
4
|
+
* Extracts frequently-used phrases for direct imitation.
|
|
5
|
+
* Focuses on opening patterns, transitions, equipment references, and caveats.
|
|
6
|
+
*/
|
|
7
|
+
export interface PhraseExample {
|
|
8
|
+
phrase: string;
|
|
9
|
+
count: number;
|
|
10
|
+
context?: string;
|
|
11
|
+
}
|
|
12
|
+
export interface PhraseLibrary {
|
|
13
|
+
openingPatterns: {
|
|
14
|
+
personalStory: PhraseExample[];
|
|
15
|
+
directAction: PhraseExample[];
|
|
16
|
+
protectiveWarning: PhraseExample[];
|
|
17
|
+
};
|
|
18
|
+
transitionPhrases: PhraseExample[];
|
|
19
|
+
equipmentReferences: {
|
|
20
|
+
withPossessive: PhraseExample[];
|
|
21
|
+
generic: PhraseExample[];
|
|
22
|
+
};
|
|
23
|
+
caveatPhrases: PhraseExample[];
|
|
24
|
+
totalPhrases: number;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Main phrase extraction function: builds a PhraseLibrary from the given text.
|
|
28
|
+
*/
|
|
29
|
+
export declare function extractPhrases(text: string): PhraseLibrary;
|
|
30
|
+
//# sourceMappingURL=phrase-extraction.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"phrase-extraction.d.ts","sourceRoot":"","sources":["../../src/analyzers/phrase-extraction.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B,eAAe,EAAE;QACf,aAAa,EAAE,aAAa,EAAE,CAAC;QAC/B,YAAY,EAAE,aAAa,EAAE,CAAC;QAC9B,iBAAiB,EAAE,aAAa,EAAE,CAAC;KACpC,CAAC;IACF,iBAAiB,EAAE,aAAa,EAAE,CAAC;IACnC,mBAAmB,EAAE;QACnB,cAAc,EAAE,aAAa,EAAE,CAAC;QAChC,OAAO,EAAE,aAAa,EAAE,CAAC;KAC1B,CAAC;IACF,aAAa,EAAE,aAAa,EAAE,CAAC;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB;AAkLD;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,aAAa,CAsB1D"}
|