@houtini/voice-analyser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +566 -0
- package/dist/analyzers/anti-mechanical.d.ts +58 -0
- package/dist/analyzers/anti-mechanical.d.ts.map +1 -0
- package/dist/analyzers/anti-mechanical.js +223 -0
- package/dist/analyzers/anti-mechanical.js.map +1 -0
- package/dist/analyzers/char-ngrams.d.ts +54 -0
- package/dist/analyzers/char-ngrams.d.ts.map +1 -0
- package/dist/analyzers/char-ngrams.js +208 -0
- package/dist/analyzers/char-ngrams.js.map +1 -0
- package/dist/analyzers/function-words.d.ts +41 -0
- package/dist/analyzers/function-words.d.ts.map +1 -0
- package/dist/analyzers/function-words.js +167 -0
- package/dist/analyzers/function-words.js.map +1 -0
- package/dist/analyzers/information-density.d.ts +78 -0
- package/dist/analyzers/information-density.d.ts.map +1 -0
- package/dist/analyzers/information-density.js +384 -0
- package/dist/analyzers/information-density.js.map +1 -0
- package/dist/analyzers/paragraph.d.ts +28 -0
- package/dist/analyzers/paragraph.d.ts.map +1 -0
- package/dist/analyzers/paragraph.js +78 -0
- package/dist/analyzers/paragraph.js.map +1 -0
- package/dist/analyzers/pos-ngrams.d.ts +59 -0
- package/dist/analyzers/pos-ngrams.d.ts.map +1 -0
- package/dist/analyzers/pos-ngrams.js +249 -0
- package/dist/analyzers/pos-ngrams.js.map +1 -0
- package/dist/analyzers/punctuation.d.ts +34 -0
- package/dist/analyzers/punctuation.d.ts.map +1 -0
- package/dist/analyzers/punctuation.js +174 -0
- package/dist/analyzers/punctuation.js.map +1 -0
- package/dist/analyzers/sentence.d.ts +33 -0
- package/dist/analyzers/sentence.d.ts.map +1 -0
- package/dist/analyzers/sentence.js +74 -0
- package/dist/analyzers/sentence.js.map +1 -0
- package/dist/analyzers/vocabulary.d.ts +40 -0
- package/dist/analyzers/vocabulary.d.ts.map +1 -0
- package/dist/analyzers/vocabulary.js +96 -0
- package/dist/analyzers/vocabulary.js.map +1 -0
- package/dist/analyzers/voice-markers.d.ts +88 -0
- package/dist/analyzers/voice-markers.d.ts.map +1 -0
- package/dist/analyzers/voice-markers.js +297 -0
- package/dist/analyzers/voice-markers.js.map +1 -0
- package/dist/analyzers/word-ngrams.d.ts +59 -0
- package/dist/analyzers/word-ngrams.d.ts.map +1 -0
- package/dist/analyzers/word-ngrams.js +259 -0
- package/dist/analyzers/word-ngrams.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +190 -0
- package/dist/index.js.map +1 -0
- package/dist/reference/function-words.d.ts +48 -0
- package/dist/reference/function-words.d.ts.map +1 -0
- package/dist/reference/function-words.js +164 -0
- package/dist/reference/function-words.js.map +1 -0
- package/dist/tools/analyze-corpus.d.ts +15 -0
- package/dist/tools/analyze-corpus.d.ts.map +1 -0
- package/dist/tools/analyze-corpus.js +188 -0
- package/dist/tools/analyze-corpus.js.map +1 -0
- package/dist/tools/collect-corpus.d.ts +25 -0
- package/dist/tools/collect-corpus.d.ts.map +1 -0
- package/dist/tools/collect-corpus.js +109 -0
- package/dist/tools/collect-corpus.js.map +1 -0
- package/dist/tools/generate-enhanced-guide.d.ts +45 -0
- package/dist/tools/generate-enhanced-guide.d.ts.map +1 -0
- package/dist/tools/generate-enhanced-guide.js +881 -0
- package/dist/tools/generate-enhanced-guide.js.map +1 -0
- package/dist/tools/generate-guide.d.ts +16 -0
- package/dist/tools/generate-guide.d.ts.map +1 -0
- package/dist/tools/generate-guide.js +228 -0
- package/dist/tools/generate-guide.js.map +1 -0
- package/dist/utils/cleaner.d.ts +56 -0
- package/dist/utils/cleaner.d.ts.map +1 -0
- package/dist/utils/cleaner.js +193 -0
- package/dist/utils/cleaner.js.map +1 -0
- package/dist/utils/crawler.d.ts +13 -0
- package/dist/utils/crawler.d.ts.map +1 -0
- package/dist/utils/crawler.js +66 -0
- package/dist/utils/crawler.js.map +1 -0
- package/dist/utils/delta.d.ts +56 -0
- package/dist/utils/delta.d.ts.map +1 -0
- package/dist/utils/delta.js +124 -0
- package/dist/utils/delta.js.map +1 -0
- package/dist/utils/extractor.d.ts +14 -0
- package/dist/utils/extractor.d.ts.map +1 -0
- package/dist/utils/extractor.js +92 -0
- package/dist/utils/extractor.js.map +1 -0
- package/dist/utils/ngrams.d.ts +72 -0
- package/dist/utils/ngrams.d.ts.map +1 -0
- package/dist/utils/ngrams.js +154 -0
- package/dist/utils/ngrams.js.map +1 -0
- package/dist/utils/statistics.d.ts +22 -0
- package/dist/utils/statistics.d.ts.map +1 -0
- package/dist/utils/statistics.js +54 -0
- package/dist/utils/statistics.js.map +1 -0
- package/dist/utils/zscore.d.ts +44 -0
- package/dist/utils/zscore.d.ts.map +1 -0
- package/dist/utils/zscore.js +76 -0
- package/dist/utils/zscore.js.map +1 -0
- package/package.json +67 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;GAGG"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Voice Analysis MCP Server
|
|
4
|
+
* Automatic tone-of-voice analysis from published writing corpus
|
|
5
|
+
*/
|
|
6
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
7
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
8
|
+
import { CallToolRequestSchema, ListToolsRequestSchema, ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
|
|
9
|
+
import { collectCorpus } from './tools/collect-corpus.js';
|
|
10
|
+
import { analyzeCorpus } from './tools/analyze-corpus.js';
|
|
11
|
+
import { generateTovGuide } from './tools/generate-guide.js';
|
|
12
|
+
import { generateEnhancedGuide } from './tools/generate-enhanced-guide.js';
|
|
13
|
+
// MCP server instance: first argument is the server's name/version,
// second declares that it exposes the (initially empty) `tools` capability.
const server = new Server(
    { name: 'voice-analysis-server', version: '1.0.0' },
    { capabilities: { tools: {} } },
);
|
|
21
|
+
// Register tools
|
|
22
|
+
/**
 * ListTools handler: returns the catalogue of the four tools this server
 * offers (collect_corpus, analyze_corpus, generate_tov_guide,
 * generate_enhanced_guide) together with the JSON schema of their inputs.
 *
 * The schema fragments are produced by small local builders so every request
 * receives freshly created objects with the same key order as before.
 */
server.setRequestHandler(ListToolsRequestSchema, async () => {
    // { type: 'string', description }
    const stringProp = (description) => ({ type: 'string', description });
    // String restricted to a fixed set of values, with a documented default.
    const enumProp = (values, description, fallback) => ({
        type: 'string',
        enum: values,
        description,
        default: fallback,
    });
    // Object schema wrapper used as every tool's inputSchema.
    const objectSchema = (properties, required) => ({
        type: 'object',
        properties,
        required,
    });
    // Both guide generators accept the same corpus/output-format options.
    const analyzedCorpusProp = () => stringProp('Name of analyzed corpus');
    const outputFormatProp = () => enumProp(['llm', 'human', 'both'], 'Output format (default: both)', 'both');

    const collectCorpusTool = {
        name: 'collect_corpus',
        description: 'Crawl sitemap and collect clean writing corpus from published articles',
        inputSchema: objectSchema({
            sitemap_url: stringProp('URL to XML sitemap (e.g., https://example.com/post-sitemap.xml)'),
            output_name: stringProp('Corpus identifier/name (e.g., "richard-baxter")'),
            max_articles: {
                type: 'number',
                description: 'Maximum articles to process (default: 100)',
                default: 100,
            },
            article_pattern: stringProp('Optional regex to filter URLs'),
        }, ['sitemap_url', 'output_name']),
    };

    const analyzeCorpusTool = {
        name: 'analyze_corpus',
        description: 'Perform linguistic analysis on collected corpus (vocabulary, sentence structure, voice markers)',
        inputSchema: objectSchema({
            corpus_name: stringProp('Name of corpus to analyze'),
            analysis_type: enumProp(['full', 'quick', 'vocabulary', 'syntax'], 'Type of analysis to perform (default: full)', 'full'),
        }, ['corpus_name']),
    };

    const tovGuideTool = {
        name: 'generate_tov_guide',
        description: 'Generate tone-of-voice guide from analysis results (LLM-optimized statistical model)',
        inputSchema: objectSchema({
            corpus_name: analyzedCorpusProp(),
            output_format: outputFormatProp(),
            template: enumProp(['minimal', 'standard', 'comprehensive'], 'Guide template (default: standard)', 'standard'),
        }, ['corpus_name']),
    };

    const enhancedGuideTool = {
        name: 'generate_enhanced_guide',
        description: 'Generate ENHANCED tone-of-voice guide integrating all n-gram patterns (character, word, POS) with function words and traditional metrics. Creates comprehensive LLM instruction set with contrastive examples.',
        inputSchema: objectSchema({
            corpus_name: analyzedCorpusProp(),
            output_format: outputFormatProp(),
        }, ['corpus_name']),
    };

    return {
        tools: [collectCorpusTool, analyzeCorpusTool, tovGuideTool, enhancedGuideTool],
    };
});
|
|
119
|
+
// Tool handlers
|
|
120
|
+
// Lookup from tool name to the imported function that implements it.
const toolRunners = new Map([
    ['collect_corpus', collectCorpus],
    ['analyze_corpus', analyzeCorpus],
    ['generate_tov_guide', generateTovGuide],
    ['generate_enhanced_guide', generateEnhancedGuide],
]);
/**
 * CallTool handler: runs the tool named in `request.params.name` with
 * `request.params.arguments` and returns its result as a single text content
 * item holding pretty-printed (2-space) JSON.
 *
 * Errors: an unrecognised tool name raises McpError(MethodNotFound). An
 * McpError thrown while handling the call is rethrown unchanged; anything
 * else is wrapped in McpError(InternalError) carrying the original message.
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
    try {
        const runTool = toolRunners.get(request.params.name);
        if (runTool === undefined) {
            throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${request.params.name}`);
        }
        const toolArgs = request.params.arguments;
        const outcome = await runTool(toolArgs);
        const serialized = JSON.stringify(outcome, null, 2);
        return {
            content: [{ type: 'text', text: serialized }],
        };
    }
    catch (error) {
        // Protocol-level errors already have the right code; pass them through.
        if (error instanceof McpError) {
            throw error;
        }
        throw new McpError(ErrorCode.InternalError, `Tool execution failed: ${error instanceof Error ? error.message : String(error)}`);
    }
});
|
|
182
|
+
// Start server
|
|
183
|
+
/**
 * Entry point: creates a stdio transport and connects the MCP server to it.
 *
 * @returns {Promise<void>} Settles when `server.connect` settles; rejects if
 *   connecting fails.
 */
async function main() {
    const transport = new StdioServerTransport();
    await server.connect(transport);
}
main().catch((error) => {
    // Previously the failure was discarded and the process exited with code 1
    // and no output. Report it on stderr (console.error) so the cause is
    // visible, leaving stdout untouched for the stdio transport.
    console.error('voice-analysis-server failed to start:', error);
    process.exit(1);
});
|
|
190
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;GAGG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EACL,qBAAqB,EACrB,sBAAsB,EACtB,SAAS,EACT,QAAQ,EACT,MAAM,oCAAoC,CAAC;AAE5C,OAAO,EAAE,aAAa,EAAuB,MAAM,2BAA2B,CAAC;AAC/E,OAAO,EAAE,aAAa,EAAuB,MAAM,2BAA2B,CAAC;AAC/E,OAAO,EAAE,gBAAgB,EAA0B,MAAM,2BAA2B,CAAC;AACrF,OAAO,EAAE,qBAAqB,EAAuB,MAAM,oCAAoC,CAAC;AAEhG,MAAM,MAAM,GAAG,IAAI,MAAM,CACvB;IACE,IAAI,EAAE,uBAAuB;IAC7B,OAAO,EAAE,OAAO;CACjB,EACD;IACE,YAAY,EAAE;QACZ,KAAK,EAAE,EAAE;KACV;CACF,CACF,CAAC;AAEF,iBAAiB;AACjB,MAAM,CAAC,iBAAiB,CAAC,sBAAsB,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;IAC5D,KAAK,EAAE;QACL;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,wEAAwE;YACrF,WAAW,EAAE;gBACX,IAAI,EAAE,QAAQ;gBACd,UAAU,EAAE;oBACV,WAAW,EAAE;wBACX,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,iEAAiE;qBAC/E;oBACD,WAAW,EAAE;wBACX,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,iDAAiD;qBAC/D;oBACD,YAAY,EAAE;wBACZ,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,4CAA4C;wBACzD,OAAO,EAAE,GAAG;qBACb;oBACD,eAAe,EAAE;wBACf,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,+BAA+B;qBAC7C;iBACF;gBACD,QAAQ,EAAE,CAAC,aAAa,EAAE,aAAa,CAAC;aACzC;SACF;QACD;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,iGAAiG;YAC9G,WAAW,EAAE;gBACX,IAAI,EAAE,QAAQ;gBACd,UAAU,EAAE;oBACV,WAAW,EAAE;wBACX,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,2BAA2B;qBACzC;oBACD,aAAa,EAAE;wBACb,IAAI,EAAE,QAAQ;wBACd,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,QAAQ,CAAC;wBAC/C,WAAW,EAAE,6CAA6C;wBAC1D,OAAO,EAAE,MAAM;qBAChB;iBACF;gBACD,QAAQ,EAAE,CAAC,aAAa,CAAC;aAC1B;SACF;QACD;YACE,IAAI,EAAE,oBAAoB;YAC1B,WAAW,EAAE,sFAAsF;YACnG,WAAW,EAAE;gBACX,IAAI,EAAE,QAAQ;gBACd,UAAU,EAAE;oBACV,WAAW,EAAE;wBACX,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,yBAAyB;qBACvC;oBACD,aAAa,EAAE;wBACb,IAAI,EAAE,QAAQ;wBACd,IAAI,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,CAAC;wBAC9B,WAAW,EAAE,+BAA+B;wBAC5C,OAAO,EAAE,MAAM;qBAChB;oBACD,QAAQ,EAAE;wBACR,IAAI,EAAE,QAAQ;wBACd,IAAI,EAAE,CAAC,SAAS,EAAE,UAAU,EAAE,eAAe,CAAC;wBAC9C,WAAW,EAAE,oCAAoC;wBACjD,OAAO,EAAE,UAAU;qBACpB;iBACF;gBACD,QAAQ,EAAE,CAAC,aAAa
,CAAC;aAC1B;SACF;QACD;YACE,IAAI,EAAE,yBAAyB;YAC/B,WAAW,EAAE,gNAAgN;YAC7N,WAAW,EAAE;gBACX,IAAI,EAAE,QAAQ;gBACd,UAAU,EAAE;oBACV,WAAW,EAAE;wBACX,IAAI,EAAE,QAAQ;wBACd,WAAW,EAAE,yBAAyB;qBACvC;oBACD,aAAa,EAAE;wBACb,IAAI,EAAE,QAAQ;wBACd,IAAI,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,CAAC;wBAC9B,WAAW,EAAE,+BAA+B;wBAC5C,OAAO,EAAE,MAAM;qBAChB;iBACF;gBACD,QAAQ,EAAE,CAAC,aAAa,CAAC;aAC1B;SACF;KACF;CACF,CAAC,CAAC,CAAC;AAEJ,gBAAgB;AAChB,MAAM,CAAC,iBAAiB,CAAC,qBAAqB,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;IAChE,IAAI,CAAC;QACH,QAAQ,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;YAC5B,KAAK,gBAAgB,CAAC,CAAC,CAAC;gBACtB,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,SAA2C,CAAC;gBAC1E,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,CAAC;gBAC3C,OAAO;oBACL,OAAO,EAAE;wBACP;4BACE,IAAI,EAAE,MAAM;4BACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;yBACtC;qBACF;iBACF,CAAC;YACJ,CAAC;YAED,KAAK,gBAAgB,CAAC,CAAC,CAAC;gBACtB,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,SAA2C,CAAC;gBAC1E,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,CAAC;gBAC3C,OAAO;oBACL,OAAO,EAAE;wBACP;4BACE,IAAI,EAAE,MAAM;4BACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;yBACtC;qBACF;iBACF,CAAC;YACJ,CAAC;YAED,KAAK,oBAAoB,CAAC,CAAC,CAAC;gBAC1B,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,SAA8C,CAAC;gBAC7E,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,CAAC;gBAC9C,OAAO;oBACL,OAAO,EAAE;wBACP;4BACE,IAAI,EAAE,MAAM;4BACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;yBACtC;qBACF;iBACF,CAAC;YACJ,CAAC;YAED,KAAK,yBAAyB,CAAC,CAAC,CAAC;gBAC/B,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,SAA2C,CAAC;gBAC1E,MAAM,MAAM,GAAG,MAAM,qBAAqB,CAAC,MAAM,CAAC,CAAC;gBACnD,OAAO;oBACL,OAAO,EAAE;wBACP;4BACE,IAAI,EAAE,MAAM;4BACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;yBACtC;qBACF;iBACF,CAAC;YACJ,CAAC;YAED;gBACE,MAAM,IAAI,QAAQ,CAChB,SAAS,CAAC,cAAc,EACxB,iBAAiB,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CACvC,CAAC;QACN,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,QAAQ,EAAE,CAAC;YAC9B,MAAM,KAAK,CAAC;QACd,CAAC;QAED,MAAM,IAAI,QAAQ,CAChB,SAAS,CAAC,aAAa,EACvB,0BAA0B,KAAK,YAAY,KAAK,CAAC,CA
AC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CACnF,CAAC;IACJ,CAAC;AACH,CAAC,CAAC,CAAC;AAEH,eAAe;AACf,KAAK,UAAU,IAAI;IACjB,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;AAClC,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Function word reference list for stylometric analysis
|
|
3
|
+
*
|
|
4
|
+
* Based on Mosteller-Wallace (1963), Burrows' Delta research
|
|
5
|
+
* 70 core function words optimized for authorship fingerprinting
|
|
6
|
+
*/
|
|
7
|
+
export type FunctionWordCategory = 'article' | 'determiner' | 'preposition' | 'conjunction' | 'modal' | 'auxiliary' | 'pronoun';
|
|
8
|
+
export interface FunctionWord {
|
|
9
|
+
word: string;
|
|
10
|
+
category: FunctionWordCategory;
|
|
11
|
+
tier: 1 | 2 | 3 | 4;
|
|
12
|
+
britishMarker?: boolean;
|
|
13
|
+
notes?: string;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Core 70 function words for stylometric analysis
|
|
17
|
+
*/
|
|
18
|
+
export declare const FUNCTION_WORDS: FunctionWord[];
|
|
19
|
+
/**
|
|
20
|
+
* Get function words by tier (discriminative priority)
|
|
21
|
+
*/
|
|
22
|
+
export declare function getFunctionWordsByTier(tier: 1 | 2 | 3 | 4): FunctionWord[];
|
|
23
|
+
/**
|
|
24
|
+
* Get function words by category
|
|
25
|
+
*/
|
|
26
|
+
export declare function getFunctionWordsByCategory(category: FunctionWordCategory): FunctionWord[];
|
|
27
|
+
/**
|
|
28
|
+
* Get British marker function words
|
|
29
|
+
*/
|
|
30
|
+
export declare function getBritishMarkers(): FunctionWord[];
|
|
31
|
+
/**
|
|
32
|
+
* Create lookup map for fast access
|
|
33
|
+
*/
|
|
34
|
+
export declare function getFunctionWordMap(): Map<string, FunctionWord>;
|
|
35
|
+
/**
|
|
36
|
+
* General English reference statistics for z-score calculation
|
|
37
|
+
*
|
|
38
|
+
* These are baseline frequencies (per 1000 words) from large English corpora
|
|
39
|
+
* Source: Brown Corpus, BNC (British National Corpus)
|
|
40
|
+
*
|
|
41
|
+
* Note: These should ideally be calculated from a reference corpus
|
|
42
|
+
* For now, using typical values from linguistic research
|
|
43
|
+
*/
|
|
44
|
+
export declare const GENERAL_ENGLISH_STATS: Record<string, {
|
|
45
|
+
mean: number;
|
|
46
|
+
stdDev: number;
|
|
47
|
+
}>;
|
|
48
|
+
//# sourceMappingURL=function-words.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"function-words.d.ts","sourceRoot":"","sources":["../../src/reference/function-words.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,MAAM,MAAM,oBAAoB,GAC5B,SAAS,GACT,YAAY,GACZ,aAAa,GACb,aAAa,GACb,OAAO,GACP,WAAW,GACX,SAAS,CAAC;AAEd,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,IAAI,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,eAAO,MAAM,cAAc,EAAE,YAAY,EAmFxC,CAAC;AAEF;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,YAAY,EAAE,CAE1E;AAED;;GAEG;AACH,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,oBAAoB,GAAG,YAAY,EAAE,CAEzF;AAED;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,YAAY,EAAE,CAElD;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,CAM9D;AAED;;;;;;;;GAQG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,MAAM,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAwClF,CAAC"}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Function word reference list for stylometric analysis
|
|
3
|
+
*
|
|
4
|
+
* Based on Mosteller-Wallace (1963), Burrows' Delta research
|
|
5
|
+
* 70 core function words optimized for authorship fingerprinting
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Core 70 function words for stylometric analysis
* (the array holds 71 entries: 'that' is listed both as a determiner and as a conjunction)
|
|
9
|
+
*/
|
|
10
|
+
export const FUNCTION_WORDS = [
|
|
11
|
+
// Articles & Determiners (12 words)
|
|
12
|
+
{ word: 'a', category: 'article', tier: 2 },
|
|
13
|
+
{ word: 'an', category: 'article', tier: 2 },
|
|
14
|
+
{ word: 'the', category: 'article', tier: 2 },
|
|
15
|
+
{ word: 'this', category: 'determiner', tier: 2 },
|
|
16
|
+
{ word: 'that', category: 'determiner', tier: 2 },
|
|
17
|
+
{ word: 'these', category: 'determiner', tier: 2 },
|
|
18
|
+
{ word: 'those', category: 'determiner', tier: 2 },
|
|
19
|
+
{ word: 'some', category: 'determiner', tier: 2 },
|
|
20
|
+
{ word: 'any', category: 'determiner', tier: 2 },
|
|
21
|
+
{ word: 'all', category: 'determiner', tier: 2 },
|
|
22
|
+
{ word: 'every', category: 'determiner', tier: 2 },
|
|
23
|
+
{ word: 'no', category: 'determiner', tier: 2 },
|
|
24
|
+
// Prepositions (16 words)
|
|
25
|
+
{ word: 'at', category: 'preposition', tier: 2 },
|
|
26
|
+
{ word: 'by', category: 'preposition', tier: 2 },
|
|
27
|
+
{ word: 'for', category: 'preposition', tier: 2 },
|
|
28
|
+
{ word: 'from', category: 'preposition', tier: 2 },
|
|
29
|
+
{ word: 'in', category: 'preposition', tier: 2 },
|
|
30
|
+
{ word: 'into', category: 'preposition', tier: 2 },
|
|
31
|
+
{ word: 'of', category: 'preposition', tier: 2 },
|
|
32
|
+
{ word: 'on', category: 'preposition', tier: 2 },
|
|
33
|
+
{ word: 'to', category: 'preposition', tier: 2 },
|
|
34
|
+
{ word: 'upon', category: 'preposition', tier: 1, notes: 'Highly discriminative' },
|
|
35
|
+
{ word: 'with', category: 'preposition', tier: 2 },
|
|
36
|
+
{ word: 'without', category: 'preposition', tier: 1 },
|
|
37
|
+
{ word: 'through', category: 'preposition', tier: 2 },
|
|
38
|
+
{ word: 'between', category: 'preposition', tier: 2 },
|
|
39
|
+
{ word: 'within', category: 'preposition', tier: 1 },
|
|
40
|
+
{ word: 'across', category: 'preposition', tier: 2 },
|
|
41
|
+
// Conjunctions (14 words)
|
|
42
|
+
{ word: 'and', category: 'conjunction', tier: 2 },
|
|
43
|
+
{ word: 'as', category: 'conjunction', tier: 2 },
|
|
44
|
+
{ word: 'but', category: 'conjunction', tier: 2 },
|
|
45
|
+
{ word: 'if', category: 'conjunction', tier: 2 },
|
|
46
|
+
{ word: 'or', category: 'conjunction', tier: 2 },
|
|
47
|
+
{ word: 'so', category: 'conjunction', tier: 2 },
|
|
48
|
+
{ word: 'than', category: 'conjunction', tier: 2 },
|
|
49
|
+
{ word: 'that', category: 'conjunction', tier: 2 },
|
|
50
|
+
{ word: 'though', category: 'conjunction', tier: 1, notes: 'Highly discriminative' },
|
|
51
|
+
{ word: 'when', category: 'conjunction', tier: 2 },
|
|
52
|
+
{ word: 'while', category: 'conjunction', tier: 2 },
|
|
53
|
+
{ word: 'whilst', category: 'conjunction', tier: 1, britishMarker: true, notes: 'British preference' },
|
|
54
|
+
{ word: 'because', category: 'conjunction', tier: 2 },
|
|
55
|
+
{ word: 'although', category: 'conjunction', tier: 2 },
|
|
56
|
+
// Modal Verbs (9 words)
|
|
57
|
+
{ word: 'can', category: 'modal', tier: 3 },
|
|
58
|
+
{ word: 'could', category: 'modal', tier: 3 },
|
|
59
|
+
{ word: 'may', category: 'modal', tier: 1, notes: 'Highly discriminative' },
|
|
60
|
+
{ word: 'might', category: 'modal', tier: 3 },
|
|
61
|
+
{ word: 'must', category: 'modal', tier: 1, notes: 'Highly discriminative' },
|
|
62
|
+
{ word: 'shall', category: 'modal', tier: 1, notes: 'Highly discriminative' },
|
|
63
|
+
{ word: 'should', category: 'modal', tier: 3 },
|
|
64
|
+
{ word: 'will', category: 'modal', tier: 3 },
|
|
65
|
+
{ word: 'would', category: 'modal', tier: 3 },
|
|
66
|
+
// Auxiliary Verbs (11 words)
|
|
67
|
+
{ word: 'be', category: 'auxiliary', tier: 3 },
|
|
68
|
+
{ word: 'been', category: 'auxiliary', tier: 3 },
|
|
69
|
+
{ word: 'being', category: 'auxiliary', tier: 3 },
|
|
70
|
+
{ word: 'do', category: 'auxiliary', tier: 3 },
|
|
71
|
+
{ word: 'does', category: 'auxiliary', tier: 3 },
|
|
72
|
+
{ word: 'had', category: 'auxiliary', tier: 3 },
|
|
73
|
+
{ word: 'has', category: 'auxiliary', tier: 3 },
|
|
74
|
+
{ word: 'have', category: 'auxiliary', tier: 3 },
|
|
75
|
+
{ word: 'is', category: 'auxiliary', tier: 3 },
|
|
76
|
+
{ word: 'was', category: 'auxiliary', tier: 3 },
|
|
77
|
+
{ word: 'were', category: 'auxiliary', tier: 3 },
|
|
78
|
+
// Pronouns (9 words - use with caution, genre-sensitive)
|
|
79
|
+
{ word: 'I', category: 'pronoun', tier: 4, notes: 'Genre-sensitive, track separately' },
|
|
80
|
+
{ word: 'we', category: 'pronoun', tier: 4, notes: 'Genre-sensitive' },
|
|
81
|
+
{ word: 'you', category: 'pronoun', tier: 4, notes: 'Genre-sensitive' },
|
|
82
|
+
{ word: 'he', category: 'pronoun', tier: 4, notes: 'Genre-sensitive' },
|
|
83
|
+
{ word: 'she', category: 'pronoun', tier: 4, notes: 'Genre-sensitive' },
|
|
84
|
+
{ word: 'it', category: 'pronoun', tier: 4, notes: 'Genre-sensitive' },
|
|
85
|
+
{ word: 'they', category: 'pronoun', tier: 4, notes: 'Genre-sensitive' },
|
|
86
|
+
{ word: 'one', category: 'pronoun', tier: 4, notes: 'Genre-sensitive' },
|
|
87
|
+
{ word: 'who', category: 'pronoun', tier: 4, notes: 'Genre-sensitive' },
|
|
88
|
+
];
|
|
89
|
+
/**
 * Collect the FUNCTION_WORDS entries belonging to one tier.
 *
 * @param {number} tier - Tier to select; compared with strict equality
 *   against each entry's `tier` (entries in the list use 1 to 4).
 * @returns {Array<object>} New array of matching entries, in list order.
 */
export function getFunctionWordsByTier(tier) {
    const selected = [];
    for (const entry of FUNCTION_WORDS) {
        if (entry.tier === tier) {
            selected.push(entry);
        }
    }
    return selected;
}
|
|
95
|
+
/**
 * Collect the FUNCTION_WORDS entries belonging to one grammatical category.
 *
 * @param {string} category - Category to select (e.g. 'article',
 *   'preposition', 'modal'); compared with strict equality.
 * @returns {Array<object>} New array of matching entries, in list order.
 */
export function getFunctionWordsByCategory(category) {
    const selected = [];
    for (const entry of FUNCTION_WORDS) {
        if (entry.category === category) {
            selected.push(entry);
        }
    }
    return selected;
}
|
|
101
|
+
/**
 * Collect the FUNCTION_WORDS entries whose `britishMarker` flag is truthy.
 *
 * @returns {Array<object>} New array of flagged entries, in list order.
 */
export function getBritishMarkers() {
    const flagged = [];
    for (const entry of FUNCTION_WORDS) {
        if (entry.britishMarker) {
            flagged.push(entry);
        }
    }
    return flagged;
}
|
|
107
|
+
/**
 * Build a lookup from lower-cased word to its FUNCTION_WORDS entry.
 *
 * Keys are `word.toLowerCase()`, so the pronoun 'I' is stored under 'i'.
 * When a word occurs more than once in the list, the later entry replaces
 * the earlier one ('that' is listed as both determiner and conjunction, so
 * the conjunction entry is the one kept).
 *
 * @returns {Map<string, object>} A new Map on every call.
 */
export function getFunctionWordMap() {
    return new Map(FUNCTION_WORDS.map((entry) => [entry.word.toLowerCase(), entry]));
}
|
|
117
|
+
/**
|
|
118
|
+
* General English reference statistics for z-score calculation
|
|
119
|
+
*
|
|
120
|
+
* These are baseline frequencies (per 1000 words) from large English corpora
|
|
121
|
+
* Source: Brown Corpus, BNC (British National Corpus)
|
|
122
|
+
*
|
|
123
|
+
* Note: These should ideally be calculated from a reference corpus
|
|
124
|
+
* For now, using typical values from linguistic research
|
|
125
|
+
*/
|
|
126
|
+
export const GENERAL_ENGLISH_STATS = {
|
|
127
|
+
// High-frequency words (very stable across corpora)
|
|
128
|
+
'the': { mean: 60.0, stdDev: 10.0 },
|
|
129
|
+
'of': { mean: 35.0, stdDev: 8.0 },
|
|
130
|
+
'and': { mean: 28.0, stdDev: 7.0 },
|
|
131
|
+
'a': { mean: 22.0, stdDev: 5.0 },
|
|
132
|
+
'to': { mean: 25.0, stdDev: 6.0 },
|
|
133
|
+
'in': { mean: 20.0, stdDev: 5.0 },
|
|
134
|
+
'is': { mean: 10.0, stdDev: 3.0 },
|
|
135
|
+
'that': { mean: 12.0, stdDev: 4.0 },
|
|
136
|
+
'for': { mean: 12.0, stdDev: 4.0 },
|
|
137
|
+
'it': { mean: 11.0, stdDev: 3.0 },
|
|
138
|
+
'with': { mean: 9.0, stdDev: 3.0 },
|
|
139
|
+
'as': { mean: 8.0, stdDev: 3.0 },
|
|
140
|
+
'was': { mean: 7.0, stdDev: 2.5 },
|
|
141
|
+
'on': { mean: 7.0, stdDev: 2.5 },
|
|
142
|
+
'be': { mean: 7.0, stdDev: 2.5 },
|
|
143
|
+
// Modal verbs
|
|
144
|
+
'can': { mean: 3.0, stdDev: 1.5 },
|
|
145
|
+
'would': { mean: 4.0, stdDev: 2.0 },
|
|
146
|
+
'will': { mean: 3.5, stdDev: 2.0 },
|
|
147
|
+
'could': { mean: 2.0, stdDev: 1.0 },
|
|
148
|
+
'should': { mean: 1.5, stdDev: 1.0 },
|
|
149
|
+
'may': { mean: 1.0, stdDev: 0.8 },
|
|
150
|
+
'might': { mean: 0.8, stdDev: 0.6 },
|
|
151
|
+
'must': { mean: 0.9, stdDev: 0.7 },
|
|
152
|
+
'shall': { mean: 0.2, stdDev: 0.3 },
|
|
153
|
+
// British markers and rare discriminators ('upon' is not flagged britishMarker in FUNCTION_WORDS)
|
|
154
|
+
'whilst': { mean: 0.1, stdDev: 0.2 }, // Very rare in American English
|
|
155
|
+
'upon': { mean: 0.5, stdDev: 0.5 },
|
|
156
|
+
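// Pronouns (highly variable by genre)
// NOTE(review): 'I' is keyed in upper case here, while getFunctionWordMap() lower-cases its keys — confirm lookups use matching case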
|
|
157
|
+
'I': { mean: 5.0, stdDev: 5.0 }, // Large variance
|
|
158
|
+
'we': { mean: 3.0, stdDev: 3.0 },
|
|
159
|
+
'you': { mean: 4.0, stdDev: 4.0 },
|
|
160
|
+
'he': { mean: 2.5, stdDev: 2.0 },
|
|
161
|
+
'she': { mean: 1.5, stdDev: 1.5 },
|
|
162
|
+
'they': { mean: 2.0, stdDev: 1.5 },
|
|
163
|
+
};
|
|
164
|
+
//# sourceMappingURL=function-words.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"function-words.js","sourceRoot":"","sources":["../../src/reference/function-words.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAmBH;;GAEG;AACH,MAAM,CAAC,MAAM,cAAc,GAAmB;IAC5C,oCAAoC;IACpC,EAAE,IAAI,EAAE,GAAG,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAE;IAC3C,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAE;IAC5C,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAE;IAC7C,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,EAAE;IACjD,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,EAAE;IACjD,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,EAAE;IAClD,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,EAAE;IAClD,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,EAAE;IACjD,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,EAAE;IAClD,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,EAAE;IAE/C,0BAA0B;IAC1B,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IACjD,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAClD,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAClD,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE;IAClF,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAClD,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IACrD,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE
,CAAC,EAAE;IACrD,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IACrD,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IACpD,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAEpD,0BAA0B;IAC1B,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IACjD,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IACjD,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAClD,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAClD,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE;IACpF,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAClD,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IACnD,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,KAAK,EAAE,oBAAoB,EAAE;IACtG,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IACrD,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE;IAEtD,wBAAwB;IACxB,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE;IAC3C,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE;IAC7C,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE;IAC3E,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE;IAC7C,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE;IAC5E,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE;IAC7E,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE;IAC9C,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE;IAC5C,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EA
AE,CAAC,EAAE;IAE7C,6BAA6B;IAC7B,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,EAAE;IAC9C,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,EAAE;IACjD,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,EAAE;IAC9C,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,EAAE;IAC/C,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,EAAE;IAC/C,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,EAAE;IAChD,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,EAAE;IAC9C,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,EAAE;IAC/C,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,EAAE;IAEhD,yDAAyD;IACzD,EAAE,IAAI,EAAE,GAAG,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,mCAAmC,EAAE;IACvF,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE;IACtE,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE;IACvE,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE;IACtE,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE;IACvE,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE;IACtE,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE;IACxE,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE;IACvE,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE;CACxE,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,sBAAsB,CAAC,IAAmB;IACxD,OAAO,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;AACvD,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,0BAA0B,CAAC,QAA8B;IACvE,OAAO,cAAc,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC;AAC/D,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB;IAC/B,OAAO,cAAc,C
AAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,aAAa,CAAC,CAAC;AACvD,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB;IAChC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAwB,CAAC;IAC5C,KAAK,MAAM,EAAE,IAAI,cAAc,EAAE,CAAC;QAChC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,EAAE,CAAC,CAAC;IACrC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAqD;IACrF,oDAAoD;IACpD,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;IACnC,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;IACjC,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;IAClC,GAAG,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;IAChC,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;IACjC,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;IACjC,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;IACjC,MAAM,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;IACnC,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;IAClC,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;IACjC,MAAM,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IAClC,IAAI,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IAChC,KAAK,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IACjC,IAAI,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IAChC,IAAI,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IAEhC,cAAc;IACd,KAAK,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IACjC,OAAO,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IACnC,MAAM,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IAClC,OAAO,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IACnC,QAAQ,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IACpC,KAAK,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IACjC,OAAO,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IACnC,MAAM,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IAClC,OAAO,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IAEnC,kBAAkB;IAClB,QAAQ,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,EAAG,gCAAgC;IACvE,MAAM,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IAElC,sCAAsC;
IACtC,GAAG,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,EAAG,iBAAiB;IACnD,IAAI,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IAChC,KAAK,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IACjC,IAAI,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IAChC,KAAK,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;IACjC,MAAM,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE;CACnC,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool: analyze_corpus
|
|
3
|
+
* Perform linguistic analysis on collected corpus
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Input for the analyze_corpus tool.
 */
export interface AnalyzeCorpusParams {
|
|
6
|
+
/** Corpus to analyse; the implementation resolves it as the directory `corpus/<corpus_name>` under the current working directory. */
corpus_name: string;
|
|
7
|
+
/**
 * Which analyzer group to run; the implementation defaults to 'full' when omitted.
 * 'vocabulary' runs the vocabulary analyzer; 'syntax' runs sentence, punctuation and
 * paragraph analyzers; 'quick' runs voice markers, function words, char/word/POS
 * n-grams, anti-mechanical and information-density analyzers; 'full' runs all of them.
 */
analysis_type?: 'full' | 'quick' | 'vocabulary' | 'syntax';
|
|
8
|
+
}
|
|
9
|
+
/**
 * Value resolved by analyzeCorpus once all output files have been written.
 */
export interface AnalyzeCorpusResult {
|
|
10
|
+
/** Set to `true` by the implementation on normal completion; failures surface as a rejected promise instead. */
success: boolean;
|
|
11
|
+
/** The `corpus_name` that was passed in, echoed back unchanged. */
corpus_name: string;
|
|
12
|
+
/** Path of the corpus' `analysis` directory that the result files were written to. */
analysis_path: string;
|
|
13
|
+
}
|
|
14
|
+
/**
 * Analyse the markdown articles found in `corpus/<corpus_name>/articles` (relative to the
 * current working directory) and write the analyzer results plus a `summary.md` into
 * `corpus/<corpus_name>/analysis`.
 *
 * @param params Corpus name and optional analysis type (see AnalyzeCorpusParams).
 * @returns The corpus name and the path of the analysis directory.
 */
export declare function analyzeCorpus(params: AnalyzeCorpusParams): Promise<AnalyzeCorpusResult>;
|
|
15
|
+
//# sourceMappingURL=analyze-corpus.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"analyze-corpus.d.ts","sourceRoot":"","sources":["../../src/tools/analyze-corpus.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAgBH,MAAM,WAAW,mBAAmB;IAClC,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,GAAG,OAAO,GAAG,YAAY,GAAG,QAAQ,CAAC;CAC5D;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,wBAAsB,aAAa,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAuL7F"}
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool: analyze_corpus
|
|
3
|
+
* Perform linguistic analysis on collected corpus
|
|
4
|
+
*/
|
|
5
|
+
import fs from 'fs/promises';
|
|
6
|
+
import path from 'path';
|
|
7
|
+
import { analyzeVocabulary } from '../analyzers/vocabulary.js';
|
|
8
|
+
import { analyzeSentences } from '../analyzers/sentence.js';
|
|
9
|
+
import { analyzeVoiceMarkers } from '../analyzers/voice-markers.js';
|
|
10
|
+
import { analyzeParagraphs } from '../analyzers/paragraph.js';
|
|
11
|
+
import { analyzePunctuation, summarizePunctuation } from '../analyzers/punctuation.js';
|
|
12
|
+
import { analyzeFunctionWords, summarizeFunctionWordAnalysis } from '../analyzers/function-words.js';
|
|
13
|
+
import { analyzeCharNGrams, summarizeCharNGrams } from '../analyzers/char-ngrams.js';
|
|
14
|
+
import { analyzeWordNGrams, summarizeWordNGrams } from '../analyzers/word-ngrams.js';
|
|
15
|
+
import { analyzePOSNGrams, summarizePOSNGrams } from '../analyzers/pos-ngrams.js';
|
|
16
|
+
import { analyzeAntiMechanical } from '../analyzers/anti-mechanical.js';
|
|
17
|
+
import { analyzeInformationDensity, summarizeInformationDensity } from '../analyzers/information-density.js';
|
|
18
|
+
/**
 * Tool entry point: run linguistic analyses over a collected corpus.
 *
 * Reads every `*.md` file in `corpus/<corpus_name>/articles` (relative to
 * `process.cwd()`), strips a leading frontmatter block from each article,
 * concatenates the articles and writes the analyzer results into
 * `corpus/<corpus_name>/analysis`. A `summary.md` built from the JSON files
 * present in that directory is always written last.
 *
 * Analyzer groups selected by `analysis_type`:
 *  - 'vocabulary': vocabulary.json
 *  - 'syntax':     sentence.json, punctuation.json (+ summary), paragraph.json
 *  - 'quick':      voice, function words, char/word/POS n-grams,
 *                  anti-mechanical and information density (+ summaries)
 *  - 'full':       all of the above
 * Any other value runs no analyzer; only `summary.md` is regenerated.
 *
 * @param {object} params
 * @param {string} params.corpus_name - Corpus directory name below `corpus/`.
 * @param {'full'|'quick'|'vocabulary'|'syntax'} [params.analysis_type='full']
 * @returns {Promise<{success: boolean, corpus_name: string, analysis_path: string}>}
 * @throws {Error} When `corpus_name` resolves outside the `corpus` root.
 *   File-system errors (e.g. a missing articles directory) propagate unchanged.
 */
export async function analyzeCorpus(params) {
    const { corpus_name, analysis_type = 'full' } = params;
    const corpusRoot = path.join(process.cwd(), 'corpus');
    const corpusDir = path.join(corpusRoot, corpus_name);

    // corpus_name is caller-supplied: refuse names such as "../.." that would
    // make the tool read or write outside the corpus root.
    const relativeToRoot = path.relative(corpusRoot, corpusDir);
    const escapesRoot = relativeToRoot === '..'
        || relativeToRoot.startsWith(`..${path.sep}`)
        || path.isAbsolute(relativeToRoot);
    if (escapesRoot) {
        throw new Error(`Invalid corpus name "${corpus_name}": it must resolve inside ${corpusRoot}`);
    }

    const articlesDir = path.join(corpusDir, 'articles');
    const analysisDir = path.join(corpusDir, 'analysis');

    // Read the input first so that a missing corpus fails before any
    // directory is created for it.
    const files = await fs.readdir(articlesDir);
    // readdir order is platform dependent; sort so the combined text (and
    // therefore every analysis) is reproducible.
    const markdownFiles = files.filter(f => f.endsWith('.md')).sort();
    const articleCount = markdownFiles.length;

    await fs.mkdir(analysisDir, { recursive: true });

    // Leading frontmatter: an opening `---` line, optional content, a closing
    // `---` line and at most one following blank line. Accepts LF and CRLF
    // line endings and an optional BOM. The closing fence must sit on its own
    // line so a later horizontal rule in the body is never swallowed.
    const frontmatterPattern = /^\uFEFF?---[ \t]*\r?\n(?:[\s\S]*?\r?\n)??---[ \t]*(?:\r?\n|$)(?:\r?\n)?/;
    let combinedText = '';
    for (const file of markdownFiles) {
        const content = await fs.readFile(path.join(articlesDir, file), 'utf-8');
        combinedText += `${content.replace(frontmatterPattern, '')}\n\n`;
    }

    // Small writers so each analysis step below is a single line.
    const writeJson = (fileName, data) => fs.writeFile(path.join(analysisDir, fileName), JSON.stringify(data, null, 2), 'utf-8');
    const writeText = (fileName, text) => fs.writeFile(path.join(analysisDir, fileName), text, 'utf-8');

    const runAll = analysis_type === 'full';

    if (runAll || analysis_type === 'vocabulary') {
        await writeJson('vocabulary.json', analyzeVocabulary(combinedText));
    }

    if (runAll || analysis_type === 'syntax') {
        await writeJson('sentence.json', analyzeSentences(combinedText));

        const punctuationAnalysis = analyzePunctuation(combinedText);
        await writeJson('punctuation.json', punctuationAnalysis);
        // Punctuation summary (includes AI detection for dash consistency)
        await writeText('punctuation-summary.md', summarizePunctuation(punctuationAnalysis));

        await writeJson('paragraph.json', analyzeParagraphs(combinedText));
    }

    if (runAll || analysis_type === 'quick') {
        await writeJson('voice.json', analyzeVoiceMarkers(combinedText));

        // Function word analysis with z-scores, plus human-readable summary
        const functionWordAnalysis = analyzeFunctionWords(combinedText);
        await writeJson('function-words.json', functionWordAnalysis);
        await writeText('function-words-summary.md', summarizeFunctionWordAnalysis(functionWordAnalysis));

        // Character n-grams
        const charNGramAnalysis = analyzeCharNGrams(combinedText);
        await writeJson('char-ngrams.json', charNGramAnalysis);
        await writeText('char-ngrams-summary.md', summarizeCharNGrams(charNGramAnalysis));

        // Word n-grams
        const wordNGramAnalysis = analyzeWordNGrams(combinedText);
        await writeJson('word-ngrams.json', wordNGramAnalysis);
        await writeText('word-ngrams-summary.md', summarizeWordNGrams(wordNGramAnalysis));

        // POS n-grams
        const posNGramAnalysis = analyzePOSNGrams(combinedText);
        await writeJson('pos-ngrams.json', posNGramAnalysis);
        await writeText('pos-ngrams-summary.md', summarizePOSNGrams(posNGramAnalysis));

        // Anti-mechanical analysis
        const antiMechanicalAnalysis = analyzeAntiMechanical(combinedText);
        await writeJson('anti-mechanical.json', antiMechanicalAnalysis);
        await writeText('anti-mechanical-summary.md', generateAntiMechanicalSummary(antiMechanicalAnalysis));

        // Information density analysis (based on Dejan AI research)
        const informationDensityAnalysis = analyzeInformationDensity(combinedText, articleCount);
        await writeJson('information-density.json', informationDensityAnalysis);
        await writeText('information-density-summary.md', summarizeInformationDensity(informationDensityAnalysis));
    }

    // Overview of every JSON result currently present in the analysis directory
    await writeText('summary.md', await generateSummary(analysisDir));

    return {
        success: true,
        corpus_name,
        analysis_path: analysisDir
    };
}
|
|
101
|
+
/**
 * Build a markdown overview of the JSON result files in an analysis directory.
 *
 * Each `*.json` file becomes a `## <name>` section (name = file name without
 * the `.json` extension) containing a fenced preview of the re-indented JSON.
 * Previews longer than 500 characters are cut and marked with a trailing
 * `...`; shorter ones are shown in full without the marker. Sections are
 * emitted in sorted file-name order so the output is stable across platforms.
 *
 * @param {string} analysisDir - Directory containing the analysis JSON files.
 * @returns {Promise<string>} Markdown text starting with `# Analysis Summary`.
 * @throws Rejects when the directory cannot be read or a `.json` file does
 *   not contain valid JSON.
 */
async function generateSummary(analysisDir) {
    const previewLimit = 500;
    const entries = await fs.readdir(analysisDir);
    // readdir order is not sorted on every platform; sort for stable output
    const jsonFiles = entries.filter(f => f.endsWith('.json')).sort();
    const sections = [];
    for (const file of jsonFiles) {
        const raw = await fs.readFile(path.join(analysisDir, file), 'utf-8');
        // Round-trip through JSON to get uniform 2-space indentation
        const pretty = JSON.stringify(JSON.parse(raw), null, 2);
        const preview = pretty.length > previewLimit
            ? `${pretty.substring(0, previewLimit)}...`
            : pretty;
        // basename strips only the trailing extension, not a ".json" in the middle
        const title = path.basename(file, '.json');
        sections.push(`## ${title}\n\n\`\`\`json\n${preview}\n\`\`\`\n\n`);
    }
    return `# Analysis Summary\n\n${sections.join('')}`;
}
|
|
115
|
+
/**
 * Render an anti-mechanical analysis result as a human-readable markdown report.
 *
 * Sections, in order: overall naturalness score (with a per-component table),
 * sentence length variation, paragraph asymmetry, first-person distribution,
 * repetitive sentence starts and a fixed score interpretation guide.
 *
 * @param {object} analysis - Result object with the properties `naturalness`,
 *   `sentenceLengthVariation`, `paragraphAsymmetry`, `firstPersonDistribution`
 *   and `repetitiveStarts`, as read below.
 * @returns {string} Markdown text; lines are joined with `\n` and the report
 *   ends with a trailing newline.
 */
function generateAntiMechanicalSummary(analysis) {
    const {
        naturalness,
        sentenceLengthVariation: sentences,
        paragraphAsymmetry: paragraphs,
        firstPersonDistribution: firstPerson,
        repetitiveStarts: starts
    } = analysis;

    // Interpretation labels use underscores as word separators; replace every
    // one of them, not just the first.
    const interpretation = naturalness.interpretation.replace(/_/g, ' ');

    // The patterns line is only present when there is something to report.
    const problematicPatternLines = starts.problematicPatterns.length > 0
        ? [`- **Problematic patterns:** ${starts.problematicPatterns.join(', ')}`]
        : [];

    const lines = [
        '# Anti-Mechanical Analysis Summary',
        '',
        '*Evaluates writing naturalness vs robotic/AI patterns*',
        '',

        // Overall score
        '## Overall Naturalness Score',
        '',
        `**Total Score:** ${naturalness.totalScore}/100 (${interpretation})`,
        '',
        '| Component | Score | Max |',
        '|-----------|-------|-----|',
        `| Sentence Variation | ${naturalness.sentenceVariationScore} | 25 |`,
        `| Paragraph Variation | ${naturalness.paragraphVariationScore} | 25 |`,
        `| First-Person Distribution | ${naturalness.firstPersonScore} | 25 |`,
        `| Repetition Avoidance | ${naturalness.repetitionScore} | 25 |`,
        '',

        // Sentence variation
        '## Sentence Length Variation',
        '',
        `- **Mean length:** ${sentences.mean.toFixed(1)} words`,
        `- **Standard deviation:** ±${sentences.stdDev.toFixed(1)}`,
        `- **Coefficient of variation:** ${sentences.coefficientOfVariation.toFixed(2)}`,
        `- **Natural variation:** ${sentences.hasNaturalVariation ? 'Yes (CV > 0.5)' : 'No (too uniform)'}`,
        '',
        '**Length Distribution:**',
        `- Short (1-8 words): ${sentences.distribution.short}`,
        `- Medium (9-20 words): ${sentences.distribution.medium}`,
        `- Long (21-40 words): ${sentences.distribution.long}`,
        `- Very long (40+ words): ${sentences.distribution.veryLong}`,
        '',

        // Paragraph asymmetry
        '## Paragraph Asymmetry',
        '',
        `- **Mean sentences per paragraph:** ${paragraphs.meanSentences.toFixed(1)}`,
        `- **Standard deviation:** ±${paragraphs.stdDev.toFixed(1)}`,
        `- **Single-sentence paragraphs:** ${paragraphs.singleSentenceParagraphs}`,
        `- **Long paragraphs (5+):** ${paragraphs.longParagraphs}`,
        '',

        // First-person distribution
        '## First-Person Distribution',
        '',
        `- **Total first-person instances:** ${firstPerson.totalCount}`,
        `- **Sentence-start instances:** ${firstPerson.sentenceStartCount}`,
        `- **Sentence-start ratio:** ${(firstPerson.sentenceStartRatio * 100).toFixed(1)}%`,
        `- **Max consecutive "I" starts:** ${firstPerson.consecutiveIStart}`,
        `- **Balanced distribution:** ${firstPerson.isBalanced ? 'Yes' : 'No (too many sentence starts)'}`,
        '',

        // Repetitive starts
        '## Repetitive Starts',
        '',
        `- **Max consecutive same-start:** ${starts.maxConsecutiveSameStart}`,
        `- **Has repetition problem:** ${starts.hasRepetitionProblem ? 'Yes' : 'No'}`,
        ...problematicPatternLines,
        '',

        // Interpretation guide
        '## Interpretation Guide',
        '',
        '| Score Range | Interpretation |',
        '|-------------|----------------|',
        '| 85-100 | Very natural - authentic human writing |',
        '| 65-84 | Natural - good variation |',
        '| 45-64 | Somewhat mechanical - needs more variation |',
        '| 0-44 | Mechanical - likely AI-generated or very formulaic |',
        ''
    ];
    return lines.join('\n');
}
|
|
188
|
+
//# sourceMappingURL=analyze-corpus.js.map
|