@holdyourvoice/hyv 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2013 @@
1
+ #!/usr/bin/env python3
2
+ #!/usr/bin/env python3
3
+ """Portable Hold Your Voice helpers.
4
+
5
+ This script intentionally has no third-party dependencies. It is not the Hold
6
+ Your Voice product backend; it is the reusable local layer for Codex projects:
7
+ build a sample-grounded profile, scan for AI cadence, and generate line-level
8
+ rewrite prompts.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import argparse
14
+ import datetime
15
+ import html
16
+ import json
17
+ import math
18
+ import re
19
+ import sys
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+
24
# File extensions (lowercase, leading dot included) that iter_text_files keeps
# when it walks a directory. The comparison there lowercases the file's suffix
# first, so matching is case-insensitive.
TEXT_EXTENSIONS = {
    ".md",
    ".mdx",
    ".txt",
    ".html",
    ".htm",
    ".rst",
    ".adoc",
    ".csv",
}
34
+
35
+ AI_PATTERN_RULES = [
36
+ # --- Binary reframing & negation ---
37
+ ("binary_reframing", re.compile(
38
+ r"\b(?:it'?s|that'?s|this\s+(?:is|was)|here'?s)\s+not\b.{0,80}\b(?:it'?s|that'?s|but)\b|"
39
+ r"\b(?:the\s+)?(?:hard\s+)?(?:part|point)\s+isn'?t\b.{0,80}\b(?:it'?s|but)\b|"
40
+ r"\byou\s+don'?t\s+need\b.{0,80}\byou\s+need\b|"
41
+ r"\b(?:brand|trust|strategy|marketing|pricing|success|growth|content|design)\s+is\s+not\s+(?:just\s+)?about\b",
42
+ re.I,
43
+ )),
44
+ ("not_just_but", re.compile(
45
+ r"\bnot\s+just\b.{3,80}\bbut\s+(?:also\s+)?|"
46
+ r"\bnot\s+only\b.{3,80}\bbut\s+(?:also\s+)?",
47
+ re.I,
48
+ )),
49
+ ("more_than_just", re.compile(
50
+ r"\bmore\s+than\s+just\b|\bit'?s\s+not\s+just\s+about\b",
51
+ re.I,
52
+ )),
53
+
54
+ # --- Truth/reality posturing ---
55
+ ("truth_harsh_reality", re.compile(
56
+ r"\b(?:the\s+)?(?:uncomfortable|hard|harsh|brutal|ugly|unsexy|real|honest)\s+(?:truth|reality)\b|"
57
+ r"\bthe\s+truth\s+is\b|\bthe\s+reality\s+is\b|\bhere'?s\s+the\s+truth\b|"
58
+ r"\bthe\s+ugly\s+truth\b|\bthe\s+harsh\s+reality\b|"
59
+ r"\b(?:brutal\s+honesty|real\s+talk)\b|\breality\s+check:",
60
+ re.I,
61
+ )),
62
+
63
+ # --- Staccato drama & performance cadence ---
64
+ ("staccato_drama", re.compile(
65
+ r"\b(?:no|not)\s+\w[^.!?\n]{0,40}[.!?]\s*(?:no|not)\s+\w[^.!?\n]{0,40}[.!?]\s*(?:no|not|just)\s+\w",
66
+ re.I,
67
+ )),
68
+ ("founder_cadence", re.compile(
69
+ r"\b(?:the\s+)?moment\b.{3,80}\bbecomes?\b|"
70
+ r"\b(?:here'?s|here\s+is)\s+(?:the\s+)?(?:thing|kicker|part\s+most\s+people\s+miss|what\s+nobody'?s\s+saying)\b|"
71
+ r"\b(?:and|but)\s+honestly\?|\bhere'?s\s+the\s+kicker\b|"
72
+ r"\bwhat\s+nobody'?s\s+(?:saying|talking\s+about)\b|"
73
+ r"\bthe\s+part\s+most\s+people\s+miss\b|"
74
+ r"\bthe\s+best\s+part\?|\bthe\s+kicker\?|"
75
+ r"\bsame\s+[^.!?\n]{1,35}[.!?]\s*(?:better|nicer|cleaner|calmer|safer)\s+[^.!?\n]{1,35}[.!?]?",
76
+ re.I,
77
+ )),
78
+ ("restatement_polish", re.compile(
79
+ r"\bwhich\s+is\s+another\s+way\s+of\s+saying\b|"
80
+ r"\bin\s+other\s+words\b|"
81
+ r"\bto\s+put\s+it\s+(?:simply|another\s+way)\b|"
82
+ r"\bin\s+a\s+nutshell\b",
83
+ re.I,
84
+ )),
85
+ ("spoiler_reveal", re.compile(
86
+ r"\bspoiler(?:\s+alert)?:\s*it'?s\s+not\b|"
87
+ r"\b(?:and|but)\s+here'?s\s+the\s+(?:truth|reality)\b",
88
+ re.I,
89
+ )),
90
+
91
+ # --- Landscape / era / temporal grandstanding ---
92
+ ("landscape_era", re.compile(
93
+ r"\b(?:in\s+)?(?:today'?s\s+)?(?:fast.paced|ever.evolving|ever.changing|digital)\s+(?:world|age|era|landscape)\b|"
94
+ r"\b(?:ever.evolving|ever.increasing|constantly\s+growing|increasingly)\s+(?:landscape|world)\b|"
95
+ r"\bin\s+today'?s\s+world\b|\bin\s+the\s+digital\s+age\b|"
96
+ r"\bin\s+this\s+era\s+of\b|"
97
+ r"\bnow\s+more\s+than\s+ever\b|"
98
+ r"\b(?:as|like)\s+never\s+before\b|"
99
+ r"\bhas\s+never\s+been\s+more\s+important\b|"
100
+ r"\bthe\s+rise\s+of\s+(?:the\s+)?\w+\b|"
101
+ r"\b(?:a|the)\s+new\s+era\s+of\b|"
102
+ r"\b(?:has\s+been\s+around\s+for\s+centuries|since\s+the\s+dawn\s+of\s+time)\b",
103
+ re.I,
104
+ )),
105
+
106
+ # --- Formulaic connectors & transitions ---
107
+ ("formulaic_connector", re.compile(
108
+ r"\b(?:firstly|secondly|thirdly|lastly|moreover|furthermore|in\s+addition\b(?:\s*,\s*|$)|"
109
+ r"in\s+conclusion|to\s+summarize|to\s+sum\s+up|to\s+recap|in\s+summary\b(?:\s*,\s*|$)|"
110
+ r"it\s+is\s+important\s+to\s+note|it'?s\s+important\s+to\s+note|it\s+should\s+be\s+noted|"
111
+ r"it'?s\s+worth\s+noting\s+that|it'?s\s+important\s+to\s+remember\b|"
112
+ r"however\s*,\s*it'?s\s+important\s+to\s+remember|"
113
+ r"keep\s+in\s+mind\s+that|remember\s+that\b|"
114
+ r"on\s+top\s+of\s+that\b)",
115
+ re.I,
116
+ )),
117
+
118
+ # --- Transitions: balance & contrast ---
119
+ ("balanced_contrast", re.compile(
120
+ r"\bon\s+the\s+one\s+hand\b|"
121
+ r"\bon\s+the\s+other\s+hand\b|"
122
+ r"\bon\s+the\s+surface\b.{0,80}\b(?:but\s+)?beneath\b|"
123
+ r"\bat\s+first\s+glance\b|"
124
+ r"\bon\s+the\s+flip\s+side\b|"
125
+ r"\bat\s+first\s*,\s*it\s+might\s+seem\b|"
126
+ r"\bon\s+paper\b.{0,80}\bin\s+practice\b|"
127
+ r"\bwhether\s+you\s+(?:love\s+it\s+or\s+hate\s+it|realize\s+it\s+or\s+not)\b|"
128
+ r"\blike\s+it\s+or\s+not\b|"
129
+ r"\bready\s+or\s+not\b",
130
+ re.I,
131
+ )),
132
+
133
+ # --- Hedging & non-committal ---
134
+ ("hedging_noncommittal", re.compile(
135
+ r"\bit\s+depends\b.{0,60}\bbut\b|"
136
+ r"\bno\s+one.size.fits.all\b|"
137
+ r"\btailor\s+(?:it|this|these|them)\b.{0,40}\bto\s+(?:your|the)\s+(?:needs|context|audience)\b|"
138
+ r"\balways\s+tailor\b|"
139
+ r"\b(?:in\s+many\s+ways|from\s+a\s+broader\s+perspective)\b|"
140
+ r"\bin\s+the\s+context\s+of\b|"
141
+ r"\b(?:chances\s+are|more\s+often\s+than\s+not|at\s+first\s*,\s*it\s+might\s+seem)\b|"
142
+ r"\bit\s+can\s+be\s+tempting\s+to\b|\byou\s+might\s+be\s+tempted\s+to\b|"
143
+ r"\bonly\s+time\s+will\s+tell\b|"
144
+ r"\bboth\s+sides\s+have\s+valid\s+points\b|"
145
+ r"\bthat\s+said\b",
146
+ re.I,
147
+ )),
148
+
149
+ # --- Let's/X invitation ---
150
+ ("lets_invitation", re.compile(
151
+ r"\blet'?s\s+(?:dive|explore|break\s+(?:it|this)\s+down|delve|be\s+honest)\b|"
152
+ r"\b(?:dive|delv(?:e|ing))\s+(?:deeper|into|the\s+intricacies)\b|"
153
+ r"\bdeep\s+dive\b|\blet'?s\s+dive\s+in\b",
154
+ re.I,
155
+ )),
156
+
157
+ # --- Empathy/validation openers ---
158
+ ("empathy_opener", re.compile(
159
+ r"\bit'?s\s+easy\s+to\s+feel\b|\byou'?re\s+not\s+alone\b|"
160
+ r"\bif\s+you'?ve\s+ever\s+felt\b|\byou'?re\s+not\s+imagining\s+it\b|"
161
+ r"\byou'?re\s+not\s+wrong\s+to\s+feel\b|"
162
+ r"\byou\s+deserve\b|\bfear\s+not\b|"
163
+ r"\bshouting\s+into\s+the\s+void\b|"
164
+ r"\bcurious\s+what\s+others\s+think\b",
165
+ re.I,
166
+ )),
167
+
168
+ # --- Journey / destination clichés ---
169
+ ("journey_cliche", re.compile(
170
+ r"\b(?:brand|learning|success|life|growth|writing|fitness|business)\s+isn'?t\s+a\s+destination\b.{0,40}\bjourney\b|"
171
+ r"\bit'?s\s+a\s+journey\b.{0,50}\bnot\s+a\s+destination\b|"
172
+ r"\bno\s+matter\s+where\s+you\s+are\s+on\s+your\s+journey\b|"
173
+ r"\bembark\s+on\s+(?:a|your|the|this)\b|"
174
+ r"\byou'?re\s+still\s+early\b|\bit'?s\s+still\s+day\s+one\b|"
175
+ r"\bfrom\s+(?:confusion\s+to\s+clarity|followers\s+to\s+fans|ideas\s+to\s+income)\b|"
176
+ r"\b(?:brand.building|writing|creative|learning)\s+journey\b",
177
+ re.I,
178
+ )),
179
+
180
+ # --- Marketing/inflated verbs ---
181
+ ("inflated_verbs", re.compile(
182
+ r"\b(?:unlock|harness|leverage)\s+the\s+power\s+of\b|"
183
+ r"\b(?:unlock|unleash)\s+(?:the\s+)?(?:potential|power)\b|"
184
+ r"\b(?:supercharge|turbocharge|revolutionize\s+the\s+way)\b|"
185
+ r"\b(?:transform|elevate|enhance|boost|improve)\s+your\s+\w+\b|"
186
+ r"\btake\s+(?:your|it|this|them|their)\b.{0,30}\bto\s+(?:the\s+next\s+level|new\s+heights)\b|"
187
+ r"\b(?:game.changer|on\s+steroids)\b|"
188
+ r"\bmaster\s+the\s+art\s+of\b|"
189
+ r"\bdiscover\s+a\s+powerful\s+way\b",
190
+ re.I,
191
+ )),
192
+
193
+ # --- Metaphor clusters ---
194
+ ("ai_metaphors", re.compile(
195
+ r"\b(?:beacon|lighthouse)\s+(?:of|for|in)\b|"
196
+ r"\b(?:tapestry|symphony|tides)\s+of\b|"
197
+ r"\b(?:flood|avalanche|tsunami)\s+of\b|"
198
+ r"\b(?:noise|signal)\b.{0,30}\b(?:signal|noise)\b|"
199
+ r"\b(?:north\s+star|double.edged\s+sword|blessing\s+and\s+a\s+curse)\b|"
200
+ r"\b(?:silent\s+killer|hidden\s+gem|hidden\s+lever|low.hanging\s+fruit)\b|"
201
+ r"\b(?:tip\s+of\s+the\s+iceberg|scratch(?:es)?\s+the\s+surface)\b|"
202
+ r"\b(?:skeleton|framework|scaffolding|blueprint|roadmap|playbook)\b\s+(?:for|to|that|as)\b|"
203
+ r"\b(?:wealth\s+of|treasure\s+trove)\b|"
204
+ r"\b(?:the\s+)?power\s+of\b.{0,40}\b(?:cannot|should\s+not|is\s+immense|is\s+real|is\s+undeniable)\b",
205
+ re.I,
206
+ )),
207
+
208
+ # --- Inflated importance claims ---
209
+ ("inflated_importance", re.compile(
210
+ r"\b(?:crucial|critical|pivotal)\s+role\b|"
211
+ r"\b(?:a\s+testament\s+to|the\s+results\s+speak\s+for\s+themselves)\b|"
212
+ r"\b(?:remarkably|incredibly|highly)\s+\w+\b|"
213
+ r"\b(?:significant\s+milestone|at\s+scale)\b|"
214
+ r"\b(?:at\s+its\s+finest|at\s+the\s+heart\s+of)\b|"
215
+ r"\b(?:the\s+power\s+of\b.{0,40}\b(?:cannot|should\s+not)\b)|"
216
+ r"\b(?:championing|advocating\s+for)\b.{0,40}\b(?:change|reform|transparency)\b",
217
+ re.I,
218
+ )),
219
+
220
+ # --- Audience-inclusion triads ---
221
+ ("audience_triad", re.compile(
222
+ r"\bwhether\s+you'?re\s+(?:a\s+)?\w+(?:\s+\w+)?\s*,\s*(?:a\s+)?\w+(?:\s+\w+)?\s*,\s*(?:or|and)\s+(?:a\s+)?\w+\b|"
223
+ r"\bfrom\s+(?:solo\s+)?(?:tiny\s+)?\w+\s+to\s+(?:large\s+)?(?:global\s+)?\w+\s*,\s*everyone\s+\w+\b|"
224
+ r"\bwhether\s+you'?re\s+(?:a\s+)?beginner\b|"
225
+ r"\bno\s+matter\s+where\s+you\s+are\b",
226
+ re.I,
227
+ )),
228
+
229
+ # --- SEO / guide framing ---
230
+ ("guide_framing", re.compile(
231
+ r"\byou'?re\s+in\s+the\s+right\s+place\b|"
232
+ r"\bhere'?s\s+a\s+step.by.step\s+guide\b|"
233
+ r"\b(?:step\s+1|step\s+2|step\s+3)\b|"
234
+ r"\b(?:first\s*,\s*second\s*,\s*third)\b|"
235
+ r"\bkey\s+(?:takeaways?|insights?)\b|"
236
+ r"\bactionable\s+tips?\b|"
237
+ r"\bno\s+fluff\b|\bno.nonsense\b",
238
+ re.I,
239
+ )),
240
+
241
+ # --- Wrapping/closing patterns ---
242
+ ("wrapping_patterns", re.compile(
243
+ r"\b(?:ultimately|at\s+the\s+end\s+of\s+the\s+day|the\s+bottom\s+line\s+is|"
244
+ r"it\s+all\s+comes\s+down\s+to)\b|"
245
+ r"\b(?:best.case\s+scenario|worst.case\s+scenario)\b|"
246
+ r"\b(?:the\s+good\s+news\s+is|the\s+bad\s+news\s+is)\b|"
247
+ r"\blet\s+that\s+sink\s+in\b|\bif\s+you\s+think\s+about\s+it\b|"
248
+ r"\bthe\s+stakes\s+are\s+high\b|"
249
+ r"\b(?:before\s+you\s+know\s+it|in\s+the\s+blink\s+of\s+an\s+eye)\b|"
250
+ r"\b(?:more\s+often\s+than\s+you\s+think|you\s+won'?t\s+believe)\b|"
251
+ r"\b(?:happens|occur|churn|happen|changes?)\s+(?:faster|quicker|sooner)\s+than\s+you\s+think\b",
252
+ re.I,
253
+ )),
254
+
255
+ # --- Temporal / trend clichés ---
256
+ ("trend_cliches", re.compile(
257
+ r"\b(?:attention|trust|retention|data)\s+is\s+the\s+new\s+(?:currency|growth\s+hack|acquisition|oil)\b|"
258
+ r"\bthe\s+best\s+time\s+(?:to\s+\w+|was)\b.{0,80}\b(?:second.best|is\s+now)\b|"
259
+ r"\b(?:low\s+barrier|high\s+leverage)\b|"
260
+ r"\b(?:quick\s+wins?|silver\s+bullet)\b|"
261
+ r"\bstart\s+small\s+and\s+iterate\b|"
262
+ r"\b(?:from\s+\w+\s+to\s+\w+\s*[,:]\s*)\b",
263
+ re.I,
264
+ )),
265
+
266
+ # --- Pain points & problem framing ---
267
+ ("pain_points_framing", re.compile(
268
+ r"\bpain\s+points?\b(?!\s+of)|\baddress\s+(?:the|their|your)\s+pain\s+points\b|"
269
+ r"\bspeak\s+(?:directly\s+)?to\s+(?:their|your)\s+pain\s+points\b",
270
+ re.I,
271
+ )),
272
+
273
+ # --- Overly structured / meta patterns ---
274
+ ("meta_structuring", re.compile(
275
+ r"\b(?:in\s+this\s+(?:article|guide|post|piece)|this\s+(?:article|guide|post|piece)\s+(?:explores|will\s+explore|discusses))\b|"
276
+ r"\b(?:this\s+essay\s+will\s+discuss|in\s+conclusion\s*,\s*this\s+essay)\b|"
277
+ r"\blet\s+me\s+know\s+if\s+you\s+need\s+(?:any|more)\s+help\b|"
278
+ r"\bfeel\s+free\s+to\s+ask\b|"
279
+ r"\b(?:if\s+you\s+have\s+follow.up\s+questions|i'?m\s+here\s+to\s+help)\b",
280
+ re.I,
281
+ )),
282
+
283
+ # --- Experience / friction words ---
284
+ ("ux_buzzwords", re.compile(
285
+ r"\b(?:seamless(?:\s+experience|\s+journey)?|frictionless(?:\s+journey|\s+experience)?|"
286
+ r"holistic\b(?:\s+\w+)?|comprehensive\b(?:\s+\w+)?|innovative\b(?:\s+\w+)?|"
287
+ r"cutting.edge|state.of.the.art|"
288
+ r"robust(?:\s+\w+)?|scalable(?:\s+\w+)?|best.in.class)\b",
289
+ re.I,
290
+ )),
291
+
292
+ # --- Story / narrative templates ---
293
+ ("story_templates", re.compile(
294
+ r"\b(?:little\s+did\s+(?:i|we)\s+know|"
295
+ r"at\s+first\s*,\s*i\s+was\s+skeptical\b.{0,80}\bbut\b|"
296
+ r"imagine\s+this|picture\s+this|"
297
+ r"you\s+wake\s+up\s+to\b)",
298
+ re.I,
299
+ )),
300
+
301
+ # --- Specifically AI-vocab density words ---
302
+ ("ai_vocab_density", re.compile(
303
+ r"\b(?:delve|underscore|testament|intricate|multifaceted|cornerstone|landscape|"
304
+ r"foster|harness|tapestry|illuminate|pivotal|elevate|empower|"
305
+ r"seamlessly|revolutionize|supercharge|transformative|holistic|comprehensive|"
306
+ r"innovative|impactful|meaningful|utilize|paradigm|navigate|endeavor|realm|"
307
+ r"profound|encapsulate|synergy|robust|facilitate|bolster|streamline|"
308
+ r"differentiate|myriad|transform|vibrant|dynamic|bustling|ecosystem|"
309
+ r"ever.increasing|constantly\s+growing|increasingly|"
310
+ r"unlock|unleash|(?:re)?imagin(?:e|ing)|curate|iterate|optimize|"
311
+ r"amplify|align|drive\s+\w+|foster|cultivate|shed\s+light\s+on|"
312
+ r"quietly|silently|behind\s+every\b|"
313
+ r"not\s+all\s+\w+\s+are\s+created\s+equal|"
314
+ r"there'?s\s+a\s+fine\s+line\s+between|"
315
+ r"the\s+line\s+between\b.{0,40}\bis\s+blurry\b|"
316
+ r"you\s+don'?t\s+have\s+to\b.{0,40}\b(?:to|you\s+can)\b|"
317
+ r"champion(?:ing|s|ed)\b|advocat(?:ing|e[ds]?)\s+for\b|"
318
+ r"more\s+often\s+than\s+you\s+think\b)",
319
+ re.I,
320
+ )),
321
+
322
+ # --- Em dash (typographic tell) ---
323
+ ("em_dash", re.compile(r"\u2014")),
324
+
325
+ # --- Buyer psychology templates ---
326
+ ("buyer_psychology", re.compile(
327
+ r"\bpeople\s+don'?t\s+(?:just\s+)?buy\b.{0,60}\bthey\s+buy\b|"
328
+ r"\bpeople\s+buy\s+the\s+feeling\b|"
329
+ r"\bpeople\s+don'?t\s+read\b.{0,40}\bthey\s+skim\b|"
330
+ r"\b(?:it'?s\s+not\s+about|people\s+don'?t\s+care\s+about)\s+your\s+product\b",
331
+ re.I,
332
+ )),
333
+
334
+ # --- The X of Y metaphoric positioning ---
335
+ ("x_of_y_metaphor", re.compile(
336
+ r"\bthe\s+(?:netflix|uber|airbnb|apple|google|spotify|tesla|amazon)\s+of\s+\w+\b|"
337
+ r"\boperating\s+system\s+(?:of|for)\s+(?:your|the)\s+\w+\b",
338
+ re.I,
339
+ )),
340
+
341
+ # --- Overwhelm-reassurance ---
342
+ ("overwhelm_reassurance", re.compile(
343
+ r"\b(?:can\s+feel|might\s+seem|can\s+be)\s+overwhelming\b.{0,80}\bbut\s+it\s+doesn'?t\s+have\s+to\s+be\b|"
344
+ r"\b(?:can\s+feel|might\s+seem)\s+(?:intimidating|complex|difficult)\b.{0,80}\bbut\b",
345
+ re.I,
346
+ )),
347
+
348
+ # --- Pros/cons framing ---
349
+ ("pros_cons_framing", re.compile(
350
+ r"\b(?:pros\s+and\s+cons|advantages\s+and\s+disadvantages)\s+(?:of|to)\b|"
351
+ r"\bhere\s+are\s+the\s+pros\s+and\s+cons\b",
352
+ re.I,
353
+ )),
354
+
355
+ # --- Triple-adjective bloat ---
356
+ ("triple_adjective", re.compile(
357
+ r"\b(?:\w+,\s+\w+,\s+(?:and\s+)?\w+\s+(?:approach|strategy|solution|framework|platform|system|tool|method|plan|process))\b|"
358
+ r"\b(?:simple|clear|easy)\s*,\s*(?:useful|effective|powerful|intuitive)\s*,\s*(?:and\s+)?(?:memorable|sustainable|scalable|actionable)\b",
359
+ re.I,
360
+ )),
361
+
362
+ # --- Behind-the-scenes / hidden depth ---
363
+ ("hidden_depth", re.compile(
364
+ r"\bbehind\s+(?:the\s+scenes|every\s+\w+)\b.{0,80}\b(?:lies|is)\b|"
365
+ r"\bbehind\s+the\s+scenes\b|"
366
+ r"\bbeneath\s+the\s+surface\b",
367
+ re.I,
368
+ )),
369
+
370
+ # --- Self-referential / AI disclaimer ---
371
+ ("self_referential", re.compile(
372
+ r"\bas\s+an\s+ai\s+(?:language\s+)?model\b|"
373
+ r"\bi\s+(?:can'?t|cannot)\s+provide\s+(?:legal|medical|financial|investment)\s+advice\b|"
374
+ r"\bi\s+don'?t\s+have\s+(?:personal\s+experiences|feelings|opinions)\b",
375
+ re.I,
376
+ )),
377
+
378
+ # --- Placeholder brackets ---
379
+ ("placeholder_brackets", re.compile(
380
+ r"\[(?:your\s+(?:brand|product|company|list|audience|name|metric|goal)|"
381
+ r"insert\s+(?:metric|name|number|value|example)|target\s+\w+)\]",
382
+ re.I,
383
+ )),
384
+
385
+ # --- Zoom / camera metaphor ---
386
+ ("zoom_camera", re.compile(
387
+ r"\b(?:zooming\s+(?:in|out)|from\s+a\s+broader\s+perspective|let'?s\s+zoom\s+(?:in|out))\b",
388
+ re.I,
389
+ )),
390
+
391
+ # --- Core/essence statements (#41, #130) ---
392
+ ("essence_statements", re.compile(
393
+ r"\bat\s+(?:its|the)\s+(?:core|heart)\b|"
394
+ r"\bat\s+(?:its|the)\s+(?:core|heart)\s*(?:of\s+)?\w+\s+(?:is|lies|are)\b",
395
+ re.I,
396
+ )),
397
+
398
+ # --- Analogy / simile invitations (#42-43) ---
399
+ ("ai_analogies", re.compile(
400
+ r"\bthink\s+of\b.{0,30}\bas\s+(?:a|the|your)\b|"
401
+ r"\b(?:your|the|a|an|\w+)\s+(?:is|are)\s+(?:like|kind\s+of\s+like)\s+(?:a|the)\b|"
402
+ r"\bimagine\s+(?:your|the|a|an|\w+)\s+as\b",
403
+ re.I,
404
+ )),
405
+
406
+ # --- "Sounds simple but" (#44) and "In fact" (#46) ---
407
+ ("simple_but_infact", re.compile(
408
+ r"\b(?:this|it|that)\s+(?:might|may|can)\s+sound\s+simple\s*[,.]?\s+but\b|"
409
+ r"\b(?:sounds?\s+simple\s*[,.]?\s+but)\b|"
410
+ r"\bin\s+fact\s*,\s*\w+",
411
+ re.I,
412
+ )),
413
+
414
+ # --- "The X you didn't know you needed" (#71) ---
415
+ ("clickbait_didnt_know", re.compile(
416
+ r"\bthe\s+\w+(?:\s+\w+)?\s+you\s+didn'?t\s+know\s+you\s+needed\b",
417
+ re.I,
418
+ )),
419
+
420
+ # --- Self-referential restatement (#79) ---
421
+ ("self_referential_restatement", re.compile(
422
+ r"\byou\s+(?:asked|wanted\s+to\s+know|wonder(?:ing)?)\s+(?:how|what|why|whether)\b.{0,80}\b(?:let'?s|so|here'?s)\b|"
423
+ r"\byou\s+(?:asked|wanted\s+to\s+know)\s+about\b.{0,80}\b(?:let'?s|so|here'?s)\s+(?:break|walk|dive|explore)\b",
424
+ re.I,
425
+ )),
426
+ ]
427
+
428
+ ABSTRACT_STYLE_WORDS = {
429
+ "alignment",
430
+ "authenticity",
431
+ "awareness",
432
+ "clarity",
433
+ "confidence",
434
+ "consistency",
435
+ "differentiation",
436
+ "execution",
437
+ "framework",
438
+ "identity",
439
+ "messaging",
440
+ "narrative",
441
+ "personality",
442
+ "positioning",
443
+ "preference",
444
+ "presence",
445
+ "recall",
446
+ "relevance",
447
+ "resonance",
448
+ "signal",
449
+ "strategy",
450
+ "trust",
451
+ "utility",
452
+ "value",
453
+ # Expanded from 220 AI patterns document
454
+ "ecosystem",
455
+ "landscape",
456
+ "space",
457
+ "realm",
458
+ "sphere",
459
+ "paradigm",
460
+ "synergy",
461
+ "holistic",
462
+ "robust",
463
+ "scalable",
464
+ "innovative",
465
+ "transformative",
466
+ "comprehensive",
467
+ "sustainable",
468
+ "impactful",
469
+ "meaningful",
470
+ "actionable",
471
+ "seamless",
472
+ "frictionless",
473
+ "cutting-edge",
474
+ "state-of-the-art",
475
+ "best-in-class",
476
+ "optimization",
477
+ "efficiency",
478
+ "productivity",
479
+ "growth",
480
+ "retention",
481
+ "acquisition",
482
+ "engagement",
483
+ "conversion",
484
+ "monetization",
485
+ "scalability",
486
+ "agility",
487
+ "resilience",
488
+ "empowerment",
489
+ "transformation",
490
+ "innovation",
491
+ "disruption",
492
+ "evolution",
493
+ "revolution",
494
+ "iteration",
495
+ "velocity",
496
+ "leverage",
497
+ "amplification",
498
+ "acceleration",
499
+ "facilitation",
500
+ "orchestration",
501
+ "curation",
502
+ "personalization",
503
+ "customization",
504
+ "democratization",
505
+ "accessibility",
506
+ "inclusivity",
507
+ "infrastructure",
508
+ "architecture",
509
+ "foundation",
510
+ "cornerstone",
511
+ "pillar",
512
+ "backbone",
513
+ "lifeblood",
514
+ "catalyst",
515
+ "enabler",
516
+ "driver",
517
+ "engine",
518
+ "flywheel",
519
+ "moat",
520
+ "advantage",
521
+ "differentiator",
522
+ "proposition",
523
+ "promise",
524
+ "mission",
525
+ "vision",
526
+ "purpose",
527
+ "intention",
528
+ "mindset",
529
+ "mindfulness",
530
+ "consciousness",
531
+ "feedback",
532
+ "vulnerability",
533
+ "transparency",
534
+ "accountability",
535
+ "responsibility",
536
+ "ownership",
537
+ "agency",
538
+ "autonomy",
539
+ "sovereignty",
540
+ "freedom",
541
+ "liberation",
542
+ "elevation",
543
+ "ascension",
544
+ "mastery",
545
+ "excellence",
546
+ "greatness",
547
+ "potential",
548
+ "possibility",
549
+ "opportunity",
550
+ "abundance",
551
+ "prosperity",
552
+ "fulfillment",
553
+ "happiness",
554
+ "wellness",
555
+ "wellbeing",
556
+ "balance",
557
+ "harmony",
558
+ "coherence",
559
+ "congruence",
560
+ "integrity",
561
+ "honor",
562
+ "dignity",
563
+ "respect",
564
+ "empathy",
565
+ "compassion",
566
+ "humanity",
567
+ "connection",
568
+ "belonging",
569
+ "tribe",
570
+ "movement",
571
+ "renaissance",
572
+ "awakening",
573
+ "enlightenment",
574
+ "breakthrough",
575
+ "tipping point",
576
+ "inflection",
577
+ "pivot",
578
+ "shift",
579
+ "transition",
580
+ "metamorphosis",
581
+ "rebirth",
582
+ "reinvention",
583
+ }
584
+
585
# Openers that generalize about a group ("most brands ...") or invoke an era
# ("in today's digital world ..."). Anchored at the start of the string.
GENERIC_OPENERS = re.compile(
    r"^(?:most|many|some|all)\s+(?:brands|teams|people|founders|companies|businesses|organizations|leaders)\b|"
    r"^(?:in\s+)?(?:today'?s|the)\s+(?:fast.paced|ever.evolving|modern|digital|current|contemporary)\s+(?:world|age|era|landscape|economy)\b",
    re.I,
)
# Openers phrased as a direct question ("have you", "what if", ...). Anchored
# at the start of the string.
QUESTION_OPENER = re.compile(
    r"^(?:have you|do you|did you|what if|why do|how do|are you|is your|can you|will you)\b",
    re.I,
)
# Openers that lead with a lesson or maxim ("the key to", "success is", ...).
# Anchored at the start of the string.
LESSON_OPENER = re.compile(
    r"^(?:the most important thing|the key to|success is|if you want to|what i learned|"
    r"the hard part|the point isn'?t|you don'?t need|the hard(?:est)?\s+(?:part|thing))\b",
    re.I,
)

# CTA/engagement bait endings
# The pattern itself is not anchored to the end of the text; line_style_hits
# applies it with search(), so a match anywhere in the line counts.
CTA_ENDINGS = re.compile(
    r"\blet\s+me\s+know\s+if\s+you\s+need\s+(?:any\s+more\s+|any\s+|more\s+)?help\b|"
    r"\bfeel\s+free\s+to\s+(?:ask|reach\s+out|contact|dm|let\s+me\s+know)\b|"
    r"\bcurious\s+what\s+others\s+think\b|"
    r"\bi'?m\s+here\s+to\s+help\b|"
    r"\bif\s+you\s+have\s+follow.up\s+questions\b",
    re.I,
)

# A whole string made of exactly seven \w+ tokens separated by whitespace and
# terminated by ".", "!" or "?". Case-sensitive flags are not needed because
# \w already covers both cases.
SEVEN_WORD_SENTENCE_PATTERN = re.compile(
    r"^(?:\w+\s+){6}(?:\w+)[.!?]$",
)
613
+
614
+
615
def strip_markup(text: str, suffix: str = "") -> str:
    """Return *text* with HTML markup removed when *suffix* names an HTML file.

    Args:
        text: Raw file contents.
        suffix: File extension including the dot, compared case-insensitively.
            Anything other than ``.html``/``.htm`` returns *text* untouched.

    Returns:
        For HTML input: the text with comments, ``<script>``/``<style>``
        elements and all remaining tags each replaced by a single space, and
        character references decoded.
    """
    if suffix.lower() not in {".html", ".htm"}:
        return text
    # Comments go first: a ">" inside a comment would otherwise end the generic
    # tag match early and leak the rest of the comment into the prose.
    text = re.sub(r"(?s)<!--.*?-->", " ", text)
    # \b keeps look-alike tag names (e.g. <styled-box>) from being treated as
    # <style>; \s* accepts closing tags written as "</script >".
    text = re.sub(r"(?is)<(script|style)\b.*?>.*?</\1\s*>", " ", text)
    text = re.sub(r"(?s)<[^>]+>", " ", text)
    return html.unescape(text)
621
+
622
+
623
def read_text(path: Path) -> str:
    """Load *path* as UTF-8, dropping undecodable bytes, then strip HTML markup.

    Markup stripping is delegated to ``strip_markup`` with the file's suffix.
    """
    contents = path.read_text(encoding="utf-8", errors="ignore")
    extension = path.suffix
    return strip_markup(contents, extension)
626
+
627
+
628
def iter_text_files(paths: list[str]) -> list[Path]:
    """Expand *paths* into the list of files to read.

    A path that is itself a file is always accepted, whatever its extension.
    Any other existing path is walked recursively in sorted order, keeping
    files whose lowercased suffix is in ``TEXT_EXTENSIONS`` and skipping any
    entry with a dot-prefixed component below that path.

    Raises:
        SystemExit: If an entry in *paths* does not exist.
    """
    collected: list[Path] = []
    for entry in paths:
        root = Path(entry).expanduser()
        if not root.exists():
            raise SystemExit(f"path not found: {root}")
        if root.is_file():
            collected.append(root)
            continue
        collected.extend(
            candidate
            for candidate in sorted(root.rglob("*"))
            if candidate.is_file()
            and not any(part.startswith(".") for part in candidate.relative_to(root).parts)
            and candidate.suffix.lower() in TEXT_EXTENSIONS
        )
    return collected
645
+
646
+
647
def words(text: str) -> list[str]:
    """Return the word tokens in *text*, in order of appearance.

    A token starts with an ASCII letter and continues with ASCII letters,
    digits or apostrophes.
    """
    token_pattern = r"[a-zA-Z][a-zA-Z0-9']*"
    return [found.group(0) for found in re.finditer(token_pattern, text)]
649
+
650
+
651
def sentences(text: str) -> list[str]:
    """Split *text* into trimmed sentences, discarding pieces with no words.

    Breaks occur at whitespace that follows ``.``, ``!`` or ``?`` and at runs
    of two or more newlines.
    """
    kept: list[str] = []
    for piece in re.split(r"(?<=[.!?])\s+|\n{2,}", text):
        if words(piece):
            kept.append(piece.strip())
    return kept
654
+
655
+
656
def paragraphs(text: str) -> list[str]:
    """Return the blank-line-separated chunks of *text* holding six or more words.

    Each returned chunk has surrounding whitespace removed.
    """
    chunks = re.split(r"\n\s*\n", text)
    substantial = filter(lambda chunk: len(words(chunk)) >= 6, chunks)
    return [chunk.strip() for chunk in substantial]
658
+
659
+
660
def variance_label(lengths: list[int]) -> str:
    """Classify the spread of *lengths* as ``"low"``, ``"medium"`` or ``"high"``.

    The label comes from the coefficient of variation (population standard
    deviation divided by the mean): below 0.35 is low, above 0.85 is high,
    anything else is medium. Fewer than three values, or a non-positive mean,
    also yields ``"medium"``.
    """
    count = len(lengths)
    if count < 3:
        return "medium"
    mean = sum(lengths) / count
    if mean <= 0:
        return "medium"
    squared_deviation = sum((value - mean) ** 2 for value in lengths)
    ratio = math.sqrt(squared_deviation / count) / mean
    if ratio > 0.85:
        return "high"
    return "low" if ratio < 0.35 else "medium"
673
+
674
+
675
def infer_case_style(lines: list[str]) -> str:
    """Guess the capitalization habit of a set of lines.

    For every non-blank line containing an ASCII letter, the first letter is
    recorded, and the line is noted if it contains a capitalized word of three
    or more letters.

    Returns:
        ``"mostly lowercase"`` when at least 85% of first letters are lowercase
        and at most 20% of the counted lines contain a capitalized word;
        ``"standard sentence case"`` when at most 25% of first letters are
        lowercase; otherwise (or with no usable lines) ``"mixed"``.
    """
    first_letters: list[str] = []
    lines_with_capitalized_word = 0
    for raw_line in lines:
        content = raw_line.strip()
        letter = re.search(r"[A-Za-z]", content) if content else None
        if letter is None:
            continue
        first_letters.append(letter.group(0))
        if re.search(r"\b[A-Z][a-z]{2,}\b", content) is not None:
            lines_with_capitalized_word += 1

    total = len(first_letters)
    if total == 0:
        return "mixed"
    lower_ratio = sum(1 for letter in first_letters if letter.islower()) / total
    if lower_ratio >= 0.85 and lines_with_capitalized_word <= total * 0.2:
        return "mostly lowercase"
    return "standard sentence case" if lower_ratio <= 0.25 else "mixed"
697
+
698
+
699
def infer_argument_pattern(text: str) -> str:
    """Label how *text* tends to argue, using simple surface counts.

    Checks run in this order and the first that passes wins:

    * ``"question-led"`` - more than 18% of sentences end with ``?``
    * ``"data-led"`` - numeric tokens >= max(4, sentences // 10)
    * ``"narrative"`` - first-person pronouns >= max(6, sentences // 4)
    * ``"contrarian"`` - contrast words >= max(8, sentences // 3)

    Text with no sentences, or that passes no check, is ``"mixed"``.
    """
    sentence_list = sentences(text)
    total = len(sentence_list)
    if total == 0:
        return "mixed"
    lowered = text.lower()

    def occurrences(pattern: str) -> int:
        return len(re.findall(pattern, lowered))

    questions = sum(1 for sentence in sentence_list if sentence.rstrip().endswith("?"))
    if questions / total > 0.18:
        return "question-led"
    if occurrences(r"\b\d+(?:\.\d+)?%?\b") >= max(4, total // 10):
        return "data-led"
    if occurrences(r"\b(?:i|we|my|our|me|us)\b") >= max(6, total // 4):
        return "narrative"
    if occurrences(r"\b(?:but|actually|instead|not|wrong|real|because)\b") >= max(8, total // 3):
        return "contrarian"
    return "mixed"
717
+
718
+
719
def first_words(text: str, count: int = 7) -> str:
    """Return the first *count* word tokens of lowercased *text*, space-joined."""
    tokens = words(text.lower())
    leading = tokens[:count]
    return " ".join(leading)
722
+
723
+
724
def top_opening_moves(paragraph_list: list[str], limit: int = 8) -> list[str]:
    """Return the most frequent paragraph openings, most common first.

    Each paragraph is reduced to its first six lowercase words via
    ``first_words``; openings shorter than three words are ignored.

    Args:
        paragraph_list: Paragraphs to inspect.
        limit: Maximum number of openings to return.

    Returns:
        Up to *limit* openings ordered by descending frequency; ties keep the
        order in which the openings first appeared.
    """
    counts: dict[str, int] = {}
    for paragraph in paragraph_list:
        move = first_words(paragraph, 6)
        if len(move.split()) < 3:
            continue
        counts[move] = counts.get(move, 0) + 1
    # dicts iterate in insertion order and sorted() is stable, so ties stay in
    # first-seen order without a linear list.index() lookup for every key.
    ranked = sorted(counts, key=lambda move: -counts[move])
    return ranked[:limit]
736
+
737
+
738
def choose_anchors(paragraph_list: list[str], limit: int = 3) -> list[str]:
    """Pick representative passages from *paragraph_list*.

    Whitespace runs in each paragraph are collapsed to single spaces.
    Paragraphs of 80 to 420 characters are preferred; when none qualify, every
    non-blank paragraph is used, cut to 360 characters. Candidates whose first
    80 characters match an earlier one (ignoring case) are skipped. Selection
    stops once *limit* passages have been collected, and each returned passage
    is at most 360 characters long.
    """
    def squash(paragraph: str) -> str:
        return re.sub(r"\s+", " ", paragraph).strip()

    pool = [flat for flat in map(squash, paragraph_list) if 80 <= len(flat) <= 420]
    if not pool:
        pool = [squash(paragraph)[:360] for paragraph in paragraph_list if paragraph.strip()]

    chosen: list[str] = []
    seen_keys: set[str] = set()
    for candidate in pool:
        fingerprint = candidate[:80].lower()
        if fingerprint in seen_keys:
            continue
        seen_keys.add(fingerprint)
        chosen.append(candidate[:360])
        # The limit is checked only after appending, as in the original loop.
        if len(chosen) >= limit:
            break
    return chosen
757
+
758
+
759
def build_profile(paths: list[str], name: str) -> dict[str, Any]:
    """Build a sample-grounded voice profile from the text files under *paths*.

    Args:
        paths: Files and/or directories, expanded by ``iter_text_files``.
        name: Label stored in the profile's ``"name"`` field.

    Returns:
        A dict of plain strings, numbers, booleans, lists and dicts holding
        sentence and paragraph statistics, opening moves, anchor passages and
        fixed rewrite guidance.

    Raises:
        SystemExit: If a path does not exist (raised by ``iter_text_files``) or
            no file yields any text after stripping.
    """
    samples = []
    for path in iter_text_files(paths):
        text = read_text(path).strip()
        if text:
            samples.append({"path": str(path), "text": text})

    if not samples:
        raise SystemExit("no readable text samples found")

    sample_texts = [sample["text"] for sample in samples]
    combined = "\n\n".join(sample_texts)
    sentence_list = sentences(combined)
    paragraph_list = paragraphs(combined)
    # sentences() only yields pieces that contain at least one word, so each
    # sentence is tokenized once here instead of filtered and re-tokenized.
    sentence_lengths = [len(words(sentence)) for sentence in sentence_list]
    paragraph_sentence_counts = [max(1, len(sentences(paragraph))) for paragraph in paragraph_list]
    line_list = [line for text in sample_texts for line in text.splitlines()]
    avg_sentence = round(sum(sentence_lengths) / len(sentence_lengths), 1) if sentence_lengths else 0
    avg_paragraph = (
        round(sum(paragraph_sentence_counts) / len(paragraph_sentence_counts), 1)
        if paragraph_sentence_counts
        else 0
    )
    # Computed once; used in both the cadence summary and the sentence stats.
    sentence_variance = variance_label(sentence_lengths)
    opening_moves = top_opening_moves(paragraph_list)
    case_style = infer_case_style(line_list)
    argument_pattern = infer_argument_pattern(combined)
    anchors = choose_anchors(paragraph_list)

    cadence = [
        f"average sentence length around {avg_sentence} words",
        f"sentence length variance is {sentence_variance}",
        f"average paragraph length around {avg_paragraph} sentences",
    ]
    if case_style == "mostly lowercase":
        cadence.append("leans lowercase in visible prose")

    # Fixed list of phrasings to avoid; it does not depend on the samples.
    never_list = [
        "here's the thing",
        "let's be honest",
        "at the end of the day",
        "not just x but y",
        "which is another way of saying",
        "in other words",
        "the moment x becomes y",
        "same x. better y.",
    ]

    voice_rules = [
        "trust the supplied samples over generic style advice",
        "open from a concrete observation, scene, mechanism, or quoted line",
        "keep the writer's natural sentence and paragraph rhythm",
        "preserve specific roughness when it carries the voice",
        "repair AI-pattern drift line by line instead of rewriting clean prose",
    ]
    if opening_moves:
        voice_rules.append("study these sample opening moves before drafting: " + "; ".join(opening_moves[:4]))

    return {
        "profile_version": "hold-your-voice-portable-v1",
        "name": name,
        "source_count": len(samples),
        "sources": [{"path": sample["path"], "chars": len(sample["text"])} for sample in samples],
        "word_count": len(words(combined)),
        "sentence": {"avg_words": avg_sentence, "variance": sentence_variance},
        "paragraph": {"avg_sentences": avg_paragraph},
        "signature": {
            "case_style": case_style,
            "argument_pattern": argument_pattern,
            "opening_moves": opening_moves,
            "cadence": cadence,
            "anchors": anchors,
            "never_list": never_list,
        },
        "voice_rules": voice_rules,
        "ai_eliminator": {
            "rewrite_scope": "flagged-lines-only",
            "preserve_surrounding_lines": True,
            "avoid_polished_founder_cadence": True,
        },
    }
838
+
839
+
840
def line_style_hits(line: str) -> list[dict[str, str]]:
    """Return line-level style findings for a single line of prose.

    Args:
        line: The line to inspect. ``None``, empty and whitespace-only input
            produce no findings.

    Returns:
        A list of dicts, each with a ``"rule"`` identifier and a ``"phrase"``
        that is either the offending text (cut to 160 characters) or a fixed
        description of the finding.
    """
    stripped = (line or "").strip()
    low = stripped.lower()
    if not low:
        return []
    excerpt = stripped[:160]
    hits = []

    line_words = re.findall(r"[a-z']+", low)
    abstract_count = sum(1 for word in line_words if word in ABSTRACT_STYLE_WORDS)
    # Entries such as "cutting-edge" or "tipping point" can never equal a
    # single [a-z']+ token, so they are looked up as substrings of the line.
    abstract_count += sum(
        low.count(term)
        for term in ABSTRACT_STYLE_WORDS
        if "-" in term or " " in term
    )
    # A concrete example or any digit is taken as evidence the line is grounded.
    if abstract_count >= 3 and not re.search(r"\b(?:for example|for instance|such as)\b|\d", low):
        hits.append({"rule": "abstract_noun_cluster", "phrase": excerpt})
    if GENERIC_OPENERS.match(low):
        hits.append({"rule": "generic_opening_generalization", "phrase": excerpt})
    if QUESTION_OPENER.match(low):
        hits.append({"rule": "voice_question_opener", "phrase": "opens with a question instead of a concrete observation"})
    if LESSON_OPENER.match(low):
        hits.append({"rule": "voice_lesson_opener", "phrase": "opens with a lesson or inspirational claim"})
    if CTA_ENDINGS.search(low):
        hits.append({"rule": "cta_ending", "phrase": excerpt})
    # detect TED-talk contrastive slogan pattern: "It's not X, it's Y" in a single line
    if re.search(r"\bit'?s\s+not\b.{0,40}\bit'?s\b", low):
        hits.append({"rule": "ted_talk_slogan", "phrase": excerpt})
    # detect perfect 6-8 word marketing sentence that starts generic + has buzzword density
    for part in re.split(r"(?<=[.!?])\s+", stripped):
        wc = len(re.findall(r"[a-zA-Z']+", part))
        if not (6 <= wc <= 8 and part and part[-1] in ".!?"):
            continue
        part_low = part.lower()
        # \b makes the opener a whole word, so "Attention ..." or "Items ..."
        # no longer pass as the generic starters "a"/"an"/"it".
        generic_start = re.match(r"^(?:the|your|this|a|an|it|our|most|many|some|all)\b", part_low)
        has_buzzword = bool(re.search(r"\b(?:attention|trust|retention|brand|growth|strategy|content|value|customer|product|data)\b", part_low))
        if generic_start and has_buzzword:
            hits.append({"rule": "perfect_marketing_sentence", "phrase": part.strip()[:160]})
            # Report at most one such sentence per line.
            break
    return hits
872
+
873
+
874
def _structural_analysis(text: str) -> list[dict[str, Any]]:
    """Analyze structural/rhythmic properties beyond individual word patterns.

    Runs document-level heuristics (sentence-length variation, paragraph
    uniformity, list structure, contraction rate, formal hedges, intensifiers,
    absence of fragments) and returns one hit dict per triggered rule. Each
    hit has ``rule``, ``phrase`` and ``line``; ``line`` is 0 for whole-document
    findings and the first list-item line for ``over_structured_lists``.
    Returns ``[]`` when the text contains no sentences.
    """
    source = text or ""
    hits: list[dict[str, Any]] = []
    sentence_list = sentences(text)
    paragraph_list = paragraphs(text)

    if not sentence_list:
        return hits

    # --- Burstiness (sentence length variance) ---
    lengths = [len(words(s)) for s in sentence_list if words(s)]
    if len(lengths) >= 5:
        mean = sum(lengths) / len(lengths)
        stdev = math.sqrt(sum((n - mean) ** 2 for n in lengths) / len(lengths))
        cv = stdev / mean if mean > 0 else 0
        if cv < 0.35:
            hits.append({
                "rule": "low_burstiness",
                "phrase": f"sentence length variation {cv:.2f} (< 0.35 = AI-flat rhythm)",
                "line": 0,
            })

    # --- Mechanical paragraph structure ---
    if len(paragraph_list) >= 3:
        para_sent_counts = [max(1, len(sentences(p))) for p in paragraph_list]
        para_mean = sum(para_sent_counts) / len(para_sent_counts)
        if para_mean > 0:
            para_stdev = math.sqrt(sum((c - para_mean) ** 2 for c in para_sent_counts) / len(para_sent_counts))
            para_cv = para_stdev / para_mean
            if para_cv < 0.30:
                hits.append({
                    "rule": "mechanical_paragraphs",
                    "phrase": f"paragraphs all similar length (cv={para_cv:.2f}, mean={para_mean:.1f} sentences)",
                    "line": 0,
                })

    # --- Over-structured lists ---
    list_item_pattern = re.compile(r"^[\s]*[-*•]\s+", re.M)
    if len(list_item_pattern.findall(source)) >= 6:
        # Single-sentence paragraphs that contain at least two bullet characters.
        triad_count = sum(
            1 for p in paragraph_list
            if len(sentences(p)) == 1 and len(re.findall(r"[-*•]", p)) >= 2
        )
        if triad_count >= 2:
            # 1-based number of the first line that starts a list item; 0 if
            # none is found when matching line by line.
            line_num = next(
                (idx for idx, raw in enumerate(source.split("\n"), 1) if list_item_pattern.match(raw)),
                0,
            )
            hits.append({
                "rule": "over_structured_lists",
                "phrase": "lists follow rigid 3-item pattern throughout",
                "line": line_num,
            })

    # --- Uniform sentence rhythm within paragraphs ---
    ai_rhythm_count = 0
    for para in paragraph_list:
        para_sentences = sentences(para)
        if len(para_sentences) >= 3:
            s_lengths = [len(words(s)) for s in para_sentences if words(s)]
            if s_lengths and all(12 <= n <= 22 for n in s_lengths):
                ai_rhythm_count += 1
    if ai_rhythm_count >= max(1, len(paragraph_list) * 0.6) and len(paragraph_list) >= 2:
        hits.append({
            "rule": "uniform_paragraph_rhythm",
            "phrase": f"{ai_rhythm_count}/{len(paragraph_list)} paragraphs have mechanical 12-22 word sentence uniformity",
            "line": 0,
        })

    # --- Formal/tone analysis: contractions ratio ---
    # Both the straight (') and typographic (’) apostrophe are accepted.
    # The apostrophe stays optional only where dropping it cannot produce a
    # different English word; "can't", "won't", "it's", "let's", "we're",
    # "i'll" and "we'll" require it, otherwise "cant", "wont", "its", "lets",
    # "were", "ill" and "well" would be counted as contractions.
    contraction_pattern = re.compile(
        r"\b(?:"
        r"(?:don|isn|aren|wasn|weren|hasn|haven|hadn|shouldn|wouldn|couldn|mightn|mustn)['’]?t|"
        r"(?:that|what|there|here|who)['’]?s|"
        r"i['’]?m|(?:you|they)['’]?re|(?:i|you|we|they)['’]?ve|(?:you|they)['’]?ll|"
        r"(?:can|won)['’]t|(?:it|let)['’]s|we['’]re|(?:i|we)['’]ll"
        r")\b",
        re.I,
    )
    contractions = len(contraction_pattern.findall(source))
    total_words = len(words(source))
    contraction_ratio = contractions / max(1, total_words / 100)  # per 100 words
    if total_words > 200 and contraction_ratio < 0.8:
        hits.append({
            "rule": "low_contractions",
            "phrase": f"{contraction_ratio:.1f} contractions per 100 words (human average 1.5-3.0; overly formal/rigid)",
            "line": 0,
        })

    # --- Overly formal hedging density ---
    formal_hedges_pattern = re.compile(
        r"\b(?:it\s+is\s+important\s+to\s+note|it\s+should\s+be\s+noted|it\s+is\s+worth\s+noting|"
        r"it\s+is\s+crucial\s+to|it\s+is\s+essential\s+to|it\s+appears\s+that|"
        r"there\s+is\s+a\s+possibility\s+that|one\s+should\s+consider|"
        r"it\s+is\s+imperative\s+to|it\s+is\s+necessary\s+to)\b",
        re.I,
    )
    formal_hedges = len(formal_hedges_pattern.findall(source))
    if formal_hedges >= 2:
        hits.append({
            "rule": "formal_hedging_density",
            "phrase": f"{formal_hedges} formal hedging phrases found (institutional/overly polite tone)",
            "line": 0,
        })

    # --- Non-specific intensifiers density ---
    intensifiers_pattern = re.compile(
        r"\b(?:remarkably|incredibly|amazingly|extraordinarily|exceptionally|"
        r"tremendously|absolutely|completely|thoroughly|utterly)\s+\w+\b",
        re.I,
    )
    intensifiers = len(intensifiers_pattern.findall(source))
    if intensifiers >= 3:
        hits.append({
            "rule": "generic_intensifiers",
            "phrase": f"{intensifiers} generic intensifiers (remarkably/incredibly/amazingly) - marketing tone",
            "line": 0,
        })

    # --- Perfect grammar / no fragments ---
    # A fragment is a punctuated sentence of at most four words that is not a
    # simple interjection (yes/no/hey/...).
    total_sentences = len(sentence_list)
    fragments = sum(
        1 for s in sentence_list
        if len(words(s)) <= 4
        and s.strip()
        and s.strip()[-1] in ".!?"
        and not re.search(r"\b(?:yes|no|hey|hi|ok|bye|wow|oh)\b", s.lower())
    )
    fragment_ratio = fragments / max(1, total_sentences)
    if total_sentences > 20 and fragment_ratio < 0.02:
        hits.append({
            "rule": "no_fragments",
            "phrase": f"only {fragments} sentence fragments in {total_sentences} sentences - over-polished",
            "line": 0,
        })

    return hits
998
+
999
+
1000
def scan_text(text: str) -> list[dict[str, Any]]:
    """Run every deterministic check over ``text`` and return the sorted hits.

    Combines four sources: the regex rules in ``AI_PATTERN_RULES``, the
    per-line checks from ``line_style_hits``, the document-level checks from
    ``_structural_analysis``, and a single "staccato triplet" check. The
    result is ordered by ``(line, rule)``.
    """
    body = text or ""
    collected: list[dict[str, Any]] = []

    # Regex rules: one hit per non-empty match, located by counting newlines.
    for rule_id, pattern in AI_PATTERN_RULES:
        for match in pattern.finditer(body):
            snippet = match.group(0).strip()
            if snippet:
                line_no = body[: match.start()].count("\n") + 1
                collected.append({"line": line_no, "rule": rule_id, "phrase": snippet[:160]})

    # Per-line style checks.
    numbered_lines = list(enumerate(body.splitlines(), 1))
    for line_no, line in numbered_lines:
        collected.extend(
            {"line": line_no, "rule": hit["rule"], "phrase": hit["phrase"], "text": line.strip()[:240]}
            for hit in line_style_hits(line)
        )

    # Structural / rhythmic analysis
    collected.extend(_structural_analysis(text))

    # Staccato triplet detection — only fire when sentences are clearly performative
    short_runs = []
    for line_no, line in numbered_lines:
        for sentence in re.split(r"(?<=[.!?])\s+", line):
            tokens = words(sentence)
            if tokens:
                short_runs.append((line_no, sentence.strip(), len(tokens)))

    connector_words = {"but", "and", "or", "so", "because", "then", "if", "when", "while"}
    for window in zip(short_runs, short_runs[1:], short_runs[2:]):
        counts = [count for _, _, count in window]
        if any(count > 5 for count in counts):
            continue
        combined = " ".join(entry[1] for entry in window).lower()
        padded = f" {combined} "
        if any(f" {w} " in padded for w in connector_words):
            continue
        # No connector from here on: fire for three very short sentences, or
        # for any window without a first-person word.
        very_short = all(count <= 3 for count in counts)
        first_person = bool(re.search(r"\b(?:i|we|my|our|me|us)\b", combined))
        if very_short or not first_person:
            collected.append(
                {
                    "line": window[0][0],
                    "rule": "voice_staccato_triplet",
                    "phrase": "three short sentences in a row reads like performance",
                    "text": window[0][1],
                }
            )
            break  # report at most one triplet per document

    return sorted(collected, key=lambda item: (item.get("line", 0), item.get("rule", "")))
1048
+
1049
+
1050
def format_scan_text(path: str, text: str, hits: list[dict[str, Any]]) -> str:
    """Render scan hits for ``path`` as a plain-text report.

    Returns a single "no issues" line when ``hits`` is empty, otherwise a
    header line followed by one ``- line N: rule - phrase`` entry per hit.
    ``text`` is accepted but not used by the formatter.
    """
    if not hits:
        return f"{path}: no deterministic AI-pattern issues found"
    report = [f"{path}: {len(hits)} issue(s)"]
    report.extend(
        f"- line {hit.get('line')}: {hit.get('rule')} - {hit.get('phrase', '')}"
        for hit in hits
    )
    return "\n".join(report)
1058
+
1059
+
1060
def load_draft(path: str) -> tuple[str, str]:
    """Load a draft and return ``(display_name, text)``.

    ``"-"`` reads standard input and is reported as ``"stdin"``. Any other
    value is treated as a file path (with ``~`` expanded); a missing file
    terminates the program via ``SystemExit``.
    """
    if path != "-":
        draft_path = Path(path).expanduser()
        if draft_path.exists():
            return str(draft_path), read_text(draft_path)
        raise SystemExit(f"draft not found: {draft_path}")
    return "stdin", sys.stdin.read()
1067
+
1068
+
1069
def build_rewrite_prompt(draft_name: str, draft: str, profile_text: str | None, constraints: str = "", meta: dict[str, Any] | None = None) -> str:
    """Build the line-level rewrite prompt for a draft.

    Scans the draft, optionally drops hits suppressed by ``meta`` (via
    ``filter_hits_by_weights``), and returns a prompt containing the voice
    profile, extra constraints, the flagged lines, and the numbered draft.
    """
    flagged = scan_text(draft)
    if meta:
        flagged = filter_hits_by_weights(flagged, meta)

    issue_rows = [
        f"- line {hit['line']} [{hit['rule']}]: {hit.get('phrase', '')}"
        for hit in flagged
    ]
    issue_lines = "\n".join(issue_rows) or "- none found by deterministic scan"

    numbered_rows = [f"{idx}: {line}" for idx, line in enumerate(draft.splitlines(), 1)]
    numbered_draft = "\n".join(numbered_rows)

    # Fall back to placeholders when the profile or constraints are blank.
    profile_block = (profile_text or "").strip() or "(no voice profile supplied)"
    constraints_block = constraints.strip() or "(none)"

    return f"""Rewrite only the flagged lines. Do not rewrite the whole piece.

Return only valid JSON in this exact shape:
{{"replacements":[{{"line":1,"text":"replacement line"}}]}}

Rules:
- Include only flagged line numbers.
- Preserve unflagged lines exactly by not returning them.
- Preserve the original argument and local meaning.
- Use the voice profile as the benchmark when present.
- Remove AI cadence, polished founder cadence, abstract strategy-deck language, and generic lesson shapes.
- Do not add new sections, hooks, CTAs, markdown, bullets, or commentary.

Voice profile:
{profile_block}

Extra constraints:
{constraints_block}

Flagged lines:
{issue_lines}

Draft with line numbers ({draft_name}):
{numbered_draft}
"""
1107
+
1108
+
1109
# Fallback "never list" of phrase shapes, used when a profile signature
# carries no never_list of its own.
DEFAULT_NEVER_LIST = [
    "here's the thing",
    "let's be honest",
    "at the end of the day",
    "not just x but y",
    "which is another way of saying",
    "in other words",
    "the moment x becomes y",
    "same x. better y.",
]

# Version tag written into per-session signal reports.
SIGNAL_VERSION = "hold-your-voice-signal-v1"
# Version tag stamped on the meta dict that holds temporal pattern weights.
META_SIGNAL_VERSION = "hold-your-voice-signal-v2"

PATTERN_CONFIDENCE_THRESHOLD = 0.30  # patterns below this are auto-suppressed
# Values a temporal pattern's "status" field can take.
PATTERN_STATUS = ("active", "declining", "stale")
1125
+
1126
+
1127
def lines_changed_pct(orig_line: str, acc_line: str) -> bool:
    """Return True when the two lines differ after trimming surrounding whitespace.

    Despite the name, the result is a plain boolean, not a percentage.
    """
    before = orig_line.strip()
    after = acc_line.strip()
    return before != after
1130
+
1131
+
1132
def build_signal_report(
    original_path: str,
    accepted_path: str,
    original_text: str,
    accepted_text: str,
    profile: dict[str, Any] | None,
) -> dict[str, Any]:
    """Compare an original draft with its accepted version and report learning signals.

    Lines are compared position by position over the shorter of the two
    texts. A flagged line that changed counts each of its rules as accepted;
    a flagged line left alone counts them as overridden; a changed line that
    was not flagged and is longer than 40 characters is kept as a candidate
    "new removal" (at most 10, de-duplicated on the first 60 characters).
    ``profile`` is accepted but not read here.
    """
    orig_lines = original_text.splitlines(keepends=True)
    acc_lines = accepted_text.splitlines(keepends=True)

    # line number -> rule ids flagged on that line of the original
    rules_by_line: dict[int, list[str]] = {}
    for hit in scan_text(original_text):
        rules_by_line.setdefault(hit["line"], []).append(hit["rule"])

    patterns_accepted: dict[str, int] = {}
    patterns_overridden: dict[str, int] = {}
    changed_unflagged: dict[int, str] = {}
    total_changed = 0

    for line_no, (before, after) in enumerate(zip(orig_lines, acc_lines), 1):
        changed = lines_changed_pct(before, after)
        if changed:
            total_changed += 1
        rule_ids = rules_by_line.get(line_no, [])
        if rule_ids:
            tally = patterns_accepted if changed else patterns_overridden
            for rule_id in rule_ids:
                tally[rule_id] = tally.get(rule_id, 0) + 1
        elif changed:
            # user changed a line that wasn't flagged — potential new pattern
            snippet = before.strip()
            if len(snippet) > 40:
                changed_unflagged[line_no] = snippet[:240]

    compared = min(len(orig_lines), len(acc_lines))
    full_rewrite = total_changed > max(1, compared * 0.8)

    # session stats from accepted
    acc_sentence_lengths = [len(words(s)) for s in sentences(accepted_text) if words(s)]
    acc_paragraph_sentence_counts = [max(1, len(sentences(p))) for p in paragraphs(accepted_text)]
    avg_s = 0
    if acc_sentence_lengths:
        avg_s = round(sum(acc_sentence_lengths) / len(acc_sentence_lengths), 1)
    avg_p = 0
    if acc_paragraph_sentence_counts:
        avg_p = round(sum(acc_paragraph_sentence_counts) / len(acc_paragraph_sentence_counts), 1)

    # simplified new_removals: surface a sample of changed-unflagged lines for review
    new_removals: list[dict[str, Any]] = []
    seen_keys: set[str] = set()
    for line_no in sorted(changed_unflagged):
        phrase = changed_unflagged[line_no]
        dedupe_key = phrase.lower().strip()[:60]
        if dedupe_key in seen_keys:
            continue
        seen_keys.add(dedupe_key)
        new_removals.append({"line": line_no, "original_text": phrase, "context": ""})
        if len(new_removals) >= 10:
            break

    return {
        "signal_version": SIGNAL_VERSION,
        "session": {
            "original_path": original_path,
            "accepted_path": accepted_path,
            "full_rewrite": full_rewrite,
        },
        "patterns_accepted": dict(sorted(patterns_accepted.items())),
        "patterns_overridden": dict(sorted(patterns_overridden.items())),
        "new_removals": new_removals,
        "session_stats": {
            "original_words": len(words(original_text)),
            "accepted_words": len(words(accepted_text)),
            "accepted_avg_sentence": avg_s,
            "accepted_avg_paragraph": avg_p,
            "accepted_sentence_count": len(acc_sentence_lengths),
            "accepted_paragraph_count": len(acc_paragraph_sentence_counts),
        },
    }
1216
+
1217
+
1218
def _current_date() -> str:
    """Return today's local date formatted as an ISO string (YYYY-MM-DD)."""
    today = datetime.date.today()
    return today.isoformat()
1220
+
1221
+
1222
def init_temporal_pattern(rule_id: str) -> dict[str, Any]:
    """Create a fresh temporal pattern entry for ``rule_id``.

    The entry starts at confidence 0.0 with status ``"active"``;
    ``first_seen`` and ``last_confirmed`` are both set to today's date.
    ``source_samples`` collects sample paths that triggered the pattern and
    ``contradictions`` records dates when it was flagged but overridden.
    """
    today = _current_date()
    return dict(
        id=rule_id,
        confidence=0.0,
        first_seen=today,
        last_confirmed=today,
        source_samples=[],
        contradictions=[],
        superseded_by=None,
        status="active",
    )
1235
+
1236
+
1237
def evolve_meta_from_signal(
    meta: dict[str, Any],
    patterns_accepted: dict[str, int],
    patterns_overridden: dict[str, int],
    source_samples: list[str] | None = None,
) -> dict[str, Any]:
    """Update temporal pattern weights in ``meta`` from accept/override signals.

    Each pattern entry tracks first_seen, last_confirmed, contradictions per
    date, source_samples, confidence (0.0-1.0), and status.

    - Accepted rules gain 0.08 confidence per occurrence (capped at 0.40 per
      call), are marked ``"active"`` and get ``last_confirmed`` set to today.
    - Overridden rules lose 0.12 per occurrence (capped at 0.50 per call) and
      may move to ``"declining"`` or ``"stale"``.
    - Entries not confirmed for more than 14 days decay by up to 0.15 per
      call and turn ``"stale"`` once below ``PATTERN_CONFIDENCE_THRESHOLD``.

    ``meta`` is modified in place and also returned. Entries missing
    individual fields (for example from a hand-edited or imported meta file)
    are tolerated: missing confidence counts as 0.0 and missing lists are
    created on demand.
    """
    decay_after_days = 14
    now = _current_date()
    today = datetime.date.fromisoformat(now)
    temporal = meta.get("temporal_patterns", {})

    for rule_id, count in patterns_accepted.items():
        tp = temporal.get(rule_id)
        if tp is None:
            tp = init_temporal_pattern(rule_id)
            temporal[rule_id] = tp
        tp["last_confirmed"] = now
        known_samples = tp.setdefault("source_samples", [])
        for sample in source_samples or []:
            if sample not in known_samples:
                known_samples.append(sample)
        # accepted signals boost confidence
        boost = min(count * 0.08, 0.40)  # cap boost per session
        tp["confidence"] = min(1.0, tp.get("confidence", 0.0) + boost)
        tp["status"] = "active"

    for rule_id, count in patterns_overridden.items():
        tp = temporal.get(rule_id)
        if tp is None:
            tp = init_temporal_pattern(rule_id)
            temporal[rule_id] = tp
        contradictions = tp.setdefault("contradictions", [])
        contradictions.append({"date": now, "count": count})
        # overridden signals decrease confidence faster
        penalty = min(count * 0.12, 0.50)
        tp["confidence"] = max(0.0, tp.get("confidence", 0.0) - penalty)
        # determine status
        if len(contradictions) >= 3 and tp["confidence"] < 0.30:
            tp["status"] = "declining"
        if len(contradictions) >= 5 and tp["confidence"] < 0.15:
            tp["status"] = "stale"

    # decay untouched patterns whose last_confirmed is > 14 days ago
    for tp in temporal.values():
        try:
            last_date = datetime.date.fromisoformat(tp.get("last_confirmed", now))
            days_since = (today - last_date).days
        except (ValueError, TypeError):
            # Unparseable or non-string date: treat as freshly confirmed.
            days_since = 0
        if days_since > decay_after_days:
            decay = min(days_since * 0.005, 0.15)  # slow decay over time
            tp["confidence"] = max(0.0, tp.get("confidence", 0.0) - decay)
            if tp["confidence"] < PATTERN_CONFIDENCE_THRESHOLD and tp.get("status") == "active":
                tp["status"] = "stale"

    meta["temporal_patterns"] = temporal
    meta["signal_version"] = META_SIGNAL_VERSION
    meta["last_updated"] = now
    meta["signal_count"] = (
        meta.get("signal_count", 0)
        + sum(patterns_accepted.values())
        + sum(patterns_overridden.values())
    )

    return meta
1302
+
1303
+
1304
def get_active_patterns(meta: dict[str, Any]) -> list[str]:
    """Return the rule ids whose status is "active" and whose confidence meets the threshold."""
    active: list[str] = []
    for rule_id, entry in meta.get("temporal_patterns", {}).items():
        if entry.get("status") != "active":
            continue
        if entry.get("confidence", 0) >= PATTERN_CONFIDENCE_THRESHOLD:
            active.append(rule_id)
    return active
1311
+
1312
+
1313
def get_declining_patterns(meta: dict[str, Any]) -> list[str]:
    """Return the rule ids whose status is "declining" or "stale"."""
    fading: list[str] = []
    for rule_id, entry in meta.get("temporal_patterns", {}).items():
        if entry.get("status") in ("declining", "stale"):
            fading.append(rule_id)
    return fading
1317
+
1318
+
1319
def filter_hits_by_weights(hits: list[dict[str, Any]], meta: dict[str, Any]) -> list[dict[str, Any]]:
    """Drop hits whose rule has been learned as not applicable to this voice.

    A rule is suppressed when its temporal pattern status is "declining" or
    "stale". When nothing is suppressed the input list itself is returned.
    """
    suppressed = {
        rule_id
        for rule_id, entry in meta.get("temporal_patterns", {}).items()
        if entry.get("status") in ("declining", "stale")
    }
    if not suppressed:
        return hits
    return [hit for hit in hits if hit.get("rule") not in suppressed]
1331
+
1332
+
1333
def evolve_profile(
    profile: dict[str, Any],
    meta: dict[str, Any],
    original_text: str,
    accepted_text: str,
    original_path: str = "original",
    accepted_path: str = "accepted",
    new_samples_text: str | None = None,
) -> tuple[dict[str, Any], dict[str, Any]]:
    """One-shot evolution step run after a writing session.

    1. Diff original vs accepted text to extract accept/override signals.
    2. Feed those signals into the temporal pattern weights in ``meta``,
       recording ``original_path`` as the source sample.
    3. If ``new_samples_text`` is non-empty, merge its stats into ``profile``.

    Returns ``(updated_profile, updated_meta)``.
    """
    signal = build_signal_report(original_path, accepted_path, original_text, accepted_text, profile)
    updated_meta = evolve_meta_from_signal(
        meta,
        signal["patterns_accepted"],
        signal["patterns_overridden"],
        source_samples=[original_path],
    )
    if not new_samples_text:
        return profile, updated_meta
    return update_profile(profile, new_samples_text.strip()), updated_meta
1360
+
1361
+
1362
def update_profile(profile: dict[str, Any], new_samples_text: str) -> dict[str, Any]:
    """Merge new writing samples into an existing profile using rolling averages.

    The existing averages are weighted by the profile's ``source_count``
    (default 1) and the new text counts as one additional source. Opening
    moves and anchors are merged as de-duplicated lists (capped at 8 and 5
    entries) rather than averaged. The cadence notes are regenerated from the
    merged numbers.

    ``profile`` is modified in place and returned. A profile without a
    ``signature`` section gets one created instead of raising ``KeyError``.
    """
    sentence_list = sentences(new_samples_text)
    paragraph_list = paragraphs(new_samples_text)
    sentence_lengths = [len(words(s)) for s in sentence_list if words(s)]
    paragraph_sentence_counts = [max(1, len(sentences(p))) for p in paragraph_list]

    old_count = profile.get("source_count", 1)
    new_count = 1  # treating this update as one new source
    total_count = old_count + new_count

    def rolling(old_avg: float, new_avg: float) -> float:
        # Weighted mean when both sides have data; otherwise whichever exists.
        if old_avg and new_avg:
            return round((old_avg * old_count + new_avg * new_count) / total_count, 1)
        return old_avg or new_avg

    # rolling average for sentence length
    old_avg_words = profile.get("sentence", {}).get("avg_words", 0)
    new_avg_words = round(sum(sentence_lengths) / len(sentence_lengths), 1) if sentence_lengths else 0
    merged_avg_words = rolling(old_avg_words, new_avg_words)

    # rolling average for paragraph length
    old_avg_par = profile.get("paragraph", {}).get("avg_sentences", 0)
    new_avg_par = (
        round(sum(paragraph_sentence_counts) / len(paragraph_sentence_counts), 1)
        if paragraph_sentence_counts
        else 0
    )
    merged_avg_par = rolling(old_avg_par, new_avg_par)

    # Reads and writes go through the same dict so a profile that lacks a
    # signature section cannot fail halfway through the update.
    signature = profile.setdefault("signature", {})

    # merge opening moves: new top moves first, then the existing ones
    new_moves = top_opening_moves(paragraph_list, 4)
    merged_moves = list(dict.fromkeys(new_moves + signature.get("opening_moves", [])))[:8]

    # merge anchors: keep old anchors, add new ones whose first 80 characters
    # (case-insensitive) are not already present
    merged_anchors = list(signature.get("anchors", []))
    seen = {anchor[:80].lower() for anchor in merged_anchors}
    for anchor in choose_anchors(paragraph_list, 2):
        key = anchor[:80].lower()
        if key not in seen:
            seen.add(key)
            merged_anchors.append(anchor)

    # Raw lengths are not stored, so the variance label cannot be recombined:
    # the label of the new samples wins when there are at least three
    # sentences, otherwise the stored label (default "medium") is kept.
    new_variance = variance_label(sentence_lengths) if len(sentence_lengths) >= 3 else None
    merged_variance = new_variance or profile.get("sentence", {}).get("variance", "medium")

    # regenerate cadence notes from the merged numbers
    updated_cadence = [
        f"average sentence length around {merged_avg_words} words",
        f"sentence length variance is {merged_variance}",
        f"average paragraph length around {merged_avg_par} sentences",
    ]
    if signature.get("case_style", "mixed") == "mostly lowercase":
        updated_cadence.append("leans lowercase in visible prose")

    # rebuild profile
    profile["source_count"] = total_count
    profile["word_count"] = profile.get("word_count", 0) + len(words(new_samples_text))
    profile["sentence"] = {"avg_words": merged_avg_words, "variance": merged_variance}
    profile["paragraph"] = {"avg_sentences": merged_avg_par}
    signature["opening_moves"] = merged_moves
    signature["anchors"] = merged_anchors[:5]
    signature["cadence"] = updated_cadence

    return profile
1438
+
1439
+
1440
def cmd_profile_update(args: argparse.Namespace) -> int:
    """CLI handler: merge new text samples into an existing profile.

    Reads the profile JSON at ``args.profile``, gathers text from every file
    found under ``args.paths``, and writes the updated profile via
    ``write_or_print`` to ``args.out``. Exits with ``SystemExit`` when the
    profile file is missing; returns 0 otherwise.
    """
    profile_path = Path(args.profile).expanduser()
    if not profile_path.exists():
        raise SystemExit(f"profile not found: {profile_path}")
    raw_profile = profile_path.read_text(encoding="utf-8", errors="ignore")
    profile = json.loads(raw_profile)

    # Every sample is preceded by a blank-line separator.
    chunks: list[str] = []
    for raw_path in args.paths:
        for sample_path in iter_text_files([raw_path]):
            chunks.append("\n\n" + read_text(sample_path))
    samples = "".join(chunks).strip()

    if not samples:
        print("no new text samples found; profile unchanged")
        return 0

    updated = update_profile(profile, samples)
    write_or_print(json.dumps(updated, indent=2, ensure_ascii=False), args.out)
    return 0
1461
+
1462
+
1463
def cmd_profile_export(args: argparse.Namespace) -> int:
    """CLI handler: bundle a profile plus optional meta into a portable .hyv file.

    The bundle carries a version tag, an export timestamp (local time, second
    precision), the profile, and the meta JSON when ``args.meta`` names an
    existing file. Output goes through ``write_or_print`` to ``args.out``.
    """
    profile_path = Path(args.profile).expanduser()
    if not profile_path.exists():
        raise SystemExit(f"profile not found: {profile_path}")

    stamp = datetime.datetime.now().isoformat()[:19]
    bundle: dict[str, Any] = {
        "bundle_version": "hold-your-voice-bundle-v1",
        "exported_at": stamp,
        "profile": json.loads(profile_path.read_text(encoding="utf-8", errors="ignore")),
    }

    meta_path = Path(args.meta).expanduser() if args.meta else None
    if meta_path is not None and meta_path.exists():
        bundle["meta"] = json.loads(meta_path.read_text(encoding="utf-8", errors="ignore"))

    rendered = json.dumps(bundle, indent=2, ensure_ascii=False)
    write_or_print(rendered, args.out)
    return 0
1481
+
1482
+
1483
def cmd_profile_import(args: argparse.Namespace) -> int:
    """CLI handler: import a .hyv bundle into a destination profile.

    - If the destination profile is missing or empty, the bundled profile is
      copied as-is.
    - Otherwise the two are merged: counts are summed, the side with the
      larger ``source_count`` supplies sentence/paragraph stats and goes first
      when opening moves and anchors are combined. The never list is merged
      only when the source has the larger count.

    In both cases any bundled meta is merged next to the expanded destination
    path by ``_merge_import_meta``. Raises ``SystemExit`` when the bundle is
    missing or has an unknown ``bundle_version``; returns 0 on success.
    """
    source_path = Path(args.source).expanduser()
    if not source_path.exists():
        raise SystemExit(f"source not found: {source_path}")
    source = json.loads(source_path.read_text(encoding="utf-8", errors="ignore"))
    if source.get("bundle_version") != "hold-your-voice-bundle-v1":
        raise SystemExit(f"unknown bundle version: {source.get('bundle_version')}")

    dest_path = Path(args.profile).expanduser()
    dest_profile: dict[str, Any] = {}
    if dest_path.exists():
        dest_profile = json.loads(dest_path.read_text(encoding="utf-8", errors="ignore"))

    source_profile = source.get("profile", {})

    # if destination is empty, this is a pure copy of the source profile
    if not dest_profile:
        dest_profile = dict(source_profile)
        write_or_print(json.dumps(dest_profile, indent=2, ensure_ascii=False), args.profile)
        print(f"imported into {args.profile}")
        # Pass the expanded path (as the merge branch does) so a "~" in
        # args.profile cannot leak into the derived meta file location.
        _merge_import_meta(source, args, str(dest_path))
        return 0

    src_count = source_profile.get("source_count", 0)
    dest_count = dest_profile.get("source_count", 0)
    source_wins = src_count > dest_count

    dest_profile["source_count"] = dest_count + src_count
    dest_profile["word_count"] = dest_profile.get("word_count", 0) + source_profile.get("word_count", 0)
    if source_wins:
        # source has more signal; take its stats
        dest_profile["sentence"] = source_profile.get("sentence", {})
        dest_profile["paragraph"] = source_profile.get("paragraph", {})

    # merge signature fields; the side with more signal is listed first so
    # its entries survive the length caps
    dest_sig = dest_profile.get("signature", {})
    src_sig = source_profile.get("signature", {})
    first, second = (src_sig, dest_sig) if source_wins else (dest_sig, src_sig)
    merged_moves = list(dict.fromkeys(first.get("opening_moves", []) + second.get("opening_moves", [])))[:8]
    merged_anchors = list(dict.fromkeys(first.get("anchors", []) + second.get("anchors", [])))[:5]
    dest_sig["opening_moves"] = merged_moves
    dest_sig["anchors"] = merged_anchors
    if source_wins:
        dest_sig["never_list"] = list(
            dict.fromkeys(
                src_sig.get("never_list", DEFAULT_NEVER_LIST)
                + dest_sig.get("never_list", DEFAULT_NEVER_LIST)
            )
        )
    dest_profile["signature"] = dest_sig

    write_or_print(json.dumps(dest_profile, indent=2, ensure_ascii=False), args.profile)
    print(f"imported into {args.profile}", end="")
    # merge meta if present
    _merge_import_meta(source, args, str(dest_path))
    print()
    return 0
1550
+
1551
+
1552
def _merge_import_meta(source: dict[str, Any], args: argparse.Namespace, dest_profile_path: str) -> None:
    """Merge the meta section of a .hyv bundle into a destination meta file.

    Does nothing when the bundle has no meta. The destination is ``args.meta``
    when given, otherwise ``dest_profile_path`` with its suffix replaced by
    ``.meta.json``. An unreadable, malformed, or non-object destination file
    is treated as empty.

    Merge rules:
    - ``pattern_weights``: per key, the higher weight wins.
    - ``temporal_patterns``: entries the destination lacks are copied from
      the source; entries the destination already has are kept unchanged so
      locally learned state is not overwritten.
    - ``signal_count``: the two counts are summed.

    Writes the merged meta as JSON and prints `` + <path>``.
    """
    source_meta = source.get("meta", {})
    if not source_meta:
        return

    if args.meta:
        mpath = Path(args.meta).expanduser()
    else:
        mpath = Path(dest_profile_path).with_suffix(".meta.json")

    dest_meta: dict[str, Any] = {}
    if mpath.exists():
        try:
            dest_meta = json.loads(mpath.read_text(encoding="utf-8", errors="ignore"))
        except (json.JSONDecodeError, OSError):
            dest_meta = {}
        if not isinstance(dest_meta, dict):
            # Valid JSON but not an object (e.g. a list): nothing to merge into.
            dest_meta = {}

    # merge pattern_weights: the higher weight wins
    src_weights = source_meta.get("pattern_weights", {})
    if src_weights:
        dest_weights = dest_meta.get("pattern_weights", {})
        for key, src_w in src_weights.items():
            if key not in dest_weights or src_w > dest_weights[key]:
                dest_weights[key] = src_w
        dest_meta["pattern_weights"] = dest_weights

    # merge temporal_patterns: fill gaps from the source, keep local entries
    src_temporal = source_meta.get("temporal_patterns", {})
    if src_temporal:
        dest_temporal = dest_meta.get("temporal_patterns", {})
        for rule_id, entry in src_temporal.items():
            dest_temporal.setdefault(rule_id, entry)
        dest_meta["temporal_patterns"] = dest_temporal

    dest_meta["signal_count"] = dest_meta.get("signal_count", 0) + source_meta.get("signal_count", 0)

    mpath.parent.mkdir(parents=True, exist_ok=True)
    mpath.write_text(json.dumps(dest_meta, indent=2, ensure_ascii=False), encoding="utf-8")
    print(f" + {mpath}")
1584
+
1585
+
1586
def render_voice_md(profile: dict[str, Any], meta: dict[str, Any]) -> str:
    """render voice.md — the human-readable voice profile summary.

    args:
        profile: voice profile; reads ``name``, ``sentence``, ``paragraph``,
            ``signature``, ``sources`` and ``source_count``. missing keys fall
            back to placeholders or skip their section.
        meta: learning meta; reads ``temporal_patterns``, ``signal_count`` and
            ``last_updated``.

    returns:
        the markdown document as one newline-joined string.
    """
    lines: list[str] = []
    name = profile.get("name", "unnamed")
    lines.append(f"# voice for {name}")
    lines.append("")
    lines.append("> continuously learned by hold your voice from your writing signals")
    lines.append("")

    # temporal pattern weights, highest confidence first
    temporal = meta.get("temporal_patterns", {})
    signal_count = meta.get("signal_count", 0)
    if temporal:
        lines.append("# evolved pattern weights")
        lines.append("")
        lines.append("| pattern | confidence | status | confirmed |")
        lines.append("|---------|-----------|--------|-----------|")
        sorted_patterns = sorted(temporal.items(), key=lambda x: -x[1].get("confidence", 0))
        for pid, tp in sorted_patterns:
            w = tp.get("confidence", 0)
            s = tp.get("status", "active")
            c = tp.get("last_confirmed", "?")
            lines.append(f"| {pid} | {w:.2f} | {s} | {c} |")
        if signal_count:
            lines.append("")
            lines.append(f"_(based on {signal_count} signals)_")
        lines.append("")

    # voice stats section
    lines.append("# voice stats")
    lines.append("")
    sentence = profile.get("sentence", {})
    paragraph = profile.get("paragraph", {})
    lines.append(f"- sentence length: {sentence.get('avg_words', '?')} words avg (`{sentence.get('variance', '?')}` variance)")
    lines.append(f"- paragraph length: {paragraph.get('avg_sentences', '?')} sentences avg")
    sig = profile.get("signature", {})
    lines.append(f"- case style: {sig.get('case_style', '?')}")
    lines.append(f"- argument pattern: {sig.get('argument_pattern', '?')}")
    lines.append("")

    # cadence
    cadence = sig.get("cadence", [])
    if cadence:
        lines.append("# cadence")
        lines.append("")
        lines.extend(f"- {note}" for note in cadence)
        lines.append("")

    # opening moves (at most six)
    moves = sig.get("opening_moves", [])
    if moves:
        lines.append("# opening moves")
        lines.append("")
        for i, move in enumerate(moves[:6], 1):
            lines.append(f"{i}. \"{move}...\"")
        lines.append("")

    # never list
    never_list = sig.get("never_list", [])
    if never_list:
        lines.append("# banned patterns")
        lines.append("")
        lines.extend(f"- {phrase}" for phrase in never_list)
        lines.append("")

    # anchors (at most three, each cut to 240 chars)
    anchors = sig.get("anchors", [])
    if anchors:
        lines.append("# voice anchors")
        lines.append("")
        for anchor in anchors[:3]:
            # collapse internal whitespace so a newline or blank line inside
            # the sample cannot end the quote early
            quote = " ".join(anchor[:240].split())
            lines.append(f"> {quote}")
            # blank line after every anchor: adjacent "> ..." lines would
            # otherwise be rendered as one merged blockquote paragraph
            lines.append("")

    # sources (at most ten listed)
    sources = profile.get("sources", [])
    if sources:
        lines.append("# sources")
        lines.append(f"profile built from {profile.get('source_count', len(sources))} source(s):")
        for s in sources[:10]:
            lines.append(f"- [{s.get('path', '?')}]({s.get('path', '?')}) ({s.get('chars', 0)} chars)")
        lines.append("")

    # meta footer, only once at least one signal has been processed
    if signal_count:
        lines.append("*last updated: {0} | signals processed: {1}*".format(meta.get("last_updated", "unknown"), signal_count))
        lines.append("")

    return "\n".join(lines)
1677
+
1678
+
1679
def cmd_profile_status(args: argparse.Namespace) -> int:
    """pretty-print the learning state of a profile.

    reads the profile JSON at ``args.profile`` and, when present, the meta JSON
    at ``args.meta`` (default: the profile path with a ``.meta.json`` suffix).
    an unreadable or invalid meta file is treated as empty. when
    ``args.write_voice`` is set, the voice markdown is also written to that
    path, or printed when the value is ``"-"``.

    returns:
        0 on success.

    raises:
        SystemExit: when the profile file does not exist.
    """
    profile_path = Path(args.profile).expanduser()
    if not profile_path.exists():
        raise SystemExit(f"profile not found: {profile_path}")
    profile = json.loads(profile_path.read_text(encoding="utf-8", errors="ignore"))

    # try to load meta if present
    meta: dict[str, Any] = {}
    if args.meta:
        meta_path = Path(args.meta).expanduser()
    else:
        meta_path = profile_path.with_suffix(".meta.json")
    if meta_path.exists():
        try:
            meta = json.loads(meta_path.read_text(encoding="utf-8", errors="ignore"))
        except (json.JSONDecodeError, OSError):
            meta = {}

    lines: list[str] = []

    # header
    name = profile.get("name", "unnamed")
    ver = profile.get("profile_version", "?")
    lines.append(f"voice profile: {name}")
    lines.append(f" version: {ver}")
    lines.append(f" source_count: {profile.get('source_count', 0)}")
    lines.append(f" word_count: {profile.get('word_count', 0)}")
    signal_count = meta.get("signal_count", 0)
    lines.append(f" signals_processed: {signal_count}")
    if meta.get("last_updated"):
        lines.append(f" last_updated: {meta['last_updated']}")
    lines.append("")

    # temporal pattern weights, highest confidence first
    temporal = meta.get("temporal_patterns", {})
    if temporal:
        lines.append("pattern weights (evolved):")
        lines.append(f" {'pattern':<35} {'confidence':<12} {'status':<12} {'confirmed':<12}")
        lines.append(f" {'─'*34:<35} {'─'*11:<12} {'─'*11:<12} {'─'*11:<12}")
        sorted_patterns = sorted(temporal.items(), key=lambda x: -x[1].get("confidence", 0))
        for pid, tp in sorted_patterns:
            w = tp.get("confidence", 0)
            # clamp so an out-of-range confidence cannot stretch the bar
            # beyond its 20 cells
            bar_len = max(0, min(20, int(w * 20)))
            bar = "█" * bar_len + "░" * (20 - bar_len)
            # width specs such as "<12" raise TypeError on None and other
            # non-string JSON values, so coerce to text before formatting
            status = str(tp.get("status", "active"))
            confirmed = tp.get("last_confirmed")
            confirmed = "?" if confirmed is None else str(confirmed)
            lines.append(f" {pid:<35} {bar} {w:.2f} {status:<12} {confirmed:<12}")
        lines.append("")

    # voice stats
    lines.append("voice stats:")
    sentence = profile.get("sentence", {})
    paragraph = profile.get("paragraph", {})
    lines.append(f" sentence length: {sentence.get('avg_words', '?')} words avg ({sentence.get('variance', '?')} variance)")
    lines.append(f" paragraph length: {paragraph.get('avg_sentences', '?')} sentences avg")
    sig = profile.get("signature", {})
    lines.append(f" case style: {sig.get('case_style', '?')}")
    lines.append(f" argument pattern: {sig.get('argument_pattern', '?')}")
    lines.append("")

    # opening moves (at most six)
    moves = sig.get("opening_moves", [])
    if moves:
        lines.append("top opening moves:")
        for i, move in enumerate(moves[:6], 1):
            lines.append(f" {i}. \"{move}...\"")
        lines.append("")

    # never_list (total count, first eight shown)
    never_list = sig.get("never_list", [])
    if never_list:
        lines.append(f"banned phrases: {len(never_list)}")
        for phrase in never_list[:8]:
            lines.append(f" - {phrase}")
        lines.append("")

    # sources (first five shown, remainder summarised)
    sources = profile.get("sources", [])
    if sources:
        lines.append(f"sources ({len(sources)}):")
        for s in sources[:5]:
            lines.append(f" - {s.get('path', '?')} ({s.get('chars', 0)} chars)")
        if len(sources) > 5:
            lines.append(f" ... and {len(sources) - 5} more")
        lines.append("")

    print("\n".join(lines))

    # optionally write the voice markdown
    if args.write_voice:
        voice_md = render_voice_md(profile, meta)
        voice_path = args.write_voice
        if voice_path == "-":
            print("--- voice.md ---")
            print(voice_md)
        else:
            out_path = Path(voice_path).expanduser()
            out_path.parent.mkdir(parents=True, exist_ok=True)
            out_path.write_text(voice_md, encoding="utf-8")
            print(f"\nvoice written to {out_path}")

    return 0
1784
+
1785
+
1786
def cmd_reinforce(args: argparse.Namespace) -> int:
    """compare an original draft with its accepted version and emit the signal report.

    both drafts are loaded through ``load_draft``; an optional profile JSON is
    parsed and handed to ``build_signal_report``. the report is serialised as
    indented JSON and written to ``args.out`` (or printed when unset).

    returns:
        0 on success.

    raises:
        SystemExit: when ``args.profile`` is given but the file is missing.
    """
    original_name, original_body = load_draft(args.original)
    accepted_name, accepted_body = load_draft(args.accepted)

    voice_profile: dict[str, Any] | None = None
    if args.profile:
        profile_file = Path(args.profile).expanduser()
        if not profile_file.exists():
            raise SystemExit(f"profile not found: {profile_file}")
        raw_profile = profile_file.read_text(encoding="utf-8", errors="ignore")
        voice_profile = json.loads(raw_profile)

    signal_report = build_signal_report(
        original_name,
        accepted_name,
        original_body,
        accepted_body,
        voice_profile,
    )
    rendered = json.dumps(signal_report, indent=2, ensure_ascii=False)
    write_or_print(rendered, args.out)
    return 0
1799
+
1800
+
1801
def cmd_profile_evolve(args: argparse.Namespace) -> int:
    """run one evolution step and persist the updated profile and meta.

    loads the profile (required) and the meta file (optional; default is the
    profile path with a ``.meta.json`` suffix, unreadable/invalid meta counts
    as empty), loads both drafts, gathers any ``--new-samples`` text, calls
    ``evolve_profile`` and writes both results back. a short summary is printed
    and a cloud sync is attempted afterwards via ``_auto_sync``.

    returns:
        0 on success.

    raises:
        SystemExit: when the profile file does not exist.
    """
    profile_file = Path(args.profile).expanduser()
    if not profile_file.exists():
        raise SystemExit(f"profile not found: {profile_file}")
    profile = json.loads(profile_file.read_text(encoding="utf-8", errors="ignore"))

    if args.meta:
        meta_file = Path(args.meta).expanduser()
    else:
        meta_file = profile_file.with_suffix(".meta.json")

    meta: dict[str, Any] = {}
    if meta_file.exists():
        try:
            meta = json.loads(meta_file.read_text(encoding="utf-8", errors="ignore"))
        except (json.JSONDecodeError, OSError):
            meta = {}

    original_name, original_body = load_draft(args.original)
    accepted_name, accepted_body = load_draft(args.accepted)

    # concatenate every text file found under the extra sample paths
    extra_text: str | None = None
    if args.new_samples:
        chunks = [
            read_text(sample_file)
            for sample_root in args.new_samples
            for sample_file in iter_text_files([sample_root])
        ]
        if chunks:
            extra_text = "\n\n".join(chunks)

    profile, meta = evolve_profile(
        profile,
        meta,
        original_body,
        accepted_body,
        original_path=original_name,
        accepted_path=accepted_name,
        new_samples_text=extra_text,
    )

    # persist: profile in place, meta next to it (creating its directory)
    profile_file.write_text(json.dumps(profile, indent=2, ensure_ascii=False), encoding="utf-8")
    meta_file.parent.mkdir(parents=True, exist_ok=True)
    meta_file.write_text(json.dumps(meta, indent=2, ensure_ascii=False), encoding="utf-8")

    active_count = len(get_active_patterns(meta))
    declining_count = len(get_declining_patterns(meta))
    print(f"evolved {profile_file}")
    print(f" meta: {meta_file}")
    print(f" active patterns: {active_count}")
    print(f" declining/stale: {declining_count}")
    print(f" total signals: {meta.get('signal_count', 0)}")

    if _auto_sync(profile_file, meta_file):
        print(" synced to cloud (R2)")
    return 0
1851
+
1852
+
1853
+ def write_or_print(value: str, out: str | None) -> None:
1854
+ if out:
1855
+ output_path = Path(out).expanduser()
1856
+ output_path.parent.mkdir(parents=True, exist_ok=True)
1857
+ output_path.write_text(value, encoding="utf-8")
1858
+ print(output_path)
1859
+ else:
1860
+ print(value)
1861
+
1862
+
1863
def cmd_profile(args: argparse.Namespace) -> int:
    """build a profile from ``args.paths`` named ``args.name`` and emit it as indented JSON.

    the JSON goes to ``args.out`` when set, otherwise to stdout. returns 0.
    """
    built = build_profile(args.paths, args.name)
    write_or_print(json.dumps(built, indent=2, ensure_ascii=False), args.out)
    return 0
1868
+
1869
+
1870
def _auto_sync(profile_path: Path, meta_path: Path) -> bool:
    """best-effort sync through the sibling hold_voice_sync.py script.

    looks for ``hold_voice_sync.py`` in this file's directory and, when found,
    runs it with the current interpreter, passing ``--profile`` and ``--meta``.
    output is captured and discarded. whatever conditions the script applies
    before syncing are its own business and not visible here.

    returns:
        True only when the script ran and exited with status 0; False when the
        script is missing, cannot be started, or exceeds the 30 second timeout.
    """
    script = Path(__file__).resolve().parent / "hold_voice_sync.py"
    if not script.exists():
        return False

    # imported lazily: only needed once a sync script is actually present
    import subprocess

    command = [
        sys.executable,
        str(script),
        "--profile",
        str(profile_path),
        "--meta",
        str(meta_path),
    ]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, timeout=30)
    except (subprocess.TimeoutExpired, OSError):
        return False
    return completed.returncode == 0
1885
+
1886
+
1887
def cmd_scan(args: argparse.Namespace) -> int:
    """scan each draft in ``args.paths`` and print the findings.

    when ``args.meta`` names an existing, parseable JSON file, hits are passed
    through ``filter_hits_by_weights``; an unreadable or invalid meta file is
    ignored. output is JSON (``{"files": [...]}``) when ``args.format`` is
    ``"json"``, otherwise the per-file text reports joined by blank lines.

    returns:
        2 when ``args.fail_on_hit`` is set and any draft had hits, else 0.
    """
    learned: dict[str, Any] = {}
    if args.meta:
        meta_file = Path(args.meta).expanduser()
        if meta_file.exists():
            try:
                learned = json.loads(meta_file.read_text(encoding="utf-8", errors="ignore"))
            except (json.JSONDecodeError, OSError):
                # best effort: scan without learned weights
                pass

    file_reports = []
    text_reports = []
    any_hits = False
    for draft_ref in args.paths:
        draft_name, draft_body = load_draft(draft_ref)
        found = scan_text(draft_body)
        if learned:
            found = filter_hits_by_weights(found, learned)
        if found:
            any_hits = True
        file_reports.append({"path": draft_name, "issue_count": len(found), "issues": found})
        text_reports.append(format_scan_text(draft_name, draft_body, found))

    if args.format == "json":
        print(json.dumps({"files": file_reports}, indent=2, ensure_ascii=False))
    else:
        print("\n\n".join(text_reports))

    if args.fail_on_hit and any_hits:
        return 2
    return 0
1915
+
1916
+
1917
def cmd_rewrite_prompt(args: argparse.Namespace) -> int:
    """assemble the rewrite prompt for a draft and write or print it.

    the profile, when given, is passed on as raw text (not parsed). the meta
    file is optional: a missing, unreadable or invalid file means no meta.

    returns:
        0 on success.

    raises:
        SystemExit: when ``args.profile`` is given but the file is missing.
    """
    draft_name, draft_body = load_draft(args.draft)

    raw_profile = None
    if args.profile:
        profile_file = Path(args.profile).expanduser()
        if not profile_file.exists():
            raise SystemExit(f"profile not found: {profile_file}")
        raw_profile = profile_file.read_text(encoding="utf-8", errors="ignore")

    learned: dict[str, Any] | None = None
    if args.meta:
        meta_file = Path(args.meta).expanduser()
        if meta_file.exists():
            try:
                learned = json.loads(meta_file.read_text(encoding="utf-8", errors="ignore"))
            except (json.JSONDecodeError, OSError):
                learned = None

    extra_constraints = args.constraints or ""
    prompt_text = build_rewrite_prompt(draft_name, draft_body, raw_profile, extra_constraints, meta=learned)
    write_or_print(prompt_text, args.out)
    return 0
1936
+
1937
+
1938
def build_parser() -> argparse.ArgumentParser:
    """create the command-line parser with one sub-command per ``cmd_*`` handler.

    a sub-command is mandatory; each sub-parser stores its handler under
    ``func`` so the caller can dispatch with ``args.func(args)``.
    """
    root = argparse.ArgumentParser(description="Portable Hold Your Voice helpers")
    commands = root.add_subparsers(dest="command", required=True)

    # profile: build a fresh profile from samples
    p_profile = commands.add_parser("profile", help="build a voice profile from sample files or directories")
    p_profile.add_argument("paths", nargs="+", help="sample files or directories")
    p_profile.add_argument("--name", default="project voice", help="profile name")
    p_profile.add_argument("--out", help="write profile JSON to this path")
    p_profile.set_defaults(func=cmd_profile)

    # scan: detect AI-writing patterns in drafts
    p_scan = commands.add_parser("scan", help="scan drafts for AI-writing patterns")
    p_scan.add_argument("paths", nargs="+", help="draft files, or '-' for stdin")
    p_scan.add_argument("--format", choices=["json", "text"], default="json")
    p_scan.add_argument("--fail-on-hit", action="store_true", help="exit 2 when issues are found")
    p_scan.add_argument("--meta", help="meta JSON file for learned pattern filtering")
    p_scan.set_defaults(func=cmd_scan)

    # rewrite-prompt: produce a line-level rewrite prompt
    p_rewrite = commands.add_parser("rewrite-prompt", help="generate a line-level rewrite prompt")
    p_rewrite.add_argument("draft", help="draft file, or '-' for stdin")
    p_rewrite.add_argument("--profile", help="voice profile JSON file")
    p_rewrite.add_argument("--constraints", default="", help="extra rewrite constraints")
    p_rewrite.add_argument("--out", help="write prompt to this path")
    p_rewrite.add_argument("--meta", help="meta JSON file for learned pattern filtering")
    p_rewrite.set_defaults(func=cmd_rewrite_prompt)

    # profile-update: fold new samples into an existing profile
    p_update = commands.add_parser("profile-update", help="merge new writing samples into an existing profile using rolling averages")
    p_update.add_argument("--profile", required=True, help="existing profile JSON file")
    p_update.add_argument("paths", nargs="+", help="new sample files or directories")
    p_update.add_argument("--out", help="write updated profile to this path (default: in-place)")
    p_update.set_defaults(func=cmd_profile_update)

    # profile-export: bundle profile (+ optional meta) into a .hyv file
    p_export = commands.add_parser("profile-export", help="bundle a voice profile into a portable .hyv file")
    p_export.add_argument("--profile", required=True, help="voice profile JSON file")
    p_export.add_argument("--meta", help="optional signal meta JSON file to include")
    p_export.add_argument("--out", required=True, help="output .hyv file path")
    p_export.set_defaults(func=cmd_profile_export)

    # profile-import: merge a .hyv bundle into a destination profile
    p_import = commands.add_parser("profile-import", help="import a .hyv bundle into a destination profile")
    p_import.add_argument("--profile", required=True, help="destination profile JSON file (will be updated)")
    p_import.add_argument("--meta", help="destination meta JSON file path (default: profile path with .meta.json)")
    p_import.add_argument("--source", required=True, help=".hyv bundle file to import from")
    p_import.set_defaults(func=cmd_profile_import)

    # profile-status: show learning state; --write-voice without a value means stdout ("-")
    p_status = commands.add_parser("profile-status", help="pretty-print the learning state of a profile")
    p_status.add_argument("--profile", required=True, help="voice profile JSON file")
    p_status.add_argument("--meta", help="signal meta JSON file (default: profile path with .meta.json)")
    p_status.add_argument(
        "--write-voice",
        nargs="?",
        const="-",
        default=None,
        help="write voice.md (optional path; no arg = stdout)",
    )
    p_status.set_defaults(func=cmd_profile_status)

    # reinforce: diff original vs accepted draft into a signal report
    p_reinforce = commands.add_parser("reinforce", help="diff original vs accepted draft to extract learning signals")
    p_reinforce.add_argument("--original", required=True, help="original draft file, or '-' for stdin")
    p_reinforce.add_argument("--accepted", required=True, help="accepted/final draft file, or '-' for stdin")
    p_reinforce.add_argument("--profile", help="voice profile JSON file (optional)")
    p_reinforce.add_argument("--out", help="write signal report to this path")
    p_reinforce.set_defaults(func=cmd_reinforce)

    # profile-evolve: signal extraction + meta update + profile merge in one go
    p_evolve = commands.add_parser("profile-evolve", help="one-shot evolution: signal extraction + meta update + profile merge")
    p_evolve.add_argument("--original", required=True, help="original (AI) draft file, or '-' for stdin")
    p_evolve.add_argument("--accepted", required=True, help="accepted (user-edited) draft file, or '-' for stdin")
    p_evolve.add_argument("--profile", required=True, help="voice profile JSON file")
    p_evolve.add_argument("--meta", help="meta JSON file path (default: profile path with .meta.json)")
    p_evolve.add_argument("--new-samples", nargs="*", default=None, help="additional new writing samples to merge (optional)")
    p_evolve.set_defaults(func=cmd_profile_evolve)

    return root
2004
+
2005
+
2006
def main(argv: list[str] | None = None) -> int:
    """parse ``argv`` (``None`` lets argparse fall back to the process arguments) and run the chosen sub-command.

    returns whatever the selected handler returns.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)
2010
+
2011
+
2012
if __name__ == "__main__":
    # hand main()'s return value to the interpreter as the process exit status
    sys.exit(main())