@holdyourvoice/hyv 2.3.1 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -65
- package/package.json +18 -36
- package/scripts/hold_voice.py +1005 -26
- package/agents/chatgpt.md +0 -18
- package/agents/claude-code.md +0 -39
- package/agents/codex.md +0 -22
- package/agents/cursor.md +0 -19
- package/agents/generic.md +0 -30
- package/agents/windsurf.md +0 -12
- package/assets/ai-eliminator-skill.md +0 -63
- package/assets/chatgpt-instructions.txt +0 -8
- package/assets/claude-code-skill.md +0 -24
- package/assets/cursor-rules.md +0 -12
- package/assets/hold-your-voice-skill.md +0 -174
- package/assets/voice-dna-template.md +0 -88
- package/assets/voice-matcher-skill.md +0 -57
- package/dist/index.js +0 -14728
- package/scripts/postinstall.js +0 -110
package/scripts/hold_voice.py
CHANGED
|
@@ -582,6 +582,49 @@ ABSTRACT_STYLE_WORDS = {
|
|
|
582
582
|
"reinvention",
|
|
583
583
|
}
|
|
584
584
|
|
|
585
|
+
# --- Expanded AI vocabulary for 2025-2026 models ---
# All entries are lower-case. scan_text() matches entries without a space on
# word boundaries and entries containing a space as plain substrings of the
# lower-cased line. Each term is listed exactly once.
AI_VOCAB_EXPANDED = {
    # GPT-4o / Claude fingerprint words
    "inherently", "underscores", "arguably", "notably", "intrinsically",
    "fundamentally", "nuanced", "multifaceted", "encapsulate",
    "delve", "tapestry", "underscore", "testament",
    # Phrase-level compounds (checked as substrings)
    "in the realm of", "it's worth diving into", "the intersection of",
    "a nuanced understanding", "the broader implications", "shed light on",
    "robust framework", "it's important to note", "worth noting that",
    "at the end of the day", "the reality is", "here's the thing",
    # 2025-2026 model fingerprints
    "it's worth mentioning", "let's unpack", "let's break down",
    "to put it simply", "in a nutshell", "the bottom line",
    "what's fascinating", "what's interesting", "what's remarkable",
    "the key takeaway", "the key insight", "the key difference",
}
|
|
602
|
+
|
|
603
|
+
# --- Writing craft signals (from Magnetic Email principles) ---
# Narrative cues, one alternative per family, OR-ed into a single
# case-insensitive pattern.
STORYTELLING_SIGNALS = re.compile(
    "|".join(
        (
            # Recent-time anchors
            r"\b(?:yesterday|last\s+(?:week|month|year|night)|this\s+morning|earlier\s+today)\b",
            # Scene-setting in the first person
            r"\b(?:i\s+was\s+(?:sitting|standing|walking|driving|lying)|we\s+were\s+(?:enjoying|having|drinking))\b",
            # A named relation doing something
            r"\b(?:my\s+(?:wife|husband|friend|mother|father|brother|sister|colleague)\s+(?:said|told|asked|laughed))\b",
            # Recollection
            r"\b(?:i\s+remember|i\s+recall|i\s+once|i\s+used\s+to)\b",
            # "the kind of X you/that ..." descriptions
            r"\b(?:the\s+sort\s+of|the\s+kind\s+of)\s+\w+\s+(?:you|that)\b",
        )
    ),
    flags=re.I,
)
|
|
612
|
+
|
|
613
|
+
# Conversational cues, OR-ed into one case-insensitive pattern.
# The tag questions ("right?", "see?", ...) deliberately have NO trailing \b:
# a word boundary cannot occur between "?" and a following space, punctuation
# mark or end of text, so requiring one meant these cues never matched in
# ordinary prose.
CONVERSATIONAL_SIGNALS = re.compile(
    r"\b(?:let'?s\s+be\s+real|look|listen|here'?s\s+what|here'?s\s+why|think\s+about\s+it)\b|"
    r"\byou\s+know\b|"
    r"\b(?:right|see|get\s+it|makes\s+sense)\?|"
    r"\b(?:i'?m\s+not\s+(?:gonna|going\s+to)\s+lie|i'?ll\s+be\s+honest|real\s+talk)\b|"
    r"\b(?:picture\s+this|imagine\s+this|close\s+your\s+eyes)\b|"
    r"\b(?:by\s+the\s+way|btw|funny\s+thing|random\s+thought)\b",
    re.I,
)
|
|
621
|
+
|
|
622
|
+
# Concrete-detail cues. Case-sensitive on purpose: month names and proper
# nouns are recognised by their capitalisation.
SPECIFICITY_SIGNALS = re.compile(
    "|".join(
        (
            # Numbers, optionally grouped with commas / decimals / a unit suffix
            r"\b\d{1,3}(?:,\d{3})*(?:\.\d+)?(?:%|percent|k|K|M|B)?\b",
            # "<Month> <day>" dates
            r"\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2}\b",
            # Proper nouns: two or more consecutive capitalised words
            r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b",
        )
    )
)
|
|
627
|
+
|
|
585
628
|
GENERIC_OPENERS = re.compile(
|
|
586
629
|
r"^(?:most|many|some|all)\s+(?:brands|teams|people|founders|companies|businesses|organizations|leaders)\b|"
|
|
587
630
|
r"^(?:in\s+)?(?:today'?s|the)\s+(?:fast.paced|ever.evolving|modern|digital|current|contemporary)\s+(?:world|age|era|landscape|economy)\b",
|
|
@@ -716,6 +759,535 @@ def infer_argument_pattern(text: str) -> str:
|
|
|
716
759
|
return "mixed"
|
|
717
760
|
|
|
718
761
|
|
|
762
|
+
# =============================================================================
|
|
763
|
+
# VOICE-FIRST ANALYSIS FUNCTIONS
|
|
764
|
+
# =============================================================================
|
|
765
|
+
|
|
766
|
+
# Function words that carry no stylistic signal. They are excluded from the
# distinctive-word list and may not open a signature bigram. Built once at
# import time rather than on every call.
_FINGERPRINT_COMMON_WORDS = frozenset({
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "shall", "can", "to", "of", "in", "for",
    "on", "with", "at", "by", "from", "as", "into", "through", "during",
    "before", "after", "above", "below", "between", "and", "but", "or",
    "nor", "not", "so", "yet", "both", "either", "neither", "each",
    "every", "all", "any", "few", "more", "most", "other", "some", "such",
    "no", "only", "own", "same", "than", "too", "very", "just", "because",
    "if", "when", "where", "how", "what", "which", "who", "whom", "this",
    "that", "these", "those", "i", "me", "my", "we", "our", "you", "your",
    "he", "him", "his", "she", "her", "it", "its", "they", "them", "their",
})


def vocabulary_fingerprint(text: str, limit: int = 50) -> dict[str, Any]:
    """Extract a vocabulary fingerprint from *text*.

    The returned dict always has the same keys:

    - ``distinctive_words``: up to *limit* ``{"word", "count"}`` entries for
      words longer than two characters that occur at least twice and are not
      common function words, most frequent first.
    - ``signature_phrases``: up to 20 ``{"phrase", "count"}`` entries drawn
      from bigrams seen 3+ times (whose first word is not a function word)
      and trigrams seen 2+ times, most frequent first.
    - ``sentence_starters``: the 10 most frequent sentence openings (first
      two or three words, lower-cased).
    - ``total_words`` / ``unique_words``: token and distinct-token counts.

    Texts with fewer than 10 words yield empty lists.
    """
    word_list = [w.lower() for w in words(text)]
    total = len(word_list)
    if total < 10:
        return {
            "distinctive_words": [],
            "signature_phrases": [],
            "sentence_starters": [],
            "total_words": total,
            # Present on every return path so callers can rely on the key.
            "unique_words": len(set(word_list)),
        }

    def tally(items) -> dict[str, int]:
        """Count items; keys keep first-seen order so stable sorts are deterministic."""
        counts: dict[str, int] = {}
        for item in items:
            counts[item] = counts.get(item, 0) + 1
        return counts

    def most_frequent(counts: dict[str, int]) -> list[tuple[str, int]]:
        """Return (item, count) pairs, highest count first, ties in first-seen order."""
        return sorted(counts.items(), key=lambda pair: -pair[1])

    freq = tally(word_list)

    # Distinctive words: repeated, longer than two characters, not function words.
    distinctive = [
        (w, c)
        for w, c in most_frequent(freq)
        if c >= 2 and len(w) > 2 and w not in _FINGERPRINT_COMMON_WORDS
    ][:limit]

    # Signature phrases: recurring two- and three-word combinations.
    bigrams = tally(" ".join(pair) for pair in zip(word_list, word_list[1:]))
    trigrams = tally(" ".join(tri) for tri in zip(word_list, word_list[1:], word_list[2:]))

    candidates = [
        {"phrase": phrase, "count": count}
        for phrase, count in most_frequent(bigrams)
        if count >= 3 and phrase.split()[0] not in _FINGERPRINT_COMMON_WORDS
    ]
    candidates.extend(
        {"phrase": phrase, "count": count}
        for phrase, count in most_frequent(trigrams)
        if count >= 2
    )
    # Stable sort: on equal counts bigrams stay ahead of trigrams.
    signature_phrases = sorted(candidates, key=lambda entry: -entry["count"])[:20]

    # Sentence starters: first two or three words of each sentence.
    openings = (words(sent.lower())[:3] for sent in sentences(text))
    starters = tally(" ".join(opening) for opening in openings if len(opening) >= 2)
    top_starters = most_frequent(starters)[:10]

    return {
        "distinctive_words": [{"word": w, "count": c} for w, c in distinctive],
        "signature_phrases": signature_phrases,
        "sentence_starters": [{"phrase": p, "count": c} for p, c in top_starters],
        "total_words": total,
        "unique_words": len(freq),
    }
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
def rhythm_markov(text: str) -> dict[str, Any]:
    """Build a Markov transition matrix over sentence-length buckets.

    Captures the writer's rhythm: how short sentences follow long ones and
    vice versa. Sentences are bucketed by word count into ``short`` (1-8),
    ``medium`` (9-16), ``long`` (17-25) and ``very_long`` (26+).

    Returns a dict with:

    - ``transitions``: ``{src_bucket: {dst_bucket: probability}}`` rounded to
      three decimals.
    - ``distribution``: share of sentences per bucket, rounded to three decimals.
    - ``pattern``: ``"uniform_medium"``, ``"punchy_mixed"``, ``"varied"``, the
      dominant bucket name, or ``"insufficient_data"``.
    - ``avg_length``: mean sentence length in words.
    - ``length_variance``: despite the name, the population *standard
      deviation* of sentence lengths (square root of the variance). The key
      name is kept for compatibility.

    With fewer than five non-empty sentences every key is still present but
    empty/zero, plus the older ``length_buckets`` key.
    """
    # Tokenise each sentence once; sentences without any words are ignored.
    lengths = [n for n in (len(words(s)) for s in sentences(text)) if n]
    if len(lengths) < 5:
        return {
            "transitions": {},
            "length_buckets": [],  # kept so callers that read this key keep working
            "distribution": {},
            "pattern": "insufficient_data",
            "avg_length": 0.0,
            "length_variance": 0.0,
        }

    def bucket(length: int) -> str:
        if length <= 8:
            return "short"
        if length <= 16:
            return "medium"
        if length <= 25:
            return "long"
        return "very_long"

    bucketed = [bucket(length) for length in lengths]

    # Count bucket -> next-bucket transitions between consecutive sentences.
    transitions: dict[str, dict[str, int]] = {}
    for src, dst in zip(bucketed, bucketed[1:]):
        row = transitions.setdefault(src, {})
        row[dst] = row.get(dst, 0) + 1

    # Normalise each row to probabilities.
    transition_probs: dict[str, dict[str, float]] = {}
    for src, row in transitions.items():
        row_total = sum(row.values())
        transition_probs[src] = {dst: round(count / row_total, 3) for dst, count in row.items()}

    # Share of sentences falling in each bucket.
    bucket_counts: dict[str, int] = {}
    for name in bucketed:
        bucket_counts[name] = bucket_counts.get(name, 0) + 1
    bucket_dist = {name: round(count / len(bucketed), 3) for name, count in bucket_counts.items()}

    # Classify the dominant rhythm.
    long_share = bucket_dist.get("long", 0) + bucket_dist.get("very_long", 0)
    if bucket_dist.get("medium", 0) > 0.6:
        pattern = "uniform_medium"  # AI-like
    elif bucket_dist.get("short", 0) > 0.4 and long_share > 0.2:
        pattern = "punchy_mixed"  # Human-like conversational
    elif len(set(bucketed)) >= 3:
        pattern = "varied"  # Human-like diverse
    else:
        pattern = max(bucket_dist, key=bucket_dist.get)

    # Compute the mean once; recomputing it per element made this quadratic.
    mean_length = sum(lengths) / len(lengths)
    std_dev = math.sqrt(sum((length - mean_length) ** 2 for length in lengths) / len(lengths))

    return {
        "transitions": transition_probs,
        "distribution": bucket_dist,
        "pattern": pattern,
        "avg_length": round(mean_length, 1),
        "length_variance": round(std_dev, 1),
    }
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
# Keyword lists for emotional_tone(); built once at import time.
_TONE_FORMAL = frozenset({
    "therefore", "furthermore", "moreover", "consequently", "nevertheless", "hence",
    "accordingly", "thus", "whereby", "herein", "thereof", "wherein", "shall", "henceforth",
})
_TONE_CASUAL = frozenset({
    "gonna", "wanna", "gotta", "kinda", "sorta", "yeah", "nah", "yep", "nope",
    "ok", "okay", "cool", "awesome", "stuff", "things", "basically", "honestly",
    "literally", "totally", "pretty", "super", "really", "damn", "hell", "crap",
})
_TONE_ACTION_VERBS = frozenset({
    "go", "run", "build", "create", "make", "do", "get", "take", "start",
    "stop", "push", "pull", "drive", "hit", "crush", "nail", "smash", "kill",
})
_TONE_CYNICAL = frozenset({
    "but", "however", "unfortunately", "sadly", "honestly", "actually", "look",
    "listen", "truth", "reality", "problem", "issue", "broken", "failed", "wrong",
    "terrible", "awful", "garbage", "rubbish", "crap", "bullshit", "stupid",
})
_TONE_WARMTH = frozenset({
    "we", "us", "our", "together", "friend", "love", "care", "hope", "wish",
    "happy", "glad", "grateful", "thankful", "appreciate", "welcome", "please",
})
_TONE_FIRST_PERSON = frozenset({"i", "me", "my", "we", "us", "our"})

# Contraction suffix attached to a word. The look-behind replaces a leading
# \b: there is no word boundary between the "o" and "n" of "don't", so a
# \b-prefixed "n't" never matched negative contractions. Straight and
# typographic apostrophes are both accepted. Note that 's also matches
# possessives.
_TONE_CONTRACTION_RE = re.compile(r"(?<=\w)(?:n['\u2019]t|['\u2019](?:re|ve|ll|d|m|s))\b")


def emotional_tone(text: str) -> dict[str, float]:
    """Score text on simple emotional axes using keyword-based scoring.

    Returns ``formality``, ``energy``, ``cynicism`` and ``warmth``, each
    clamped to 0-10 and rounded to one decimal:

    - formality starts at 5 and moves up with formal connectives, down with
      casual words and contractions.
    - energy starts at 3 and rises with exclamation marks, sentences of six
      words or fewer, and action verbs.
    - cynicism starts at 2 and rises with negative/dismissive words.
    - warmth starts at 3 and rises with inclusive/empathy words and
      first-person pronouns.
    """
    low = text.lower()
    word_list = [w.lower() for w in words(low)]
    total = max(1, len(word_list))

    def count_in(vocabulary: frozenset) -> int:
        return sum(1 for w in word_list if w in vocabulary)

    def clamp(value: float) -> float:
        return max(0, min(10, value))

    # Formality: formal words vs casual words and contractions.
    formal_count = count_in(_TONE_FORMAL)
    casual_count = count_in(_TONE_CASUAL)
    contractions = len(_TONE_CONTRACTION_RE.findall(low))
    formality = clamp(5 + (formal_count - casual_count - contractions * 0.3) * 10 / total)

    # Energy: exclamation marks, short sentences, action verbs.
    sentence_list = sentences(text)  # split once, reused for both counts below
    sent_count = max(1, len(sentence_list))
    exclamations = text.count("!")
    short_sents = sum(1 for s in sentence_list if len(words(s)) <= 6)
    action_count = count_in(_TONE_ACTION_VERBS)
    energy = clamp(3 + exclamations * 2 / sent_count + short_sents / sent_count * 3 + action_count * 5 / total)

    # Cynicism: negative qualifiers, hedging, dismissive words.
    cynicism = clamp(2 + count_in(_TONE_CYNICAL) * 8 / total)

    # Warmth: empathy words, inclusive language, first-person pronouns.
    warmth_count = count_in(_TONE_WARMTH)
    first_person = count_in(_TONE_FIRST_PERSON)
    warmth = clamp(3 + warmth_count * 8 / total + first_person * 3 / total)

    return {
        "formality": round(formality, 1),
        "energy": round(energy, 1),
        "cynicism": round(cynicism, 1),
        "warmth": round(warmth, 1),
    }
|
|
943
|
+
|
|
944
|
+
|
|
945
|
+
def vocabulary_diversity(text: str) -> dict[str, float]:
    """Compute vocabulary diversity metrics: TTR, Yule's K, hapax ratio.

    Returns a dict with:

    - ``ttr``: type-token ratio, distinct words / total words.
    - ``yules_k``: Yule's characteristic K,
      ``10^4 * (sum(c^2 over word counts c) - N) / N^2`` (lower = more diverse).
    - ``hapax_ratio``: words occurring exactly once / total words.
    - ``total_words`` / ``unique_words``: token and distinct-token counts.

    Texts under 20 words return zeroed metrics with the same keys.
    """
    word_list = [w.lower() for w in words(text)]
    total = len(word_list)
    if total < 20:
        return {
            "ttr": 0,
            "yules_k": 0,
            "hapax_ratio": 0,
            "total_words": total,
            # Present on every return path so callers can rely on the key.
            "unique_words": len(set(word_list)),
        }

    freq: dict[str, int] = {}
    for w in word_list:
        freq[w] = freq.get(w, 0) + 1

    unique = len(freq)
    ttr = unique / total
    hapax_ratio = sum(1 for c in freq.values() if c == 1) / total

    # sum(i^2 * V_i) over the frequency spectrum equals sum(c^2) over the
    # per-word counts, so no frequency-of-frequency table is needed. The
    # "- total" term is part of Yule's definition; without it K never reaches
    # 0 even for a text made entirely of distinct words.
    sum_of_squares = sum(c * c for c in freq.values())
    yules_k = 10000 * (sum_of_squares - total) / (total * total)

    return {
        "ttr": round(ttr, 3),
        "yules_k": round(yules_k, 1),
        "hapax_ratio": round(hapax_ratio, 3),
        "total_words": total,
        "unique_words": unique,
    }
|
|
976
|
+
|
|
977
|
+
|
|
978
|
+
def ngram_repetition(text: str) -> dict[str, Any]:
    """Detect repeated n-gram patterns that indicate AI-like repetition.

    Returns a dict with:

    - ``repeated_trigrams``: up to 20 ``{"phrase", "count"}`` entries for
      trigrams seen 3+ times, most frequent first.
    - ``repeated_fourgrams``: up to 10 entries for 4-grams seen 2+ times.
    - ``echo_score``: fraction (0-1) of word positions that lie inside at
      least one trigram seen 3+ times.

    Texts under 20 words return empty lists and an echo score of 0.
    """
    word_list = [w.lower() for w in words(text)]
    if len(word_list) < 20:
        return {"repeated_trigrams": [], "repeated_fourgrams": [], "echo_score": 0}

    def ngrams(n: int) -> list[str]:
        """Return every space-joined n-gram, in text order."""
        return [" ".join(word_list[i:i + n]) for i in range(len(word_list) - n + 1)]

    def tally(keys: list[str]) -> dict[str, int]:
        counts: dict[str, int] = {}
        for key in keys:
            counts[key] = counts.get(key, 0) + 1
        return counts

    def top_repeats(counts: dict[str, int], minimum: int, limit: int) -> list[tuple[str, int]]:
        frequent = [(gram, c) for gram, c in counts.items() if c >= minimum]
        return sorted(frequent, key=lambda pair: -pair[1])[:limit]

    trigram_keys = ngrams(3)
    trigram_counts = tally(trigram_keys)
    repeated = top_repeats(trigram_counts, minimum=3, limit=20)
    repeated_4 = top_repeats(tally(ngrams(4)), minimum=2, limit=10)

    # Echo score: mark each word position covered by a repeated trigram once.
    # Counting "occurrences * 3" double-counted words shared by overlapping
    # trigrams, and only looked at the 20 trigrams kept for the report.
    covered: set[int] = set()
    for start, gram in enumerate(trigram_keys):
        if trigram_counts[gram] >= 3:
            covered.update(range(start, start + 3))
    echo_score = len(covered) / len(word_list)

    return {
        "repeated_trigrams": [{"phrase": t, "count": c} for t, c in repeated],
        "repeated_fourgrams": [{"phrase": f, "count": c} for f, c in repeated_4],
        "echo_score": round(echo_score, 3),
    }
|
|
1015
|
+
|
|
1016
|
+
|
|
1017
|
+
def perplexity_proxy(text: str) -> dict[str, Any]:
    """Estimate perplexity using word transition predictability.

    Low perplexity = predictable = AI-like. High perplexity = surprising =
    human-like. The bigram model is built from *text* itself, so the score
    measures how often the text reuses its own word-to-word transitions.

    Returns a dict with:

    - ``avg_predictability``: mean per-sentence predictability (0-1).
    - ``low_perplexity_sentences``: up to 10 ``{"line", "score", "text"}``
      entries for sentences scoring above 0.7, most predictable first.
      ``line`` is 1-based, or 0 when the sentence cannot be located in *text*.
    - ``score``: same value as ``avg_predictability`` (higher = more AI-like).

    Texts under 10 words return zeros and an empty list.
    """
    word_list = [w.lower() for w in words(text)]
    if len(word_list) < 10:
        return {"avg_predictability": 0, "low_perplexity_sentences": [], "score": 0}

    # Bigram table: word -> {next word -> count}.
    followers: dict[str, dict[str, int]] = {}
    for current, following in zip(word_list, word_list[1:]):
        row = followers.setdefault(current, {})
        row[following] = row.get(following, 0) + 1
    # Row totals computed once instead of re-summing inside the scoring loop.
    follower_totals = {word: sum(row.values()) for word, row in followers.items()}

    sentence_scores: list[tuple[int, float, str]] = []
    cursor = 0  # search resumes after the previous sentence
    for sent in sentences(text):
        # Searching forward from the cursor gives repeated sentences their
        # own position instead of always reporting the first occurrence.
        position = text.find(sent, cursor)
        if position == -1:
            position = text.find(sent)  # fall back to a whole-text search
        else:
            cursor = position + len(sent)

        tokens = [w.lower() for w in words(sent)]
        if len(tokens) < 3:
            continue

        ratios = [
            followers[first].get(second, 0) / follower_totals[first]
            for first, second in zip(tokens, tokens[1:])
            if first in followers
        ]
        if not ratios:
            continue

        line_no = text.count("\n", 0, position) + 1 if position != -1 else 0
        sentence_scores.append((line_no, sum(ratios) / len(ratios), sent.strip()[:120]))

    # Flag sentences with unusually high predictability (> 0.7).
    low_perplexity = sorted(
        (entry for entry in sentence_scores if entry[1] > 0.7),
        key=lambda entry: -entry[1],
    )

    overall_avg = sum(score for _, score, _ in sentence_scores) / max(1, len(sentence_scores))

    return {
        "avg_predictability": round(overall_avg, 3),
        "low_perplexity_sentences": [
            {"line": line, "score": round(score, 3), "text": snippet}
            for line, score, snippet in low_perplexity[:10]
        ],
        "score": round(overall_avg, 3),  # Higher = more predictable = more AI-like
    }
|
|
1067
|
+
|
|
1068
|
+
|
|
1069
|
+
def cross_pattern_density(hits: list[dict[str, Any]], text: str) -> list[dict[str, Any]]:
    """Compute pattern density per paragraph. High density = strong AI signal.

    For each paragraph of at least 20 words, counts the entries of *hits*
    whose ``"line"`` falls inside the paragraph's line range and divides by
    the paragraph's word count. Paragraphs with a density above 0.05 are
    reported.

    Returns up to 10 dicts (``line``, ``density``, ``hits``, ``words``,
    ``text``), densest first. ``line`` is the paragraph's 1-based first line
    and ``text`` its first 160 characters.
    """
    paragraph_list = paragraphs(text)
    if not paragraph_list:
        return []

    # Index hits by line once so each paragraph sums a few lookups instead of
    # rescanning the whole hit list.
    hits_per_line: dict[Any, int] = {}
    for hit in hits:
        line = hit.get("line", 0)
        hits_per_line[line] = hits_per_line.get(line, 0) + 1

    results = []
    offset = 0
    for para in paragraph_list:
        para_start = text.find(para, offset)
        if para_start == -1:
            # Paragraph text not found verbatim; leave the search position
            # untouched so later paragraphs are still located correctly.
            continue
        offset = para_start + len(para)

        para_word_count = len(words(para))
        if para_word_count < 20:
            continue  # too short for a meaningful density

        first_line = text.count("\n", 0, para_start) + 1
        last_line = first_line + para.count("\n")
        hit_count = sum(hits_per_line.get(line, 0) for line in range(first_line, last_line + 1))

        density = hit_count / para_word_count
        if density > 0.05:  # 5% of words trigger patterns
            results.append({
                "line": first_line,
                "density": round(density, 3),
                "hits": hit_count,
                "words": para_word_count,
                "text": para.strip()[:160],
            })

    return sorted(results, key=lambda entry: -entry["density"])[:10]
|
|
1106
|
+
|
|
1107
|
+
|
|
1108
|
+
def storytelling_score(text: str) -> dict[str, Any]:
    """Rate the narrative texture of *text* (TLS: Time, Location, Senses).

    Based on Kieran Drew's Magnetic Email principles. Counts time references,
    location references, sensory words and quotation marks, then scales the
    combined count against 30% of the sentence count, capped at 1.0. Also
    reports whether a storytelling cue appears in the first 500 characters.
    """
    lowered = text.lower()
    sentence_total = max(1, len(sentences(text)))

    def count_matches(pattern) -> int:
        return sum(1 for _ in pattern.finditer(lowered))

    # Time references
    time_re = re.compile(
        r"\b(?:yesterday|last\s+(?:week|month|year|night)|this\s+morning|earlier\s+today|"
        r"monday|tuesday|wednesday|thursday|friday|saturday|sunday|"
        r"\d{1,2}(?:am|pm)|o'?clock|morning|evening|afternoon)\b", re.I
    )
    # Location references
    location_re = re.compile(
        r"\b(?:at\s+the|in\s+the|on\s+the|inside|outside|upstairs|downstairs|"
        r"kitchen|office|gym|cafe|coffee\s+shop|restaurant|car|train|plane|bed)\b", re.I
    )
    # Sensory words
    senses_re = re.compile(
        r"\b(?:saw|heard|felt|tasted|smelled|smelt|touch|touched|"
        r"bright|dark|loud|quiet|warm|cold|hot|sweet|bitter|sour|"
        r"soft|hard|smooth|rough|wet|dry|sharp|dull)\b", re.I
    )

    time_count = count_matches(time_re)
    location_count = count_matches(location_re)
    senses_count = count_matches(senses_re)

    # Dialogue: every quotation mark counts, against the original-case text.
    quote_marks = len(re.findall('[""\u201c\u201d]', text))

    # Story opener (snapshot pattern) within the first 500 characters.
    opens_with_story = STORYTELLING_SIGNALS.search(text[:500]) is not None

    cue_total = time_count + location_count + senses_count + quote_marks
    tls = min(1.0, cue_total / max(1, sentence_total * 0.3))

    return {
        "score": round(tls, 3),
        "time_references": time_count,
        "location_references": location_count,
        "sensory_words": senses_count,
        "dialogue_markers": quote_marks,
        "has_story_opener": opens_with_story,
    }
|
|
1155
|
+
|
|
1156
|
+
|
|
1157
|
+
# Contraction suffix attached to a word. The look-behind replaces a leading
# \b: there is no word boundary between the "o" and "n" of "don't", so a
# \b-prefixed "n't" never matched negative contractions. Straight and
# typographic apostrophes are both accepted. Note that 's also matches
# possessives.
_CONVERSATIONAL_CONTRACTION_RE = re.compile(r"(?<=\w)(?:n['\u2019]t|['\u2019](?:re|ve|ll|d|m|s))\b")


def conversational_score(text: str) -> dict[str, Any]:
    """Score text for conversational tone vs. lecture/speech tone.

    Based on the 'Write conversations not speeches' principle. Conversational
    cues (direct address, questions weighted x3, contractions, first-person
    pronouns, conversational phrases weighted x2) are summed per word and
    scaled by 10; half of the passive constructions per sentence is
    subtracted. The result is clamped to 0-1.

    Returns the ``score`` plus the raw counts: ``direct_address``,
    ``questions``, ``contractions``, ``first_person``,
    ``conversational_phrases`` and ``passive_voice``.
    """
    low = text.lower()
    sentence_list = sentences(text)
    total_sents = max(1, len(sentence_list))

    # Direct address (you/your)
    direct_address = len(re.findall(r"\b(?:you|your|you're|you've|you'll)\b", low))

    # Questions (conversational marker)
    questions = sum(1 for s in sentence_list if s.strip().endswith("?"))

    # Contractions (casual tone)
    contractions = len(_CONVERSATIONAL_CONTRACTION_RE.findall(low))

    # First person (personal)
    first_person = len(re.findall(r"\b(?:i|me|my|we|us|our)\b", low))

    # Conversational phrases
    conv_hits = len(CONVERSATIONAL_SIGNALS.findall(low))

    # Passive voice (anti-conversational); only regular "-ed" participles
    # after a form of "to be" are detected.
    passive = len(re.findall(r"\b(?:is|are|was|were|been|being|be)\s+\w+ed\b", low))

    total_words = max(1, len(words(text)))
    conv_ratio = (direct_address + questions * 3 + contractions + first_person + conv_hits * 2) / total_words
    passive_ratio = passive / total_sents
    score = max(0, min(1.0, conv_ratio * 10 - passive_ratio * 0.5))

    return {
        "score": round(score, 3),
        "direct_address": direct_address,
        "questions": questions,
        "contractions": contractions,
        "first_person": first_person,
        "conversational_phrases": conv_hits,
        "passive_voice": passive,
    }
|
|
1197
|
+
|
|
1198
|
+
|
|
1199
|
+
def specificity_score(text: str) -> dict[str, Any]:
    """Measure how concrete *text* is: numbers, proper nouns, dates, quotes.

    AI text is vague. Human text is specific. The four counts are summed,
    divided by the word count, and the ratio is scaled by 15 and capped at
    1.0 to give ``score``.
    """
    token_total = max(1, len(words(text)))

    # Numbers, optionally with a decimal part and a %/k/K/M/B suffix.
    number_count = len(re.findall(r"\b\d+(?:\.\d+)?(?:%|k|K|M|B)?\b", text))

    # Proper nouns: capitalised words that are not the first word of their
    # sentence and not one of a few common capitalised function words.
    not_proper = {"I", "The", "A", "An"}
    proper_count = 0
    for sent in sentences(text):
        later_tokens = words(sent)[1:]
        proper_count += sum(
            1 for tok in later_tokens if tok[0].isupper() and tok not in not_proper
        )

    # Dates: numeric d/m/y forms, "<Month> <day>[, <year>]", or a bare 4-digit year.
    date_re = (
        r"\b(?:\d{1,2}[/-]\d{1,2}[/-]\d{2,4}|"
        r"(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*\s+\d{1,2}(?:,?\s+\d{4})?|"
        r"\d{4})\b"
    )
    date_count = len(re.findall(date_re, text))

    # Quotes (specific attribution): two quotation marks make one quote.
    quote_count = len(re.findall('[""\u201c\u201d]', text)) // 2

    ratio = (number_count + proper_count + date_count + quote_count) / token_total

    return {
        "score": round(min(1.0, ratio * 15), 3),
        "numbers": number_count,
        "proper_nouns": proper_count,
        "dates": date_count,
        "quotes": quote_count,
        "ratio": round(ratio, 4),
    }
|
|
1239
|
+
|
|
1240
|
+
|
|
1241
|
+
def profile_strength(profile: dict[str, Any]) -> dict[str, Any]:
    """Compute a profile strength score (0-100) from a voice profile dict.

    Reads ``source_count``, ``word_count`` and ``signature`` (with its
    ``opening_moves``, ``anchors`` and ``cadence`` lists) from *profile*.
    Missing or ``None`` values are treated as zero / empty.

    Components:

    - sources (0-30): 3 points per source.
    - words (0-30): 1 point per 100 words.
    - diversity (0-20): 2 points per opening move or anchor.
    - cadence (0-10): 2.5 points per cadence entry.
    - recency (fixed 5): placeholder; modification times are not inspected.

    Returns ``{"score", "label", "breakdown"}`` where ``label`` is
    ``"strong"`` (>= 80), ``"moderate"`` (>= 50), ``"weak"`` (>= 25) or
    ``"insufficient"``.
    """
    # "or" fallbacks also cover keys that are present but set to None.
    source_count = profile.get("source_count") or 0
    word_count = profile.get("word_count") or 0
    signature = profile.get("signature") or {}

    source_score = min(30, source_count * 3)
    word_score = min(30, word_count / 100)

    opening_moves = len(signature.get("opening_moves") or [])
    anchors = len(signature.get("anchors") or [])
    diversity_score = min(20, (opening_moves + anchors) * 2)

    cadence_score = min(10, len(signature.get("cadence") or []) * 2.5)

    recency_score = 5  # default: recency cannot be determined from the profile

    total = source_score + word_score + diversity_score + cadence_score + recency_score

    if total >= 80:
        label = "strong"
    elif total >= 50:
        label = "moderate"
    elif total >= 25:
        label = "weak"
    else:
        label = "insufficient"

    return {
        "score": round(min(100, total)),
        "label": label,
        "breakdown": {
            "sources": source_score,
            "words": word_score,
            "diversity": diversity_score,
            "cadence": cadence_score,
            "recency": recency_score,
        },
    }
|
|
1289
|
+
|
|
1290
|
+
|
|
719
1291
|
def first_words(text: str, count: int = 7) -> str:
    """Return the first *count* words of lower-cased *text*, joined by spaces."""
    leading = words(text.lower())[:count]
    return " ".join(leading)
|
|
@@ -813,7 +1385,7 @@ def build_profile(paths: list[str], name: str) -> dict[str, Any]:
|
|
|
813
1385
|
voice_rules.append("study these sample opening moves before drafting: " + "; ".join(opening_moves[:4]))
|
|
814
1386
|
|
|
815
1387
|
return {
|
|
816
|
-
"profile_version": "hold-your-voice-portable-
|
|
1388
|
+
"profile_version": "hold-your-voice-portable-v2",
|
|
817
1389
|
"name": name,
|
|
818
1390
|
"source_count": len(samples),
|
|
819
1391
|
"sources": [{"path": sample["path"], "chars": len(sample["text"])} for sample in samples],
|
|
@@ -828,6 +1400,11 @@ def build_profile(paths: list[str], name: str) -> dict[str, Any]:
|
|
|
828
1400
|
"anchors": anchors,
|
|
829
1401
|
"never_list": never_list,
|
|
830
1402
|
},
|
|
1403
|
+
"voice_fingerprint": vocabulary_fingerprint(combined),
|
|
1404
|
+
"rhythm": rhythm_markov(combined),
|
|
1405
|
+
"emotional_tone": emotional_tone(combined),
|
|
1406
|
+
"voice_diversity": vocabulary_diversity(combined),
|
|
1407
|
+
"voice_strength": None, # computed separately via profile_strength()
|
|
831
1408
|
"voice_rules": voice_rules,
|
|
832
1409
|
"ai_eliminator": {
|
|
833
1410
|
"rewrite_scope": "flagged-lines-only",
|
|
@@ -1007,6 +1584,19 @@ def scan_text(text: str) -> list[dict[str, Any]]:
|
|
|
1007
1584
|
line_no = text[: match.start()].count("\n") + 1
|
|
1008
1585
|
hits.append({"line": line_no, "rule": rule_id, "phrase": snippet[:160]})
|
|
1009
1586
|
|
|
1587
|
+
# Expanded AI vocabulary detection (2025-2026 model fingerprints)
|
|
1588
|
+
for line_no, line in enumerate((text or "").splitlines(), 1):
|
|
1589
|
+
low = line.lower()
|
|
1590
|
+
for term in AI_VOCAB_EXPANDED:
|
|
1591
|
+
if " " in term:
|
|
1592
|
+
# Multi-word phrase
|
|
1593
|
+
if term in low:
|
|
1594
|
+
hits.append({"line": line_no, "rule": "ai_vocab_expanded", "phrase": term})
|
|
1595
|
+
else:
|
|
1596
|
+
# Single word — match with word boundaries
|
|
1597
|
+
if re.search(rf"\b{re.escape(term)}\b", low):
|
|
1598
|
+
hits.append({"line": line_no, "rule": "ai_vocab_expanded", "phrase": term})
|
|
1599
|
+
|
|
1010
1600
|
for line_no, line in enumerate((text or "").splitlines(), 1):
|
|
1011
1601
|
for hit in line_style_hits(line):
|
|
1012
1602
|
hits.append({"line": line_no, "rule": hit["rule"], "phrase": hit["phrase"], "text": line.strip()[:240]})
|
|
@@ -1015,6 +1605,17 @@ def scan_text(text: str) -> list[dict[str, Any]]:
|
|
|
1015
1605
|
for structural_hit in _structural_analysis(text):
|
|
1016
1606
|
hits.append(structural_hit)
|
|
1017
1607
|
|
|
1608
|
+
# Voice craft signals (from Magnetic Email principles)
|
|
1609
|
+
# Lack of storytelling in long text
|
|
1610
|
+
story_hits = STORYTELLING_SIGNALS.findall(text or "")
|
|
1611
|
+
conv_hits = CONVERSATIONAL_SIGNALS.findall(text or "")
|
|
1612
|
+
word_count = len(words(text or ""))
|
|
1613
|
+
if word_count > 200:
|
|
1614
|
+
if len(story_hits) == 0:
|
|
1615
|
+
hits.append({"line": 0, "rule": "voice_no_storytelling", "phrase": f"no storytelling signals in {word_count} words — text reads like a lecture, not a conversation"})
|
|
1616
|
+
if len(conv_hits) == 0 and word_count > 300:
|
|
1617
|
+
hits.append({"line": 0, "rule": "voice_no_conversation", "phrase": f"no conversational signals in {word_count} words — text speaks at reader, not with them"})
|
|
1618
|
+
|
|
1018
1619
|
# Staccato triplet detection — only fire when sentences are clearly performative
|
|
1019
1620
|
sentence_hits = []
|
|
1020
1621
|
for line_no, line in enumerate((text or "").splitlines(), 1):
|
|
@@ -1066,44 +1667,303 @@ def load_draft(path: str) -> tuple[str, str]:
|
|
|
1066
1667
|
return str(draft_path), read_text(draft_path)
|
|
1067
1668
|
|
|
1068
1669
|
|
|
1670
|
+
# --- Pattern fix guidance: tells the LLM HOW to fix each pattern type ---
|
|
1671
|
+
# Maps a scan rule id to a one-line rewrite instruction for the LLM.
# Each key must appear exactly once: in a dict literal a repeated key silently
# keeps only the last value, which previously left a dead
# "abstract_noun_cluster" entry in this table.
PATTERN_FIX_GUIDANCE = {
    "landscape_era": "Replace temporal grandstanding with a concrete observation or remove entirely.",
    "formulaic_connector": "Replace formal transitions (Moreover, Furthermore, Additionally) with natural flow or short sentences.",
    "lets_invitation": "Remove the invitation to dive/explore. Just start with the point.",
    "inflated_verbs": "Replace marketing verbs (unlock, leverage, supercharge) with plain verbs (use, build, get).",
    "truth_harsh_reality": "Remove the 'reality/truth is' framing. State the point directly.",
    "ai_vocab_density": "Replace AI-buzzwords with specific, concrete language from the writer's vocabulary.",
    "ai_vocab_expanded": "Replace with plain language. If the phrase is 'it's important to note', just state the point.",
    # This is the value that was effective at runtime (the later duplicate won).
    "abstract_noun_cluster": "Too many abstract nouns. Replace with concrete examples or actions.",
    "ux_buzzwords": "Replace buzzwords (robust, seamless, holistic) with specific descriptions of what the thing actually does.",
    "binary_reframing": "Remove the 'it's not X, it's Y' structure. State the positive claim directly.",
    "not_just_but": "Remove the 'not just X but Y' structure. Pick the stronger point and lead with it.",
    "more_than_just": "Remove 'more than just'. State what it actually is.",
    "founder_cadence": "Remove the performative cadence (here's the thing, the moment X becomes Y). Write plainly.",
    "staccato_drama": "Break the staccato pattern. Vary sentence length. Add a longer sentence.",
    "restatement_polish": "Remove 'in other words' / 'which is another way of saying'. Say it once, clearly.",
    "spoiler_reveal": "Remove 'spoiler alert' and 'here's the truth' framing.",
    "hedging_noncommittal": "Remove hedging (it depends, no one-size-fits-all). Take a position or cut the sentence.",
    "balanced_contrast": "Remove 'on the other hand' / 'on the flip side'. Pick a side or use 'but' briefly.",
    "empathy_opener": "Remove empathy validation (you're not alone, it's easy to feel). Start with the substance.",
    "journey_cliche": "Remove journey/destination metaphors. State the actual point.",
    "ai_metaphors": "Replace metaphor clusters (beacon, tapestry, north star) with concrete language.",
    "guide_framing": "Remove guide framing (step-by-step, key takeaways, actionable tips). Just write the thing.",
    "wrapping_patterns": "Remove conclusion patterns (at the end of the day, the bottom line). End on a specific detail or thought.",
    "buyer_psychology": "Remove 'people don't buy X, they buy Y' templates. State the point directly.",
    "overwhelm_reassurance": "Remove 'it can feel overwhelming but it doesn't have to be'. Just help.",
    "pros_cons_framing": "Remove pros/cons structure. Make an argument, don't list.",
    "triple_adjective": "Remove triple-adjective stacks. Pick the one that matters.",
    "hidden_depth": "Remove 'behind the scenes' / 'beneath the surface'. State the insight directly.",
    "self_referential": "Remove AI disclaimers (as an AI model, I can't provide).",
    "placeholder_brackets": "Replace [your brand] placeholders with specific examples or remove.",
    "story_templates": "Remove 'imagine this / picture this' templates. Use a real scene or observation.",
    "clickbait_didnt_know": "Remove 'the X you didn't know you needed' framing.",
    "self_referential_restatement": "Remove 'you asked about X, let's break it down'. Just answer.",
    "ted_talk_slogan": "Remove the TED-talk contrastive slogan. State the point plainly.",
    "perfect_marketing_sentence": "This sentence is too polished and generic. Make it specific or cut it.",
    "generic_opening_generalization": "Opens with a sweeping generalization. Start with a specific observation or scene.",
    "voice_question_opener": "Opens with a question. Start with a statement, scene, or observation instead.",
    "voice_lesson_opener": "Opens with a lesson/inspiration claim. Start with a specific moment or example.",
    "cta_ending": "Remove the engagement-bait CTA (let me know if you need help). End on substance.",
    "voice_no_storytelling": "No storytelling signals found. Add a personal scene, specific moment, or concrete example.",
    "voice_no_conversation": "Text reads like a lecture. Address the reader directly (you/your), add a question, or use contractions.",
    "low_burstiness": "Sentence lengths are too uniform. Add a very short sentence (under 6 words) or break a long one.",
    "mechanical_paragraphs": "Paragraphs are all the same length. Combine some, split others, or add a one-liner.",
    "uniform_paragraph_rhythm": "Sentences within paragraphs are all 12-22 words. Vary: some 5 words, some 25.",
    "low_contractions": "Too few contractions. Use don't, can't, it's, you're to sound natural.",
    "formal_hedging_density": "Too many formal hedges (it is important to note). State things directly.",
    "generic_intensifiers": "Too many intensifiers (remarkably, incredibly). Cut them or use specifics.",
    "no_fragments": "No sentence fragments at all — reads over-polished. Add a fragment for texture.",
    "over_structured_lists": "Lists follow a rigid 3-item pattern. Vary list length or break the pattern.",
}
|
|
1723
|
+
|
|
1724
|
+
|
|
1725
|
+
def _dedupe_hits(hits: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
1726
|
+
"""Merge multiple rules per line into one entry with combined rules."""
|
|
1727
|
+
by_line: dict[int, dict[str, Any]] = {}
|
|
1728
|
+
for hit in hits:
|
|
1729
|
+
line = hit.get("line", 0)
|
|
1730
|
+
if line not in by_line:
|
|
1731
|
+
by_line[line] = {"line": line, "rules": [], "phrases": [], "text": hit.get("text", "")}
|
|
1732
|
+
by_line[line]["rules"].append(hit.get("rule", "unknown"))
|
|
1733
|
+
phrase = hit.get("phrase", "")
|
|
1734
|
+
if phrase and phrase not in by_line[line]["phrases"]:
|
|
1735
|
+
by_line[line]["phrases"].append(phrase)
|
|
1736
|
+
return sorted(by_line.values(), key=lambda x: x["line"])
|
|
1737
|
+
|
|
1738
|
+
|
|
1739
|
+
def _compress_profile_for_prompt(profile: dict[str, Any] | None) -> str:
|
|
1740
|
+
"""Extract only the actionable voice data from a profile for the LLM prompt.
|
|
1741
|
+
Strips out structural metadata, sources, and raw analysis data."""
|
|
1742
|
+
if not profile:
|
|
1743
|
+
return ""
|
|
1744
|
+
|
|
1745
|
+
sig = profile.get("signature", {})
|
|
1746
|
+
tone = profile.get("emotional_tone", {})
|
|
1747
|
+
fp = profile.get("voice_fingerprint", {})
|
|
1748
|
+
|
|
1749
|
+
lines = []
|
|
1750
|
+
|
|
1751
|
+
# Voice anchors — the single most important thing
|
|
1752
|
+
anchors = sig.get("anchors", [])
|
|
1753
|
+
if anchors:
|
|
1754
|
+
lines.append("SOUND LIKE THIS:")
|
|
1755
|
+
lines.append(f' "{anchors[0][:200]}"')
|
|
1756
|
+
if len(anchors) > 1:
|
|
1757
|
+
lines.append(f' "{anchors[1][:200]}"')
|
|
1758
|
+
lines.append("")
|
|
1759
|
+
|
|
1760
|
+
# Rhythm + tone in one line
|
|
1761
|
+
cadence = sig.get("cadence", [])
|
|
1762
|
+
rhythm_line = cadence[0] if cadence else ""
|
|
1763
|
+
tone_parts = []
|
|
1764
|
+
if tone:
|
|
1765
|
+
if tone.get("formality", 5) < 4:
|
|
1766
|
+
tone_parts.append("casual")
|
|
1767
|
+
elif tone.get("formality", 5) > 6:
|
|
1768
|
+
tone_parts.append("formal")
|
|
1769
|
+
if tone.get("energy", 5) > 6:
|
|
1770
|
+
tone_parts.append("high-energy")
|
|
1771
|
+
if tone.get("cynicism", 5) > 5:
|
|
1772
|
+
tone_parts.append("cynical")
|
|
1773
|
+
if tone.get("warmth", 5) > 5:
|
|
1774
|
+
tone_parts.append("warm")
|
|
1775
|
+
tone_str = ", ".join(tone_parts) if tone_parts else "neutral"
|
|
1776
|
+
if rhythm_line:
|
|
1777
|
+
lines.append(f"RHYTHM: {rhythm_line}. Tone: {tone_str}.")
|
|
1778
|
+
else:
|
|
1779
|
+
lines.append(f"TONE: {tone_str}.")
|
|
1780
|
+
lines.append("")
|
|
1781
|
+
|
|
1782
|
+
# Never list — compact
|
|
1783
|
+
never = sig.get("never_list", [])
|
|
1784
|
+
if never:
|
|
1785
|
+
lines.append("BANNED: " + " | ".join(never[:6]))
|
|
1786
|
+
lines.append("")
|
|
1787
|
+
|
|
1788
|
+
return "\n".join(lines)
|
|
1789
|
+
|
|
1790
|
+
|
|
1791
|
+
def _flagged_line_to_instruction(entry: dict[str, Any]) -> str:
    """Turn one merged hit entry into compact bullet text for the rewrite prompt.

    ``entry`` must carry ``line``, ``rules`` and ``phrases``.

    * Line-specific entries (``line != 0``) produce a single bullet,
      ``- L<line> "<first phrase, max 60 chars>": <guidance>``, using the
      guidance of the first rule that has an entry in PATTERN_FIX_GUIDANCE.
      The guidance suffix is omitted when no rule is known.
    * Whole-document entries (``line == 0``) produce one ``- STRUCTURAL:``
      bullet per distinct guidance, separated by newlines. Every hit reported
      at line 0 ends up in the same entry, so emitting only the first
      guidance would drop the others. When none of the rules has guidance,
      the hit phrases are used as the bullet text so the bullet is not empty.
    """
    line = entry["line"]
    rules = entry["rules"]
    phrases = entry["phrases"]

    if line == 0:
        notes: list[str] = []
        for rule in rules:
            tip = PATTERN_FIX_GUIDANCE.get(rule)
            if tip and tip not in notes:
                notes.append(tip)
        if not notes:
            # No known rule: fall back to the hits' own descriptions.
            notes = [phrase for phrase in phrases if phrase] or [""]
        return "\n".join(f"- STRUCTURAL: {note}" for note in notes)

    # Pick the single most specific fix guidance (first known rule wins).
    guidance = next(
        (PATTERN_FIX_GUIDANCE[rule] for rule in rules if rule in PATTERN_FIX_GUIDANCE),
        "",
    )
    phrase_str = phrases[0] if phrases else ""
    label = f"- L{line} \"{phrase_str[:60]}\""
    return f"{label}: {guidance}" if guidance else label
|
|
1811
|
+
|
|
1812
|
+
|
|
1813
|
+
def apply_replacements(draft: str, replacements_json: str) -> str:
    """Apply LLM-returned line replacements to a draft and return the patched text.

    ``replacements_json`` is expected to be a JSON object of the form
    ``{"replacements": [{"line": <1-based int>, "text": "<new line>"}]}``.

    The response comes from a model, so it is treated as untrusted:
      * unparsable JSON, a non-object payload, or a non-list ``replacements``
        value leaves the draft unchanged;
      * individual items that are not objects, whose ``line`` is not an
        integer, whose ``text`` is not a string, or whose line number is out
        of range are skipped.
    A missing ``text`` key replaces the line with an empty string.

    When nothing is replaced the original ``draft`` is returned as is, so a
    caller comparing the result with its input reliably detects "no change".
    When something is replaced the lines are re-joined with ``"\\n"`` and a
    trailing newline is restored if the draft ended with one.
    """
    try:
        data = json.loads(replacements_json)
    except (json.JSONDecodeError, TypeError):
        return draft

    replacements = data.get("replacements", []) if isinstance(data, dict) else None
    if not isinstance(replacements, list):
        return draft

    lines = draft.splitlines()
    changed = False
    for rep in replacements:
        if not isinstance(rep, dict):
            continue
        line_no = rep.get("line", 0)
        text = rep.get("text", "")
        # bool is an int subclass; `true` must not be read as line 1.
        if isinstance(line_no, bool) or not isinstance(line_no, int):
            continue
        if not isinstance(text, str):
            continue
        if 1 <= line_no <= len(lines):
            lines[line_no - 1] = text
            changed = True

    if not changed:
        return draft

    patched = "\n".join(lines)
    # str.splitlines() drops the final line terminator; put it back.
    if draft.endswith(("\n", "\r")):
        patched += "\n"
    return patched
|
|
1828
|
+
|
|
1829
|
+
|
|
1830
|
+
def rewrite_with_verification(
    draft: str,
    profile_text: str | None = None,
    constraints: str = "",
    meta: dict[str, Any] | None = None,
    max_passes: int = 3,
    rewrite_fn=None,
) -> dict[str, Any]:
    """Run a scan → rewrite → rescan loop for at most ``max_passes`` rounds.

    Every round builds a rewrite prompt for the current text, records it, and
    scans the current text. The loop ends early when the scan finds nothing,
    when no ``rewrite_fn`` was supplied, or when applying the model's
    replacements leaves the text unchanged.

    Args:
        draft: the original draft text.
        profile_text: voice profile JSON string (optional).
        constraints: extra rewrite constraints.
        meta: signal meta; when truthy, scan hits are passed through
            ``filter_hits_by_weights`` with it.
        max_passes: upper bound on loop rounds (default 3).
        rewrite_fn: ``callable(text, prompt) -> str`` returning the model's
            JSON response. With ``None`` the loop stops after the first
            prompt has been built, so the prompt can be executed externally.

    Returns:
        A dict with ``final_text``, ``initial_hits`` (hit count of the
        untouched draft), ``final_hits`` (hit count of the final text),
        ``passes_used`` (number of rounds that reached the scan step),
        ``prompts`` (one per round) and ``pass_details`` (per-round hit
        counts).
    """

    def _scan(text: str) -> list[dict[str, Any]]:
        # Scan, then apply learned-weight filtering only when meta is supplied.
        found = scan_text(text)
        if meta:
            found = filter_hits_by_weights(found, meta)
        return found

    baseline = _scan(draft)

    working = draft
    prompts = []
    pass_details = []

    for round_no in range(1, max_passes + 1):
        prompt = build_rewrite_prompt("draft", working, profile_text, constraints, meta)
        prompts.append(prompt)

        round_hits = _scan(working)
        pass_details.append({"pass": round_no, "hits": len(round_hits)})

        # Stop when the text is clean, or when there is no LLM to call and
        # the prompt is meant for external execution.
        if not round_hits or rewrite_fn is None:
            break

        candidate = apply_replacements(working, rewrite_fn(working, prompt))
        if candidate == working:
            break  # the model changed nothing, so further rounds are pointless
        working = candidate

    remaining = _scan(working)

    return {
        "final_text": working,
        "initial_hits": len(baseline),
        "final_hits": len(remaining),
        "passes_used": len(pass_details),
        "prompts": prompts,
        "pass_details": pass_details,
    }
|
|
1903
|
+
|
|
1904
|
+
|
|
1069
1905
|
def build_rewrite_prompt(draft_name: str, draft: str, profile_text: str | None, constraints: str = "", meta: dict[str, Any] | None = None) -> str:
    """Build the compact "fix only the flagged lines" prompt for a draft.

    The draft is scanned (and filtered through ``filter_hits_by_weights`` when
    ``meta`` is truthy), hits are merged per line, and each merged entry
    becomes one instruction under ``FIX THESE:``. The draft is appended with
    1-based line numbers so the model can answer with
    ``{"replacements":[{"line":N,"text":"..."}]}``.

    Args:
        draft_name: label shown in the ``DRAFT (...)`` header.
        draft: text to scan and include.
        profile_text: voice profile as a JSON string. Only a JSON *object* is
            used; text that is not valid JSON, valid JSON of another type
            (list, string, ...), or a malformed object is left out of the
            prompt instead of raising.
        constraints: optional extra rule, appended as a ``CONSTRAINTS:`` line.
        meta: optional signal meta for learned pattern filtering.

    Returns:
        The prompt text.
    """
    hits = scan_text(draft)
    if meta:
        hits = filter_hits_by_weights(hits, meta)

    # One instruction per flagged line, with fix guidance embedded.
    issue_lines = [_flagged_line_to_instruction(entry) for entry in _dedupe_hits(hits)]
    issue_block = "\n".join(issue_lines) or "- none found"

    numbered_draft = "\n".join(f"{idx}: {line}" for idx, line in enumerate(draft.splitlines(), 1))

    # Compress profile
    profile_block = ""
    if profile_text and profile_text.strip():
        try:
            profile = json.loads(profile_text)
            # json.loads may return a list/str/number; only objects are profiles.
            if isinstance(profile, dict):
                profile_block = _compress_profile_for_prompt(profile)
        except (json.JSONDecodeError, TypeError, AttributeError):
            # Unusable profile: build the prompt without a voice section.
            profile_block = ""

    constraints_line = f"\nCONSTRAINTS: {constraints.strip()}" if constraints and constraints.strip() else ""

    # Compact prompt — everything the LLM needs, nothing it doesn't
    prompt = f"""Fix only the flagged lines. Return JSON: {{"replacements":[{{"line":1,"text":"fixed line"}}]}}

RULES:
- Only return flagged line numbers. Leave everything else untouched.
- Keep the original argument. Remove AI patterns — write like a real person.
- No hooks, CTAs, summaries, or new sections.{constraints_line}

{profile_block}FIX THESE:
{issue_block}

DRAFT ({draft_name}):
{numbered_draft}"""

    return prompt
|
|
1944
|
+
|
|
1945
|
+
|
|
1946
|
+
def build_voice_draft_prompt(draft: str, profile: dict[str, Any] | None, angle: str = "", constraints: str = "") -> str:
    """Build a prompt asking for the whole draft to be rewritten in the writer's voice.

    The prompt is a fixed rule list, optional ``ANGLE:`` and ``CONSTRAINTS:``
    lines (added only for non-empty values), the compressed voice profile
    when a profile is given, and finally the draft itself.
    """
    voice_block = ""
    if profile:
        voice_block = _compress_profile_for_prompt(profile)

    # Optional lines are appended straight after the last rule.
    extras = ""
    if angle:
        extras += f"\nANGLE: {angle}"
    if constraints:
        extras += f"\nCONSTRAINTS: {constraints}"

    return f"""Rewrite this draft in the voice below. Return the full text only — no commentary.

RULES:
- Keep the argument and key points. Match the voice anchors and rhythm.
- Open with a specific observation or scene, not a generalization.
- Use contractions. Vary sentence length. Write to one person ("you").
- No AI patterns (let's dive in, robust, holistic, moreover, furthermore).
- No hooks, CTAs, summaries, or motivational closings.
- End on a specific detail or quiet thought.{extras}

{voice_block}DRAFT:
{draft}"""
|
|
1107
1967
|
|
|
1108
1968
|
|
|
1109
1969
|
DEFAULT_NEVER_LIST = [
|
|
@@ -1935,6 +2795,104 @@ def cmd_rewrite_prompt(args: argparse.Namespace) -> int:
|
|
|
1935
2795
|
return 0
|
|
1936
2796
|
|
|
1937
2797
|
|
|
2798
|
+
def cmd_voice_score(args: argparse.Namespace) -> int:
    """CLI handler: print voice-quality metrics for a draft.

    Loads the draft named by ``args.draft``, runs the storytelling,
    conversation, specificity, tone, vocabulary-diversity, perplexity-proxy
    and n-gram scorers on it, and prints the results as JSON when
    ``args.format == "json"`` and as a text report otherwise.

    The overall ``voice_quality`` is a weighted sum rounded to 3 decimals:
    storytelling 0.25, conversation 0.25, specificity 0.2,
    ``1 - perplexity score`` 0.15 and type-token ratio 0.15.

    Always returns 0.
    """
    name, text = load_draft(args.draft)

    story = storytelling_score(text)
    conv = conversational_score(text)
    spec = specificity_score(text)
    tone = emotional_tone(text)
    diversity = vocabulary_diversity(text)
    perplexity = perplexity_proxy(text)
    ngrams = ngram_repetition(text)

    word_total = len(words(text))
    # Perplexity is inverted: a higher proxy score means more predictable text.
    overall = round(
        (story["score"] * 0.25 + conv["score"] * 0.25 + spec["score"] * 0.2 +
         (1 - perplexity["score"]) * 0.15 + diversity["ttr"] * 0.15), 3
    )

    if args.format == "json":
        report = {
            "file": name,
            "word_count": word_total,
            "storytelling": story,
            "conversation": conv,
            "specificity": spec,
            "emotional_tone": tone,
            "vocabulary_diversity": diversity,
            "perplexity_proxy": perplexity,
            "ngram_repetition": ngrams,
            "voice_quality": overall,
        }
        print(json.dumps(report, indent=2, ensure_ascii=False))
        return 0

    print(f"Voice Score for: {name}")
    print(f" Words: {word_total}")
    print(f" Overall voice quality: {overall:.2f}")
    print(f" Storytelling: {story['score']:.2f} (time={story['time_references']}, location={story['location_references']}, senses={story['sensory_words']}, dialogue={story['dialogue_markers']})")
    print(f" Conversation: {conv['score']:.2f} (you/your={conv['direct_address']}, questions={conv['questions']}, contractions={conv['contractions']})")
    print(f" Specificity: {spec['score']:.2f} (numbers={spec['numbers']}, proper_nouns={spec['proper_nouns']}, quotes={spec['quotes']})")
    print(f" Tone: formality={tone['formality']}, energy={tone['energy']}, cynicism={tone['cynicism']}, warmth={tone['warmth']}")
    print(f" Diversity: TTR={diversity['ttr']}, Yule's K={diversity['yules_k']}, hapax={diversity['hapax_ratio']}")
    print(f" Perplexity: {perplexity['score']:.3f} (higher = more predictable = more AI-like)")
    print(f" N-gram echo: {ngrams['echo_score']:.3f}")
    return 0
|
|
2839
|
+
|
|
2840
|
+
|
|
2841
|
+
def cmd_verify(args: argparse.Namespace) -> int:
    """CLI handler: scan a draft and report how many hits each rule produced.

    Loads ``args.draft``, scans it, and, when ``args.meta`` names a readable
    JSON file with truthy content, filters the hits through
    ``filter_hits_by_weights``. A meta file that is missing or cannot be
    read or parsed does not abort the run; a warning goes to stderr and the
    unfiltered hits are reported.

    Output goes to stdout: a JSON document (``file``, ``total_hits``,
    ``by_rule``, ``hits``) when ``args.format == "json"``, otherwise a short
    text summary. Rules are listed by descending hit count.

    Returns 2 when ``args.fail_on_hit`` is set and at least one hit remains,
    otherwise 0.
    """
    import sys  # local import: only needed for the stderr warnings below

    name, text = load_draft(args.draft)
    hits = scan_text(text)

    meta: dict[str, Any] = {}
    if args.meta:
        meta_path = Path(args.meta).expanduser()
        if meta_path.exists():
            try:
                meta = json.loads(meta_path.read_text(encoding="utf-8", errors="ignore"))
            except (json.JSONDecodeError, OSError) as exc:
                # Best effort: keep going unfiltered, but say so.
                print(f"warning: ignoring unreadable meta file {meta_path}: {exc}", file=sys.stderr)
        else:
            print(f"warning: meta file not found: {meta_path}", file=sys.stderr)
    if meta:
        hits = filter_hits_by_weights(hits, meta)

    # Group hits by rule
    rule_counts: dict[str, int] = {}
    for hit in hits:
        rule = hit.get("rule", "unknown")
        rule_counts[rule] = rule_counts.get(rule, 0) + 1
    # Stable sort: rules with equal counts keep first-seen order.
    ranked = sorted(rule_counts.items(), key=lambda item: -item[1])

    if args.format == "json":
        print(json.dumps({
            "file": name,
            "total_hits": len(hits),
            "by_rule": dict(ranked),
            "hits": hits,
        }, indent=2, ensure_ascii=False))
    else:
        print(f"Verification: {name}")
        print(f" Total patterns: {len(hits)}")
        if ranked:
            print(" By rule:")
            for rule, count in ranked:
                print(f" {rule}: {count}")
        else:
            print(" No AI patterns detected.")
    return 2 if args.fail_on_hit and hits else 0
|
|
2880
|
+
|
|
2881
|
+
|
|
2882
|
+
def cmd_voice_draft_prompt(args: argparse.Namespace) -> int:
    """CLI handler: generate a full-draft voice rewrite prompt.

    Loads ``args.draft`` and, when ``args.profile`` is given, the voice
    profile JSON at that path, then writes the prompt built by
    ``build_voice_draft_prompt`` via ``write_or_print`` (to ``args.out``).

    Raises:
        SystemExit: with a one-line message when the profile file does not
            exist, is not valid JSON, or holds a non-empty value that is not
            a JSON object. Profile problems are reported the same way as the
            not-found case, not as a raw traceback.

    Always returns 0 on success.
    """
    _, draft = load_draft(args.draft)

    profile = None
    if args.profile:
        profile_path = Path(args.profile).expanduser()
        if not profile_path.exists():
            raise SystemExit(f"profile not found: {profile_path}")
        try:
            profile = json.loads(profile_path.read_text(encoding="utf-8", errors="ignore"))
        except json.JSONDecodeError as exc:
            raise SystemExit(f"profile is not valid JSON: {profile_path}: {exc}") from exc
        # Empty values (null, {}, []) still mean "no profile"; anything else
        # must be an object, because the prompt builder reads it by key.
        if profile and not isinstance(profile, dict):
            raise SystemExit(f"profile must be a JSON object: {profile_path}")

    prompt = build_voice_draft_prompt(draft, profile, args.angle or "", args.constraints or "")
    write_or_print(prompt, args.out)
    return 0
|
|
2894
|
+
|
|
2895
|
+
|
|
1938
2896
|
def build_parser() -> argparse.ArgumentParser:
|
|
1939
2897
|
parser = argparse.ArgumentParser(description="Portable Hold Your Voice helpers")
|
|
1940
2898
|
sub = parser.add_subparsers(dest="command", required=True)
|
|
@@ -2000,6 +2958,27 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
2000
2958
|
pev.add_argument("--new-samples", nargs="*", default=None, help="additional new writing samples to merge (optional)")
|
|
2001
2959
|
pev.set_defaults(func=cmd_profile_evolve)
|
|
2002
2960
|
|
|
2961
|
+
# NEW: voice-first commands
|
|
2962
|
+
vs = sub.add_parser("voice-score", help="score text for voice quality: storytelling, conversation, specificity, tone")
|
|
2963
|
+
vs.add_argument("draft", help="draft file, or '-' for stdin")
|
|
2964
|
+
vs.add_argument("--format", choices=["json", "text"], default="text")
|
|
2965
|
+
vs.set_defaults(func=cmd_voice_score)
|
|
2966
|
+
|
|
2967
|
+
vf = sub.add_parser("verify", help="scan and report pattern breakdown by rule")
|
|
2968
|
+
vf.add_argument("draft", help="draft file, or '-' for stdin")
|
|
2969
|
+
vf.add_argument("--format", choices=["json", "text"], default="text")
|
|
2970
|
+
vf.add_argument("--fail-on-hit", action="store_true", help="exit 2 when issues are found")
|
|
2971
|
+
vf.add_argument("--meta", help="meta JSON file for learned pattern filtering")
|
|
2972
|
+
vf.set_defaults(func=cmd_verify)
|
|
2973
|
+
|
|
2974
|
+
vdp = sub.add_parser("voice-draft-prompt", help="generate a full-draft voice rewrite prompt")
|
|
2975
|
+
vdp.add_argument("draft", help="draft file, or '-' for stdin")
|
|
2976
|
+
vdp.add_argument("--profile", help="voice profile JSON file")
|
|
2977
|
+
vdp.add_argument("--angle", default="", help="writing angle or intent")
|
|
2978
|
+
vdp.add_argument("--constraints", default="", help="extra constraints")
|
|
2979
|
+
vdp.add_argument("--out", help="write prompt to this path")
|
|
2980
|
+
vdp.set_defaults(func=cmd_voice_draft_prompt)
|
|
2981
|
+
|
|
2003
2982
|
return parser
|
|
2004
2983
|
|
|
2005
2984
|
|