humanizer-rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,198 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Humanizer
4
+ module Stats
5
# Value container for every statistic computed by Stats.compute.
# keyword_init allows construction as Result.new(word_count: ..., ...).
Result = Struct.new(
  :word_count, :unique_word_count, :sentence_count, :paragraph_count,
  :avg_word_length, :avg_sentence_length, :sentence_length_std_dev,
  :sentence_length_variation, :burstiness, :type_token_ratio,
  :function_word_ratio, :trigram_repetition, :avg_paragraph_length,
  :flesch_kincaid, :sentence_lengths,
  keyword_init: true
)

# Frozen all-zero result returned by Stats.compute for nil, non-String,
# blank, or word-less input.
EMPTY = Result.new(
  word_count: 0, unique_word_count: 0, sentence_count: 0, paragraph_count: 0,
  avg_word_length: 0, avg_sentence_length: 0, sentence_length_std_dev: 0,
  sentence_length_variation: 0, burstiness: 0, type_token_ratio: 0,
  function_word_ratio: 0, trigram_repetition: 0, avg_paragraph_length: 0,
  flesch_kincaid: 0, sentence_lengths: []
).freeze

# Everything below is callable both as Stats.compute(...) and when mixed in.
module_function
23
+
24
# Compute descriptive statistics for +text+.
#
# Returns the frozen EMPTY result for nil, non-String, blank input, or
# input that tokenizes to no words; otherwise a populated Result.
def compute(text)
  # is_a?(String) is false for nil, so the previous separate nil check
  # was redundant.
  return EMPTY unless text.is_a?(String)
  return EMPTY if text.strip.empty?

  words = TextUtils.tokenize(text)
  return EMPTY if words.empty?

  sentences = split_sentences(text)
  # Paragraphs are separated by one or more blank lines.
  paragraphs = text.split(/\n\s*\n/).reject { |p| p.strip.empty? }

  # ── Word-level stats ──
  word_count = words.length
  unique_words = words.uniq
  type_token_ratio = unique_words.length.to_f / word_count
  avg_word_length = words.sum(&:length).to_f / word_count

  # ── Sentence-level stats ── (sentences with zero tokens are dropped)
  sentence_lengths = sentences.map { |s| TextUtils.tokenize(s).length }.select { |n| n > 0 }
  sentence_count = sentence_lengths.length

  avg_sentence_length = 0.0
  sentence_length_std_dev = 0.0
  sentence_length_variation = 0.0
  burstiness = 0.0

  if sentence_count > 1
    avg_sentence_length = sentence_lengths.sum.to_f / sentence_count

    # Population variance / standard deviation of sentence lengths.
    variance = sentence_lengths.sum { |len| (len - avg_sentence_length) ** 2 }.to_f / sentence_count
    sentence_length_std_dev = Math.sqrt(variance)

    # Coefficient of variation: spread relative to the mean.
    sentence_length_variation = avg_sentence_length > 0 ? sentence_length_std_dev / avg_sentence_length : 0.0

    # Burstiness: mean absolute difference between consecutive sentence
    # lengths, normalized by the mean. Low values = uniform rhythm.
    consecutive_diff_sum = (1...sentence_lengths.length).sum do |i|
      (sentence_lengths[i] - sentence_lengths[i - 1]).abs
    end
    avg_consecutive_diff = consecutive_diff_sum.to_f / (sentence_lengths.length - 1)
    burstiness = avg_sentence_length > 0 ? avg_consecutive_diff / avg_sentence_length : 0.0
  elsif sentence_count == 1
    # A single sentence has a length but no meaningful variation metrics.
    avg_sentence_length = sentence_lengths[0].to_f
  end

  # ── Function word ratio ── (tokenize already lowercases, so downcasing
  # the vocabulary list makes membership tests consistent)
  function_word_set = Vocabulary::FUNCTION_WORDS.map(&:downcase).to_set
  function_word_count = words.count { |w| function_word_set.include?(w) }
  function_word_ratio = function_word_count.to_f / word_count

  # ── N-gram repetition ──
  trigram_repetition = compute_ngram_repetition(words, 3)

  # ── Paragraph stats ──
  paragraph_count = paragraphs.length
  avg_paragraph_length =
    if paragraph_count > 0
      paragraphs.sum { |p| TextUtils.tokenize(p).length }.to_f / paragraph_count
    else
      0.0
    end

  # ── Readability: Flesch-Kincaid grade level ──
  # 0.39 * (words/sentence) + 11.8 * (syllables/word) - 15.59
  syllable_count = words.sum { |w| estimate_syllables(w) }
  flesch_kincaid =
    if sentence_count > 0
      0.39 * (word_count.to_f / sentence_count) + 11.8 * (syllable_count.to_f / word_count) - 15.59
    else
      0.0
    end

  Result.new(
    word_count: word_count,
    unique_word_count: unique_words.length,
    sentence_count: sentence_count,
    paragraph_count: paragraph_count,
    avg_word_length: round3(avg_word_length),
    avg_sentence_length: round3(avg_sentence_length),
    sentence_length_std_dev: round3(sentence_length_std_dev),
    sentence_length_variation: round3(sentence_length_variation),
    burstiness: round3(burstiness),
    type_token_ratio: round3(type_token_ratio),
    function_word_ratio: round3(function_word_ratio),
    trigram_repetition: round3(trigram_repetition),
    avg_paragraph_length: round3(avg_paragraph_length),
    flesch_kincaid: round3(flesch_kincaid),
    sentence_lengths: sentence_lengths,
  )
end
101
+
102
# Score how statistically uniform/regular the text is (0-100).
# Higher scores indicate machine-like regularity. Returns 0 for an
# empty stats object (word_count of zero).
def uniformity_score(stats)
  return 0 if stats.word_count == 0

  total = 0

  # Shared banding for "lower value = more AI-like" metrics (max 25 pts).
  low_band = lambda do |value|
    if value < 0.2 then 25
    elsif value < 0.35 then 18
    elsif value < 0.5 then 10
    elsif value < 0.65 then 5
    else 0
    end
  end

  # Sentence rhythm: low burstiness reads as machine-like (up to 25 pts).
  total += low_band.call(stats.burstiness)
  # Low sentence-length variation is likewise suspicious (up to 25 pts).
  total += low_band.call(stats.sentence_length_variation)

  # Vocabulary diversity is only meaningful on longer samples (up to 20 pts).
  if stats.word_count > 100
    ttr = stats.type_token_ratio
    total += if ttr < 0.35 then 20
             elsif ttr < 0.45 then 12
             elsif ttr < 0.55 then 5
             else 0
             end
  end

  # Repeated trigrams suggest templated phrasing (up to 15 pts).
  rep = stats.trigram_repetition
  total += if rep > 0.15 then 15
           elsif rep > 0.1 then 10
           elsif rep > 0.05 then 5
           else 0
           end

  # Very tight sentence-length spread across a multi-paragraph,
  # multi-sentence text (15 pts).
  if stats.paragraph_count >= 3 && stats.sentence_count > 5 &&
     stats.sentence_length_std_dev < 3 && stats.avg_sentence_length > 10
    total += 15
  end

  total > 100 ? 100 : total
end
145
+
146
# Split +text+ into sentences. Periods belonging to common abbreviations,
# single initials (e.g. "J. K."), or numbered items are temporarily masked
# with U+2024 (ONE DOT LEADER) so they do not trigger a sentence break,
# then restored afterwards.
def split_sentences(text)
  mask = "\u2024"

  masked = text.gsub(/\b(Mr|Mrs|Ms|Dr|Prof|Sr|Jr|etc|vs|approx|dept|est|vol)\./i) { "#{Regexp.last_match(1)}#{mask}" }
  masked = masked.gsub(/\b([A-Z])\./) { "#{Regexp.last_match(1)}#{mask}" }
  masked = masked.gsub(/\b(\d+)\./) { "#{Regexp.last_match(1)}#{mask}" }

  # Break after .!? when followed by whitespace + a capital/opening quote,
  # or at end of input.
  boundary = /(?<=[.!?])\s+(?=[A-Z"'\u{201C}])|(?<=[.!?])$/

  masked.split(boundary)
        .map { |piece| piece.gsub(mask, ".").strip }
        .reject(&:empty?)
end
160
+
161
# Rough English syllable estimate: count vowel clusters, then correct for
# silent "e" and non-syllabic "-ed" endings. Always returns at least 1.
def estimate_syllables(word)
  letters = word.downcase.delete("^a-z")
  # Very short words are treated as one syllable.
  return 1 if letters.length <= 3

  clusters = letters.scan(/[aeiouy]+/).length
  syllables = clusters.zero? ? 1 : clusters

  # Final "e" is usually silent ("code"), except in "-le" words ("table").
  syllables -= 1 if letters.end_with?("e") && !letters.end_with?("le")
  # "-ed" after a consonant usually adds no syllable ("jumped" vs "agreed").
  syllables -= 1 if letters.end_with?("ed") && !letters.match?(/[aeiouy]ed\z/)

  syllables < 1 ? 1 : syllables
end
176
+
177
# Fraction of distinct n-grams in +words+ that occur more than once.
# Returns 0.0 when there are fewer than +n+ words.
def compute_ngram_repetition(words, n)
  return 0.0 if words.length < n

  # Tally each sliding window of n consecutive tokens.
  counts = words.each_cons(n).tally
  return 0.0 if counts.empty?

  duplicated = counts.count { |_gram, seen| seen > 1 }
  duplicated.to_f / counts.length
end
193
+
194
# Round +n+ to three decimal places, always returning a Float
# (even for Integer input).
def round3(n)
  scaled = (n * 1000).round
  scaled / 1000.0
end
197
+ end
198
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Humanizer
4
+ module TextUtils
5
+ module_function
6
+
7
# Lowercase +text+ and break it into word tokens.
# Apostrophes and hyphens are kept inside tokens ("don't", "stop-me");
# all other punctuation is replaced with whitespace.
def tokenize(text)
  normalized = text.downcase.gsub(/[^\w\s'-]/, " ")
  normalized.split(/\s+/).reject { |token| token.empty? }
end
11
+
12
# Number of whitespace-separated words in +text+.
def word_count(text)
  text.strip.split(/\s+/).count { |w| !w.empty? }
end
16
+
17
# Scan +text+ line by line for +regex+ and report every hit with its
# position. Each result hash contains:
#   :match      the matched substring
#   :index      0-based offset into the full text
#   :line       1-based line number
#   :column     1-based column within the line
#   :suggestion +suggestion+ itself, or its return value when it is a
#               Proc (called with the matched substring)
#   :confidence the +confidence+ keyword (default "high")
def find_matches(text, regex, suggestion, confidence: "high")
  found = []
  line_start = 0 # offset of the current line within the full text

  text.split("\n").each.with_index(1) do |line, lineno|
    line.scan(regex) do
      hit = Regexp.last_match
      col = hit.begin(0)
      replacement = suggestion.is_a?(Proc) ? suggestion.call(hit[0]) : suggestion
      found << {
        match: hit[0],
        index: line_start + col,
        line: lineno,
        column: col + 1,
        suggestion: replacement,
        confidence: confidence,
      }
    end
    line_start += line.length + 1 # +1 for the "\n" removed by split
  end

  found
end
40
+
41
# Number of times +regex+ matches in +text+.
def count_matches(text, regex)
  text.scan(regex).size
end
46
+
47
# Build a case-insensitive regex matching +word+ as a whole word.
# The word is escaped, so regex metacharacters are matched literally.
def word_regex(word)
  Regexp.new("\\b#{Regexp.escape(word)}\\b", Regexp::IGNORECASE)
end
52
+ end
53
+ end
@@ -0,0 +1,5 @@
1
# frozen_string_literal: true

module Humanizer
  # Gem version string (semantic versioning); referenced by the gemspec.
  VERSION = "0.1.0"
end
@@ -0,0 +1,260 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Humanizer
4
+ module Vocabulary
5
# ── Tier 1: Dead Giveaways ─────────────────────────────
# Words that appear 5-20x more often in AI text than human text.
# NOTE: entries must be unique — a duplicated entry would double-count
# matches in frequency-based scoring. ("tapestry" and "testament" were
# previously listed twice; the duplicates have been removed.)
TIER_1 = %w[
  delve delving delved delves
  tapestry vibrant crucial comprehensive
  intricate intricacies pivotal testament
  landscape bustling nestled realm
  meticulous meticulously complexities
  embark embarking embarked
  robust
  showcasing showcase showcased showcases
  underscores underscoring underscored
  fostering foster fostered fosters
  seamless seamlessly groundbreaking renowned
  synergy synergies
  leverage leveraging leveraged
  garner garnered garnering
  interplay enduring
  enhance enhanced enhancing enhancement
  additionally daunting
  ever-evolving
  underscore
  unpack unpacking unpacked
  unraveling unravel
  holistic holistically synergistic
  actionable impactful learnings cadence bandwidth
  net-net value-add
].freeze
33
+
34
# Multi-word tier 1 entries (need separate handling — %w[] literals
# cannot hold phrases containing spaces).
# NOTE(review): "deep dive" and "at its core" also exist as regex
# patterns in AI_PHRASES — confirm double counting is intentional.
TIER_1_PHRASES = [
  "game changer", "game-changing", "game-changer",
  "deep dive", "deep-dive", "at its core",
  "best practices", "best-practices", "best practice",
  "thought leader", "thought leadership",
].freeze
41
+
42
# ── Tier 2: Suspicious in Density ──────────────────────
# NOTE: keep entries unique and disjoint from TIER_1 — a word listed
# twice (or in both tiers) would be flagged and scored more than once.
# Removed here: within-list duplicates ("underscore", "bolster",
# "myriad") and words already carried by TIER_1 ("holistic",
# "holistically", "synergistic", "impactful", "underscore",
# "underscored").
TIER_2 = %w[
  furthermore moreover notably consequently subsequently
  accordingly nonetheless henceforth indeed specifically
  essentially ultimately arguably fundamentally inherently
  profoundly
  encompassing encompasses encompassed
  endeavour endeavor endeavoring
  elevate elevated elevating
  alleviate alleviating
  streamline streamlined streamlining
  harness harnessing harnessed
  unleash unleashing unleashed
  revolutionize revolutionizing revolutionized
  transformative transformation paramount multifaceted
  spearhead spearheading spearheaded
  bolster bolstering bolstered
  catalyze catalyst catalyzed cornerstone
  reimagine reimagining reimagined
  empower empowering empowerment empowered
  navigate navigating navigated
  poised myriad nuanced nuance nuances
  paradigm paradigms paradigm-shifting
  utilize utilizing utilization utilized
  facilitate facilitated facilitating facilitation
  elucidate elucidating
  illuminate illuminating illuminated
  invaluable cutting-edge innovative innovation
  align aligns aligning alignment
  dynamic dynamics agile
  scalable scalability proactive proactively
  optimize optimizing optimization
  resonate resonating resonated resonates
  cultivate cultivating cultivated
  galvanize galvanizing
  invigorate invigorating
  juxtapose juxtaposing juxtaposition
  augment augmenting augmented
  proliferate proliferating proliferation
  burgeoning nascent ubiquitous plethora
  quintessential eclectic indelible
  overarching underpinning underpinnings
].freeze
88
+
89
+ # ── Tier 3: Context-Dependent ──────────────────────────
90
+ TIER_3 = %w[
91
+ significant significantly important importantly
92
+ effective effectively efficient efficiently
93
+ diverse diversity unique uniquely
94
+ key vital vitally critical critically
95
+ essential essentially valuable notable
96
+ remarkable remarkably substantial substantially
97
+ considerable considerably noteworthy
98
+ prominent prominently influential
99
+ thoughtful thoughtfully insightful insightfully
100
+ meaningful meaningfully purposeful purposefully
101
+ deliberate deliberately strategic strategically
102
+ integral indispensable instrumental imperative
103
+ exemplary commendable praiseworthy
104
+ sophisticated profound compelling captivating
105
+ exquisite impeccable formidable stellar
106
+ exceptional exceptionally extraordinary
107
+ unparalleled unprecedented monumental
108
+ groundbreaking trailblazing visionary
109
+ world-class state-of-the-art best-in-class
110
+ ].freeze
111
+
112
# ── AI Phrases ─────────────────────────────────────────
# Multi-word phrases that strongly signal AI authorship.
# Each entry is a hash:
#   :pattern — case-insensitive regex matched against the text
#   :tier    — 1 = strongest signal, 3 = weakest
#   :fix     — suggested replacement, or advice in parentheses
AI_PHRASES = [
  # "In today's..." openers
  { pattern: /\bin today'?s (digital age|fast-paced world|rapidly evolving|ever-changing|modern|interconnected)\b/i, tier: 1, fix: "(remove or be specific about what changed)" },
  { pattern: /\bin today'?s world\b/i, tier: 2, fix: "(remove or be specific)" },

  # "It is [worth/important] to note"
  { pattern: /\bit is (worth|important to|essential to|crucial to) not(?:e|ing) that\b/i, tier: 1, fix: "(remove — just state the fact)" },
  { pattern: /\bit should be noted that\b/i, tier: 1, fix: "(remove — just state the fact)" },
  { pattern: /\bit bears mentioning that\b/i, tier: 1, fix: "(remove — just state the fact)" },

  # Journey metaphors
  { pattern: /\bpave the way (?:for|to)\b/i, tier: 1, fix: "enable / allow / lead to" },
  { pattern: /\bat the forefront of\b/i, tier: 1, fix: "leading / first in" },
  { pattern: /\bnavigate the (?:complexities|challenges|landscape)\b/i, tier: 1, fix: "handle / deal with / work through" },
  { pattern: /\bharness the (?:power|potential|capabilities) of\b/i, tier: 1, fix: "use" },
  { pattern: /\bembark on a journey\b/i, tier: 1, fix: "start / begin" },
  { pattern: /\bpush the boundaries\b/i, tier: 1, fix: "(be specific about what changed)" },
  { pattern: /\bfoster a (?:culture|environment|atmosphere|sense) of\b/i, tier: 1, fix: "build / create / encourage" },
  { pattern: /\bunlock the (?:potential|power|full|true)\b/i, tier: 1, fix: "enable / use / improve" },
  { pattern: /\bserves as a testament\b/i, tier: 1, fix: "shows / proves / demonstrates" },
  { pattern: /\bplays a (?:crucial|pivotal|vital|key|significant|important|critical) role\b/i, tier: 1, fix: "matters for / helps / is important to" },
  { pattern: /\bin the realm of\b/i, tier: 1, fix: "in" },
  { pattern: /\bdelve into\b/i, tier: 1, fix: "explore / examine / look at" },
  { pattern: /\bthe landscape of\b/i, tier: 1, fix: "(be specific — what part of the field?)" },
  { pattern: /\bnestled (?:in|within|among)\b/i, tier: 1, fix: "located in / in / near" },

  # Abstract verb phrases
  { pattern: /\brise to the (?:occasion|challenge)\b/i, tier: 2, fix: "handle / face / tackle" },
  { pattern: /\bstand at the (?:crossroads|intersection)\b/i, tier: 2, fix: "(be specific about the choice)" },
  { pattern: /\bshape the (?:future|trajectory|direction)\b/i, tier: 2, fix: "(be specific about how)" },
  { pattern: /\btip of the iceberg\b/i, tier: 2, fix: "one example / a small part" },
  { pattern: /\bdouble-edged sword\b/i, tier: 2, fix: "has tradeoffs / cuts both ways" },
  { pattern: /\ba testament to\b/i, tier: 1, fix: "shows / proves" },
  { pattern: /\bthe dawn of\b/i, tier: 2, fix: "the start of / the beginning of" },
  { pattern: /\bthe fabric of\b/i, tier: 1, fix: "(be concrete)" },
  { pattern: /\bthe tapestry of\b/i, tier: 1, fix: "(be concrete)" },

  # Hedging stacks
  { pattern: /\bcould potentially\b/i, tier: 1, fix: "could / might" },
  { pattern: /\bmight possibly\b/i, tier: 1, fix: "might" },
  { pattern: /\bcould possibly\b/i, tier: 1, fix: "could" },
  { pattern: /\bperhaps potentially\b/i, tier: 1, fix: "perhaps / maybe" },
  { pattern: /\bmay potentially\b/i, tier: 1, fix: "may" },
  { pattern: /\bcould conceivably\b/i, tier: 1, fix: "could" },

  # Chatbot filler
  { pattern: /\bI hope this helps\b/i, tier: 1, fix: "(remove)" },
  { pattern: /\blet me know if (?:you|there)\b/i, tier: 1, fix: "(remove)" },
  { pattern: /\bwould you like me to\b/i, tier: 1, fix: "(remove)" },
  { pattern: /\bfeel free to\b/i, tier: 1, fix: "(remove)" },
  { pattern: /\bdon'?t hesitate to\b/i, tier: 1, fix: "(remove)" },
  { pattern: /\bhappy to help\b/i, tier: 1, fix: "(remove)" },
  { pattern: /\bhere is (?:a |an |the )?(?:comprehensive |brief |quick )?(?:overview|summary|breakdown|list|guide|explanation|look)\b/i, tier: 1, fix: "(remove — start with the content)" },
  { pattern: /\bI'?d be happy to\b/i, tier: 1, fix: "(remove)" },
  { pattern: /\bis there anything else\b/i, tier: 1, fix: "(remove)" },

  # Sycophantic
  { pattern: /\bgreat question\b/i, tier: 1, fix: "(remove)" },
  { pattern: /\bexcellent (?:question|point|observation)\b/i, tier: 1, fix: "(remove)" },
  { pattern: /\bthat'?s a (?:great|excellent|wonderful|fantastic|good|insightful|thoughtful) (?:question|point|observation)\b/i, tier: 1, fix: "(remove)" },
  { pattern: /\byou'?re absolutely right\b/i, tier: 1, fix: "(remove or address the substance)" },
  { pattern: /\byou raise a (?:great|good|excellent|valid|important) point\b/i, tier: 1, fix: "(remove or address the substance)" },

  # Cutoff disclaimers
  { pattern: /\bas of (?:my|this) (?:last|latest|most recent) (?:training|update|knowledge)\b/i, tier: 1, fix: "(remove)" },
  { pattern: /\bwhile (?:specific )?details are (?:limited|scarce|not available)\b/i, tier: 1, fix: "(remove — research it or omit the claim)" },
  { pattern: /\bbased on (?:available|my|current) (?:information|knowledge|understanding|data)\b/i, tier: 1, fix: "(remove)" },
  { pattern: /\bup to my (?:last )?training\b/i, tier: 1, fix: "(remove)" },

  # Generic conclusions
  { pattern: /\bthe future (?:looks|is|remains) bright\b/i, tier: 1, fix: "(end with a specific fact or plan)" },
  { pattern: /\bexciting times (?:lie|lay|are) ahead\b/i, tier: 1, fix: "(end with a specific fact or plan)" },
  { pattern: /\bcontinue (?:this|their|our|the) journey\b/i, tier: 1, fix: "(be specific about what happens next)" },
  { pattern: /\bjourney towards? (?:excellence|success|greatness)\b/i, tier: 1, fix: "(be specific)" },
  { pattern: /\bstep in the right direction\b/i, tier: 1, fix: "(be specific about the outcome)" },
  { pattern: /\bonly time will tell\b/i, tier: 1, fix: "(end with what you actually know)" },
  { pattern: /\bthe possibilities are (?:endless|limitless|infinite)\b/i, tier: 1, fix: "(be specific about what's possible)" },
  { pattern: /\bpoised for (?:growth|success|greatness|expansion)\b/i, tier: 1, fix: "(cite evidence or remove)" },
  { pattern: /\bwatch this space\b/i, tier: 2, fix: "(end with something concrete)" },
  { pattern: /\bstay tuned\b/i, tier: 2, fix: "(end with something concrete)" },
  { pattern: /\bremains to be seen\b/i, tier: 2, fix: "(state what you do know)" },

  # Formulaic filler
  { pattern: /\bin order to\b/i, tier: 2, fix: "to" },
  { pattern: /\bdue to the fact that\b/i, tier: 1, fix: "because" },
  { pattern: /\bat this point in time\b/i, tier: 1, fix: "now" },
  { pattern: /\bin the event that\b/i, tier: 1, fix: "if" },
  { pattern: /\bhas the ability to\b/i, tier: 1, fix: "can" },
  { pattern: /\bfor the purpose of\b/i, tier: 1, fix: "to / for" },
  { pattern: /\bin light of the fact that\b/i, tier: 1, fix: "because / since" },
  { pattern: /\bfirst and foremost\b/i, tier: 2, fix: "first" },
  { pattern: /\blast but not least\b/i, tier: 2, fix: "finally" },
  { pattern: /\bat the end of the day\b/i, tier: 2, fix: "(remove or be specific)" },
  { pattern: /\bwhen it comes to\b/i, tier: 2, fix: "for / regarding" },
  { pattern: /\bthe fact of the matter is\b/i, tier: 1, fix: "(remove — just state it)" },
  { pattern: /\bin terms of\b/i, tier: 3, fix: "for / about / regarding" },
  { pattern: /\bat its core\b/i, tier: 2, fix: "(remove or be specific)" },
  { pattern: /\bit goes without saying\b/i, tier: 2, fix: "(if it goes without saying, don't say it)" },
  { pattern: /\bneedless to say\b/i, tier: 2, fix: "(if needless to say, don't say it)" },

  # v2.2 additions
  { pattern: /\blet'?s dive in\b/i, tier: 1, fix: "(just start)" },
  { pattern: /\blet'?s (?:break this|break it) down\b/i, tier: 1, fix: "(just explain)" },
  { pattern: /\bhere'?s the thing\b/i, tier: 2, fix: "(just say it)" },
  { pattern: /\bthe reality is\b/i, tier: 2, fix: "(state the fact)" },
  { pattern: /\bmoving forward\b/i, tier: 2, fix: "next / from now on" },
  { pattern: /\bcircle back\b/i, tier: 1, fix: "return to / revisit" },
  { pattern: /\btouch base\b/i, tier: 1, fix: "talk / check in" },
  { pattern: /\bgoing forward\b/i, tier: 2, fix: "from now on" },
  { pattern: /\bkey takeaways?\b/i, tier: 1, fix: "main point(s)" },
  { pattern: /\bvalue proposition\b/i, tier: 2, fix: "benefit / value" },
  { pattern: /\bcore competenc(?:y|ies)\b/i, tier: 2, fix: "strength(s)" },
  { pattern: /\bbest-in-class\b/i, tier: 1, fix: "excellent / (be specific)" },
  { pattern: /\bworld-class\b/i, tier: 1, fix: "(be specific)" },
  { pattern: /\bcutting-edge\b/i, tier: 1, fix: "(be specific)" },
  { pattern: /\bstate-of-the-art\b/i, tier: 1, fix: "(be specific or cite)" },
  { pattern: /\bgold standard\b/i, tier: 2, fix: "(cite the standard)" },
  { pattern: /\blow-hanging fruit\b/i, tier: 1, fix: "easy wins / quick wins" },
  { pattern: /\bpain points?\b/i, tier: 1, fix: "problem(s)" },
  { pattern: /\bdeep dive\b/i, tier: 1, fix: "detailed look / analysis" },
  { pattern: /\bparadigm shift\b/i, tier: 1, fix: "major change" },
  # NOTE(review): the space before "(?:on)?" means this still requires a
  # space after "double-click" even when "on" is absent — confirm intended.
  { pattern: /\bdouble-click (?:on)?\b/i, tier: 1, fix: "examine / look closer at" },
  { pattern: /\bloop (?:you |me |them )in\b/i, tier: 2, fix: "include / inform" },
  { pattern: /\btable this\b/i, tier: 2, fix: "postpone / set aside" },
  { pattern: /\bpivot to\b/i, tier: 2, fix: "switch to / change to" },
  { pattern: /\bsynch? (?:up )?(?:on|about)\b/i, tier: 2, fix: "discuss / align on" },
  { pattern: /\brun it up the flagpole\b/i, tier: 1, fix: "propose / suggest" },
  { pattern: /\bboil the ocean\b/i, tier: 1, fix: "attempt too much" },
  { pattern: /\bmove the needle\b/i, tier: 1, fix: "make progress / have impact" },
  { pattern: /\bopen the kimono\b/i, tier: 1, fix: "share / be transparent" },
  { pattern: /\bdrink the Kool-Aid\b/i, tier: 2, fix: "believe / accept" },
].freeze
246
+
247
# ── Function Words ─────────────────────────────────────
# High-frequency English words used for the function-word ratio metric.
# All entries are lowercase so they compare directly against lowercased
# tokens (previously "I" was the lone uppercase entry and relied on
# callers downcasing the whole list).
FUNCTION_WORDS = %w[
  the be to of and a in that have i it for not on with
  he as you do at this but his by from they we say her
  she or an will my one all would there their what so
  up out if about who get which go me when make can like
  time no just him know take people into year your good
  some could them see other than then now look only come
  its over think also back after use two how our work
  first well way even new want because any these give
  day most us
].freeze
259
+ end
260
+ end
data/lib/humanizer.rb ADDED
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
+ require_relative "humanizer/version"
6
+ require_relative "humanizer/text_utils"
7
+ require_relative "humanizer/vocabulary"
8
+ require_relative "humanizer/stats"
9
+ require_relative "humanizer/patterns"
10
+ require_relative "humanizer/analyzer"
11
+ require_relative "humanizer/humanizer_engine"
12
+
13
# Public facade for the gem: thin delegations to Analyzer (scoring and
# analysis) and HumanizerEngine (rewriting), defined in the files
# required above.
module Humanizer
  # Quick score (0-100, higher = more AI-like); delegates to Analyzer.score.
  def self.score(text)
    Analyzer.score(text)
  end

  # Full analysis with details; keyword options are forwarded unchanged
  # to Analyzer.analyze.
  def self.analyze(text, **opts)
    Analyzer.analyze(text, **opts)
  end

  # Humanization suggestions; keyword options are forwarded unchanged
  # to HumanizerEngine.humanize.
  def self.humanize(text, **opts)
    HumanizerEngine.humanize(text, **opts)
  end

  # Safe mechanical auto-fixes — exact "safe" semantics are defined by
  # HumanizerEngine.auto_fix (not visible in this file).
  def self.auto_fix(text)
    HumanizerEngine.auto_fix(text)
  end
end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: humanizer-rb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Christian Genco
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2026-03-16 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Scores text 0-100 for AI writing patterns using 28 pattern detectors,
14
+ 500+ vocabulary terms, and statistical text analysis. Ruby port of the humanizer
15
+ Node.js tool.
16
+ email:
17
+ - christian@gen.co
18
+ executables:
19
+ - humanizer
20
+ extensions: []
21
+ extra_rdoc_files: []
22
+ files:
23
+ - CHANGELOG.md
24
+ - LICENSE
25
+ - README.md
26
+ - bin/humanizer
27
+ - lib/humanizer.rb
28
+ - lib/humanizer/analyzer.rb
29
+ - lib/humanizer/humanizer_engine.rb
30
+ - lib/humanizer/patterns.rb
31
+ - lib/humanizer/stats.rb
32
+ - lib/humanizer/text_utils.rb
33
+ - lib/humanizer/version.rb
34
+ - lib/humanizer/vocabulary.rb
35
+ homepage: https://github.com/christiangenco/humanizer-rb
36
+ licenses:
37
+ - MIT
38
+ metadata:
39
+ homepage_uri: https://github.com/christiangenco/humanizer-rb
40
+ source_code_uri: https://github.com/christiangenco/humanizer-rb
41
+ changelog_uri: https://github.com/christiangenco/humanizer-rb/blob/main/CHANGELOG.md
42
+ post_install_message:
43
+ rdoc_options: []
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '3.0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ requirements: []
57
+ rubygems_version: 3.5.22
58
+ signing_key:
59
+ specification_version: 4
60
+ summary: Detect AI-generated writing patterns
61
+ test_files: []