humanizer-rb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +12 -0
- data/LICENSE +21 -0
- data/README.md +133 -0
- data/bin/humanizer +388 -0
- data/lib/humanizer/analyzer.rb +319 -0
- data/lib/humanizer/humanizer_engine.rb +192 -0
- data/lib/humanizer/patterns.rb +637 -0
- data/lib/humanizer/stats.rb +198 -0
- data/lib/humanizer/text_utils.rb +53 -0
- data/lib/humanizer/version.rb +5 -0
- data/lib/humanizer/vocabulary.rb +260 -0
- data/lib/humanizer.rb +33 -0
- metadata +61 -0
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Humanizer
  # Statistical text metrics (burstiness, lexical diversity, readability)
  # used to gauge how uniform / "AI-like" a passage reads.
  #
  # All methods are module functions; the main entry point is +compute+.
  module Stats
    # Bag of per-text metrics; keyword_init so every field is named at
    # construction time.
    Result = Struct.new(
      :word_count, :unique_word_count, :sentence_count, :paragraph_count,
      :avg_word_length, :avg_sentence_length, :sentence_length_std_dev,
      :sentence_length_variation, :burstiness, :type_token_ratio,
      :function_word_ratio, :trigram_repetition, :avg_paragraph_length,
      :flesch_kincaid, :sentence_lengths,
      keyword_init: true
    )

    # All-zero result returned for nil / non-string / blank input.
    EMPTY = Result.new(
      word_count: 0, unique_word_count: 0, sentence_count: 0, paragraph_count: 0,
      avg_word_length: 0, avg_sentence_length: 0, sentence_length_std_dev: 0,
      sentence_length_variation: 0, burstiness: 0, type_token_ratio: 0,
      function_word_ratio: 0, trigram_repetition: 0, avg_paragraph_length: 0,
      flesch_kincaid: 0, sentence_lengths: []
    ).freeze

    module_function

    # Compute every statistic for +text+.
    #
    # @param text [String, nil] raw input text
    # @return [Result] EMPTY when the input is not a non-blank String
    def compute(text)
      # nil can never be a String, so the previous `text.nil? ||` guard
      # was redundant; is_a?(String) alone covers it.
      return EMPTY unless text.is_a?(String) && !text.strip.empty?

      words = TextUtils.tokenize(text)
      return EMPTY if words.empty?

      sentences = split_sentences(text)
      paragraphs = text.split(/\n\s*\n/).reject { |p| p.strip.empty? }

      # Word-level stats
      word_count = words.length
      unique_words = words.uniq
      type_token_ratio = unique_words.length.to_f / word_count
      avg_word_length = words.sum(&:length).to_f / word_count

      # Sentence-level stats (drop zero-token "sentences", e.g. stray punctuation)
      sentence_lengths = sentences.map { |s| TextUtils.tokenize(s).length }.select { |n| n > 0 }
      sentence_count = sentence_lengths.length

      avg_sentence_length = 0.0
      sentence_length_std_dev = 0.0
      sentence_length_variation = 0.0
      burstiness = 0.0

      if sentence_count > 1
        avg_sentence_length = sentence_lengths.sum.to_f / sentence_count

        # Population variance of sentence lengths
        variance = sentence_lengths.sum { |len| (len - avg_sentence_length)**2 }.to_f / sentence_count
        sentence_length_std_dev = Math.sqrt(variance)

        # Coefficient of variation of sentence length
        sentence_length_variation = avg_sentence_length > 0 ? sentence_length_std_dev / avg_sentence_length : 0.0

        # Burstiness: mean absolute difference between consecutive sentence
        # lengths, normalised by the mean length. Low values indicate a
        # monotonous rhythm.
        consecutive_diff_sum = (1...sentence_lengths.length).sum { |i|
          (sentence_lengths[i] - sentence_lengths[i - 1]).abs
        }
        avg_consecutive_diff = consecutive_diff_sum.to_f / (sentence_lengths.length - 1)
        burstiness = avg_sentence_length > 0 ? avg_consecutive_diff / avg_sentence_length : 0.0
      elsif sentence_count == 1
        avg_sentence_length = sentence_lengths[0].to_f
      end

      # Function word ratio (memoized set — see function_word_set below)
      function_word_count = words.count { |w| function_word_set.include?(w) }
      function_word_ratio = function_word_count.to_f / word_count

      # N-gram repetition
      trigram_repetition = compute_ngram_repetition(words, 3)

      # Paragraph stats
      paragraph_count = paragraphs.length
      avg_paragraph_length = paragraph_count > 0 ?
        paragraphs.sum { |p| TextUtils.tokenize(p).length }.to_f / paragraph_count : 0.0

      # Readability (Flesch-Kincaid Grade Level)
      syllable_count = words.sum { |w| estimate_syllables(w) }
      flesch_kincaid = sentence_count > 0 ?
        0.39 * (word_count.to_f / sentence_count) + 11.8 * (syllable_count.to_f / word_count) - 15.59 : 0.0

      Result.new(
        word_count: word_count,
        unique_word_count: unique_words.length,
        sentence_count: sentence_count,
        paragraph_count: paragraph_count,
        avg_word_length: round3(avg_word_length),
        avg_sentence_length: round3(avg_sentence_length),
        sentence_length_std_dev: round3(sentence_length_std_dev),
        sentence_length_variation: round3(sentence_length_variation),
        burstiness: round3(burstiness),
        type_token_ratio: round3(type_token_ratio),
        function_word_ratio: round3(function_word_ratio),
        trigram_repetition: round3(trigram_repetition),
        avg_paragraph_length: round3(avg_paragraph_length),
        flesch_kincaid: round3(flesch_kincaid),
        sentence_lengths: sentence_lengths
      )
    end

    # Lowercased Set of English function words. Built lazily on first use and
    # memoised so repeated compute calls don't rebuild it, and so this file
    # can be loaded before Vocabulary without a load-time NameError.
    def function_word_set
      @function_word_set ||= Vocabulary::FUNCTION_WORDS.map(&:downcase).to_set
    end

    # Compute uniformity score (0-100). Higher = more uniform/AI-like.
    # Additive rubric: each statistic contributes a capped number of points;
    # the total is clamped to 100.
    def uniformity_score(stats)
      return 0 if stats.word_count == 0

      score = 0

      # Low burstiness = more AI-like (max 25 points)
      if stats.burstiness < 0.2 then score += 25
      elsif stats.burstiness < 0.35 then score += 18
      elsif stats.burstiness < 0.5 then score += 10
      elsif stats.burstiness < 0.65 then score += 5
      end

      # Low sentence length variation = more AI-like (max 25 points)
      if stats.sentence_length_variation < 0.2 then score += 25
      elsif stats.sentence_length_variation < 0.35 then score += 18
      elsif stats.sentence_length_variation < 0.5 then score += 10
      elsif stats.sentence_length_variation < 0.65 then score += 5
      end

      # Low type-token ratio = more repetitive/AI-like (max 20 points).
      # Only applied to longer texts, where TTR is meaningful.
      if stats.word_count > 100
        if stats.type_token_ratio < 0.35 then score += 20
        elsif stats.type_token_ratio < 0.45 then score += 12
        elsif stats.type_token_ratio < 0.55 then score += 5
        end
      end

      # High trigram repetition = more AI-like (max 15 points)
      if stats.trigram_repetition > 0.15 then score += 15
      elsif stats.trigram_repetition > 0.1 then score += 10
      elsif stats.trigram_repetition > 0.05 then score += 5
      end

      # Abnormally uniform sentence lengths in longer, multi-paragraph text
      # (max 15 points). (The previous comment said "paragraph lengths", but
      # the condition inspects sentence-length spread.)
      if stats.paragraph_count >= 3 && stats.sentence_count > 5
        if stats.sentence_length_std_dev < 3 && stats.avg_sentence_length > 10
          score += 15
        end
      end

      [score, 100].min
    end

    # Split text into sentences, protecting common abbreviations, single
    # initials ("J."), and numbered-list markers ("1.") from being treated
    # as sentence terminators.
    def split_sentences(text)
      # Protected periods are swapped for U+2024 (ONE DOT LEADER) as a
      # sentinel, split around, then restored.
      cleaned = text
        .gsub(/\b(Mr|Mrs|Ms|Dr|Prof|Sr|Jr|etc|vs|approx|dept|est|vol)\./i, '\1' + "\u2024")
        .gsub(/\b([A-Z])\./, '\1' + "\u2024")
        .gsub(/\b(\d+)\./, '\1' + "\u2024")

      # Split after terminal punctuation followed by whitespace and an
      # uppercase letter / opening quote, or at end of line.
      cleaned
        .split(/(?<=[.!?])\s+(?=[A-Z"'\u{201C}])|(?<=[.!?])$/)
        .map { |s| s.gsub("\u2024", ".").strip }
        .reject(&:empty?)
    end

    # Estimate syllable count for a word (English vowel-group heuristic).
    # Always returns at least 1.
    def estimate_syllables(word)
      w = word.downcase.gsub(/[^a-z]/, "")
      # Short words are almost always monosyllabic.
      return 1 if w.length <= 3

      vowel_groups = w.scan(/[aeiouy]+/)
      count = vowel_groups.empty? ? 1 : vowel_groups.length

      # Subtract a silent trailing "e" ("make"), but not "-le" ("table").
      count -= 1 if w.end_with?("e") && !w.end_with?("le")
      # Subtract a trailing "-ed" unless a vowel immediately precedes it
      # ("walked" loses one; "agreed" does not).
      count -= 1 if w.end_with?("ed") && w.length > 3 && w !~ /[aeiouy]ed$/

      [count, 1].max
    end

    # Fraction of distinct n-grams that occur more than once (0.0-1.0).
    # Returns 0.0 when the text is too short to form a single n-gram.
    def compute_ngram_repetition(words, n)
      return 0.0 if words.length < n

      counts = Hash.new(0)
      # each_cons yields every sliding window of n words, replacing the
      # manual index loop.
      words.each_cons(n) { |gram| counts[gram.join(" ")] += 1 }

      return 0.0 if counts.empty?

      counts.values.count { |c| c > 1 }.to_f / counts.length
    end

    # Round to 3 decimal places. Kept as the explicit *1000/round/1000.0
    # form (rather than Float#round(3)) to preserve the exact values the
    # original implementation emits.
    def round3(n)
      (n * 1000).round / 1000.0
    end
  end
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Humanizer
  # Small, dependency-free string helpers shared across the gem.
  module TextUtils
    module_function

    # Lowercase word tokens. Punctuation becomes whitespace, but
    # apostrophes and hyphens are kept so contractions ("it's") and
    # hyphenated words ("a-b") survive as single tokens.
    def tokenize(text)
      text.downcase.gsub(/[^\w\s'-]/, " ").scan(/\S+/)
    end

    # Number of whitespace-separated words in +text+.
    def word_count(text)
      # split(" ") is awk-style: collapses runs of whitespace and ignores
      # leading/trailing whitespace, so no strip/reject is needed.
      text.split(" ").size
    end

    # Find all regex matches with line/column info.
    # Returns an array of hashes with :match, :index (0-based offset into
    # the whole text), :line and :column (both 1-based), :suggestion
    # (a String, or the result of calling a Proc with the matched text),
    # and :confidence.
    def find_matches(text, regex, suggestion, confidence: "high")
      found = []
      offset = 0

      text.split("\n").each.with_index(1) do |line, lineno|
        line.scan(regex) do
          md = Regexp.last_match
          col = md.begin(0)
          found << {
            match: md[0],
            index: offset + col,
            line: lineno,
            column: col + 1,
            suggestion: suggestion.is_a?(Proc) ? suggestion.call(md[0]) : suggestion,
            confidence: confidence,
          }
        end
        # +1 accounts for the "\n" removed by split.
        offset += line.length + 1
      end

      found
    end

    # Count regex occurrences in +text+.
    def count_matches(text, regex)
      text.scan(regex).size
    end

    # Build a case-insensitive word-boundary regex for a word/phrase.
    def word_regex(word)
      /\b#{Regexp.escape(word)}\b/i
    end
  end
end
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Humanizer
  # Static vocabulary: tiered word lists and regex phrase patterns that
  # signal AI authorship, plus a function-word list used by Stats.
  # Data only — no behavior lives here.
  module Vocabulary
    # ── Tier 1: Dead Giveaways ─────────────────────────────
    # Words that appear 5-20x more often in AI text than human text.
    # NOTE(review): this %w list contains duplicate entries ("tapestry" and
    # "testament" each appear twice below, and several stems recur in
    # TIER_2). If a consumer counts one hit per list entry, duplicates will
    # double-count — verify against the analyzer before deduplicating.
    # (Comments cannot be placed inside the %w literal itself.)
    TIER_1 = %w[
      delve delving delved delves
      tapestry vibrant crucial comprehensive
      intricate intricacies pivotal testament
      landscape bustling nestled realm
      meticulous meticulously complexities
      embark embarking embarked
      robust
      showcasing showcase showcased showcases
      underscores underscoring underscored
      fostering foster fostered fosters
      seamless seamlessly groundbreaking renowned
      synergy synergies
      leverage leveraging leveraged
      garner garnered garnering
      interplay enduring
      enhance enhanced enhancing enhancement
      tapestry testament additionally daunting
      ever-evolving
      underscore
      unpack unpacking unpacked
      unraveling unravel
      holistic holistically synergistic
      actionable impactful learnings cadence bandwidth
      net-net value-add
    ].freeze

    # Multi-word tier 1 entries (need separate handling)
    TIER_1_PHRASES = [
      "game changer", "game-changing", "game-changer",
      "deep dive", "deep-dive", "at its core",
      "best practices", "best-practices", "best practice",
      "thought leader", "thought leadership",
    ].freeze

    # ── Tier 2: Suspicious in Density ──────────────────────
    # NOTE(review): "underscore", "bolster", and "myriad" appear twice
    # within this list, and "holistic"/"holistically"/"synergistic"/
    # "impactful" also appear in TIER_1 — same double-count caveat as above.
    TIER_2 = %w[
      furthermore moreover notably consequently subsequently
      accordingly nonetheless henceforth indeed specifically
      essentially ultimately arguably fundamentally inherently
      profoundly
      encompassing encompasses encompassed
      endeavour endeavor endeavoring
      elevate elevated elevating
      alleviate alleviating
      streamline streamlined streamlining
      harness harnessing harnessed
      unleash unleashing unleashed
      revolutionize revolutionizing revolutionized
      transformative transformation paramount multifaceted
      spearhead spearheading spearheaded
      bolster bolstering bolstered
      catalyze catalyst catalyzed cornerstone
      reimagine reimagining reimagined
      empower empowering empowerment empowered
      navigate navigating navigated
      poised myriad nuanced nuance nuances
      paradigm paradigms paradigm-shifting
      holistic holistically
      utilize utilizing utilization utilized
      facilitate facilitated facilitating facilitation
      elucidate elucidating
      illuminate illuminating illuminated
      invaluable cutting-edge innovative innovation
      align aligns aligning alignment
      dynamic dynamics impactful agile
      scalable scalability proactive proactively synergistic
      optimize optimizing optimization
      resonate resonating resonated resonates
      underscore underscored
      cultivate cultivating cultivated
      galvanize galvanizing
      invigorate invigorating
      juxtapose juxtaposing juxtaposition
      underscore bolster
      augment augmenting augmented
      proliferate proliferating proliferation
      burgeoning nascent ubiquitous plethora myriad
      quintessential eclectic indelible
      overarching underpinning underpinnings
    ].freeze

    # ── Tier 3: Context-Dependent ──────────────────────────
    # Common intensifiers/qualifiers — legitimate in moderation, suspicious
    # only in density.
    TIER_3 = %w[
      significant significantly important importantly
      effective effectively efficient efficiently
      diverse diversity unique uniquely
      key vital vitally critical critically
      essential essentially valuable notable
      remarkable remarkably substantial substantially
      considerable considerably noteworthy
      prominent prominently influential
      thoughtful thoughtfully insightful insightfully
      meaningful meaningfully purposeful purposefully
      deliberate deliberately strategic strategically
      integral indispensable instrumental imperative
      exemplary commendable praiseworthy
      sophisticated profound compelling captivating
      exquisite impeccable formidable stellar
      exceptional exceptionally extraordinary
      unparalleled unprecedented monumental
      groundbreaking trailblazing visionary
      world-class state-of-the-art best-in-class
    ].freeze

    # ── AI Phrases ─────────────────────────────────────────
    # Multi-word phrases that strongly signal AI authorship.
    # Entry shape: { pattern: Regexp (case-insensitive), tier: Integer
    # severity (1 = strongest signal), fix: String replacement advice }.
    AI_PHRASES = [
      # "In today's..." openers
      { pattern: /\bin today'?s (digital age|fast-paced world|rapidly evolving|ever-changing|modern|interconnected)\b/i, tier: 1, fix: "(remove or be specific about what changed)" },
      { pattern: /\bin today'?s world\b/i, tier: 2, fix: "(remove or be specific)" },

      # "It is [worth/important] to note"
      { pattern: /\bit is (worth|important to|essential to|crucial to) not(?:e|ing) that\b/i, tier: 1, fix: "(remove — just state the fact)" },
      { pattern: /\bit should be noted that\b/i, tier: 1, fix: "(remove — just state the fact)" },
      { pattern: /\bit bears mentioning that\b/i, tier: 1, fix: "(remove — just state the fact)" },

      # Journey metaphors
      { pattern: /\bpave the way (?:for|to)\b/i, tier: 1, fix: "enable / allow / lead to" },
      { pattern: /\bat the forefront of\b/i, tier: 1, fix: "leading / first in" },
      { pattern: /\bnavigate the (?:complexities|challenges|landscape)\b/i, tier: 1, fix: "handle / deal with / work through" },
      { pattern: /\bharness the (?:power|potential|capabilities) of\b/i, tier: 1, fix: "use" },
      { pattern: /\bembark on a journey\b/i, tier: 1, fix: "start / begin" },
      { pattern: /\bpush the boundaries\b/i, tier: 1, fix: "(be specific about what changed)" },
      { pattern: /\bfoster a (?:culture|environment|atmosphere|sense) of\b/i, tier: 1, fix: "build / create / encourage" },
      { pattern: /\bunlock the (?:potential|power|full|true)\b/i, tier: 1, fix: "enable / use / improve" },
      { pattern: /\bserves as a testament\b/i, tier: 1, fix: "shows / proves / demonstrates" },
      { pattern: /\bplays a (?:crucial|pivotal|vital|key|significant|important|critical) role\b/i, tier: 1, fix: "matters for / helps / is important to" },
      { pattern: /\bin the realm of\b/i, tier: 1, fix: "in" },
      { pattern: /\bdelve into\b/i, tier: 1, fix: "explore / examine / look at" },
      { pattern: /\bthe landscape of\b/i, tier: 1, fix: "(be specific — what part of the field?)" },
      { pattern: /\bnestled (?:in|within|among)\b/i, tier: 1, fix: "located in / in / near" },

      # Abstract verb phrases
      { pattern: /\brise to the (?:occasion|challenge)\b/i, tier: 2, fix: "handle / face / tackle" },
      { pattern: /\bstand at the (?:crossroads|intersection)\b/i, tier: 2, fix: "(be specific about the choice)" },
      { pattern: /\bshape the (?:future|trajectory|direction)\b/i, tier: 2, fix: "(be specific about how)" },
      { pattern: /\btip of the iceberg\b/i, tier: 2, fix: "one example / a small part" },
      { pattern: /\bdouble-edged sword\b/i, tier: 2, fix: "has tradeoffs / cuts both ways" },
      { pattern: /\ba testament to\b/i, tier: 1, fix: "shows / proves" },
      { pattern: /\bthe dawn of\b/i, tier: 2, fix: "the start of / the beginning of" },
      { pattern: /\bthe fabric of\b/i, tier: 1, fix: "(be concrete)" },
      { pattern: /\bthe tapestry of\b/i, tier: 1, fix: "(be concrete)" },

      # Hedging stacks
      { pattern: /\bcould potentially\b/i, tier: 1, fix: "could / might" },
      { pattern: /\bmight possibly\b/i, tier: 1, fix: "might" },
      { pattern: /\bcould possibly\b/i, tier: 1, fix: "could" },
      { pattern: /\bperhaps potentially\b/i, tier: 1, fix: "perhaps / maybe" },
      { pattern: /\bmay potentially\b/i, tier: 1, fix: "may" },
      { pattern: /\bcould conceivably\b/i, tier: 1, fix: "could" },

      # Chatbot filler
      { pattern: /\bI hope this helps\b/i, tier: 1, fix: "(remove)" },
      { pattern: /\blet me know if (?:you|there)\b/i, tier: 1, fix: "(remove)" },
      { pattern: /\bwould you like me to\b/i, tier: 1, fix: "(remove)" },
      { pattern: /\bfeel free to\b/i, tier: 1, fix: "(remove)" },
      { pattern: /\bdon'?t hesitate to\b/i, tier: 1, fix: "(remove)" },
      { pattern: /\bhappy to help\b/i, tier: 1, fix: "(remove)" },
      { pattern: /\bhere is (?:a |an |the )?(?:comprehensive |brief |quick )?(?:overview|summary|breakdown|list|guide|explanation|look)\b/i, tier: 1, fix: "(remove — start with the content)" },
      { pattern: /\bI'?d be happy to\b/i, tier: 1, fix: "(remove)" },
      { pattern: /\bis there anything else\b/i, tier: 1, fix: "(remove)" },

      # Sycophantic
      { pattern: /\bgreat question\b/i, tier: 1, fix: "(remove)" },
      { pattern: /\bexcellent (?:question|point|observation)\b/i, tier: 1, fix: "(remove)" },
      { pattern: /\bthat'?s a (?:great|excellent|wonderful|fantastic|good|insightful|thoughtful) (?:question|point|observation)\b/i, tier: 1, fix: "(remove)" },
      { pattern: /\byou'?re absolutely right\b/i, tier: 1, fix: "(remove or address the substance)" },
      { pattern: /\byou raise a (?:great|good|excellent|valid|important) point\b/i, tier: 1, fix: "(remove or address the substance)" },

      # Cutoff disclaimers
      { pattern: /\bas of (?:my|this) (?:last|latest|most recent) (?:training|update|knowledge)\b/i, tier: 1, fix: "(remove)" },
      { pattern: /\bwhile (?:specific )?details are (?:limited|scarce|not available)\b/i, tier: 1, fix: "(remove — research it or omit the claim)" },
      { pattern: /\bbased on (?:available|my|current) (?:information|knowledge|understanding|data)\b/i, tier: 1, fix: "(remove)" },
      { pattern: /\bup to my (?:last )?training\b/i, tier: 1, fix: "(remove)" },

      # Generic conclusions
      { pattern: /\bthe future (?:looks|is|remains) bright\b/i, tier: 1, fix: "(end with a specific fact or plan)" },
      { pattern: /\bexciting times (?:lie|lay|are) ahead\b/i, tier: 1, fix: "(end with a specific fact or plan)" },
      { pattern: /\bcontinue (?:this|their|our|the) journey\b/i, tier: 1, fix: "(be specific about what happens next)" },
      { pattern: /\bjourney towards? (?:excellence|success|greatness)\b/i, tier: 1, fix: "(be specific)" },
      { pattern: /\bstep in the right direction\b/i, tier: 1, fix: "(be specific about the outcome)" },
      { pattern: /\bonly time will tell\b/i, tier: 1, fix: "(end with what you actually know)" },
      { pattern: /\bthe possibilities are (?:endless|limitless|infinite)\b/i, tier: 1, fix: "(be specific about what's possible)" },
      { pattern: /\bpoised for (?:growth|success|greatness|expansion)\b/i, tier: 1, fix: "(cite evidence or remove)" },
      { pattern: /\bwatch this space\b/i, tier: 2, fix: "(end with something concrete)" },
      { pattern: /\bstay tuned\b/i, tier: 2, fix: "(end with something concrete)" },
      { pattern: /\bremains to be seen\b/i, tier: 2, fix: "(state what you do know)" },

      # Formulaic filler
      { pattern: /\bin order to\b/i, tier: 2, fix: "to" },
      { pattern: /\bdue to the fact that\b/i, tier: 1, fix: "because" },
      { pattern: /\bat this point in time\b/i, tier: 1, fix: "now" },
      { pattern: /\bin the event that\b/i, tier: 1, fix: "if" },
      { pattern: /\bhas the ability to\b/i, tier: 1, fix: "can" },
      { pattern: /\bfor the purpose of\b/i, tier: 1, fix: "to / for" },
      { pattern: /\bin light of the fact that\b/i, tier: 1, fix: "because / since" },
      { pattern: /\bfirst and foremost\b/i, tier: 2, fix: "first" },
      { pattern: /\blast but not least\b/i, tier: 2, fix: "finally" },
      { pattern: /\bat the end of the day\b/i, tier: 2, fix: "(remove or be specific)" },
      { pattern: /\bwhen it comes to\b/i, tier: 2, fix: "for / regarding" },
      { pattern: /\bthe fact of the matter is\b/i, tier: 1, fix: "(remove — just state it)" },
      { pattern: /\bin terms of\b/i, tier: 3, fix: "for / about / regarding" },
      { pattern: /\bat its core\b/i, tier: 2, fix: "(remove or be specific)" },
      { pattern: /\bit goes without saying\b/i, tier: 2, fix: "(if it goes without saying, don't say it)" },
      { pattern: /\bneedless to say\b/i, tier: 2, fix: "(if needless to say, don't say it)" },

      # v2.2 additions (corporate jargon and chatty transitions)
      { pattern: /\blet'?s dive in\b/i, tier: 1, fix: "(just start)" },
      { pattern: /\blet'?s (?:break this|break it) down\b/i, tier: 1, fix: "(just explain)" },
      { pattern: /\bhere'?s the thing\b/i, tier: 2, fix: "(just say it)" },
      { pattern: /\bthe reality is\b/i, tier: 2, fix: "(state the fact)" },
      { pattern: /\bmoving forward\b/i, tier: 2, fix: "next / from now on" },
      { pattern: /\bcircle back\b/i, tier: 1, fix: "return to / revisit" },
      { pattern: /\btouch base\b/i, tier: 1, fix: "talk / check in" },
      { pattern: /\bgoing forward\b/i, tier: 2, fix: "from now on" },
      { pattern: /\bkey takeaways?\b/i, tier: 1, fix: "main point(s)" },
      { pattern: /\bvalue proposition\b/i, tier: 2, fix: "benefit / value" },
      { pattern: /\bcore competenc(?:y|ies)\b/i, tier: 2, fix: "strength(s)" },
      { pattern: /\bbest-in-class\b/i, tier: 1, fix: "excellent / (be specific)" },
      { pattern: /\bworld-class\b/i, tier: 1, fix: "(be specific)" },
      { pattern: /\bcutting-edge\b/i, tier: 1, fix: "(be specific)" },
      { pattern: /\bstate-of-the-art\b/i, tier: 1, fix: "(be specific or cite)" },
      { pattern: /\bgold standard\b/i, tier: 2, fix: "(cite the standard)" },
      { pattern: /\blow-hanging fruit\b/i, tier: 1, fix: "easy wins / quick wins" },
      { pattern: /\bpain points?\b/i, tier: 1, fix: "problem(s)" },
      { pattern: /\bdeep dive\b/i, tier: 1, fix: "detailed look / analysis" },
      { pattern: /\bparadigm shift\b/i, tier: 1, fix: "major change" },
      { pattern: /\bdouble-click (?:on)?\b/i, tier: 1, fix: "examine / look closer at" },
      { pattern: /\bloop (?:you |me |them )in\b/i, tier: 2, fix: "include / inform" },
      { pattern: /\btable this\b/i, tier: 2, fix: "postpone / set aside" },
      { pattern: /\bpivot to\b/i, tier: 2, fix: "switch to / change to" },
      { pattern: /\bsynch? (?:up )?(?:on|about)\b/i, tier: 2, fix: "discuss / align on" },
      { pattern: /\brun it up the flagpole\b/i, tier: 1, fix: "propose / suggest" },
      { pattern: /\bboil the ocean\b/i, tier: 1, fix: "attempt too much" },
      { pattern: /\bmove the needle\b/i, tier: 1, fix: "make progress / have impact" },
      { pattern: /\bopen the kimono\b/i, tier: 1, fix: "share / be transparent" },
      { pattern: /\bdrink the Kool-Aid\b/i, tier: 2, fix: "believe / accept" },
    ].freeze

    # ── Function Words ─────────────────────────────────────
    # High-frequency English function words (articles, pronouns,
    # prepositions, auxiliaries). Stats lowercases this list to build the
    # set used for the function-word ratio.
    FUNCTION_WORDS = %w[
      the be to of and a in that have I it for not on with
      he as you do at this but his by from they we say her
      she or an will my one all would there their what so
      up out if about who get which go me when make can like
      time no just him know take people into year your good
      some could them see other than then now look only come
      its over think also back after use two how our work
      first well way even new want because any these give
      day most us
    ].freeze
  end
end
|
data/lib/humanizer.rb
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
require_relative "humanizer/version"
|
|
6
|
+
require_relative "humanizer/text_utils"
|
|
7
|
+
require_relative "humanizer/vocabulary"
|
|
8
|
+
require_relative "humanizer/stats"
|
|
9
|
+
require_relative "humanizer/patterns"
|
|
10
|
+
require_relative "humanizer/analyzer"
|
|
11
|
+
require_relative "humanizer/humanizer_engine"
|
|
12
|
+
|
|
13
|
+
module Humanizer
  # Public facade: thin delegation to the analyzer and humanizer engine.
  class << self
    # Quick score (0-100, higher = more AI-like).
    def score(text)
      Analyzer.score(text)
    end

    # Full analysis with details.
    def analyze(text, **opts)
      Analyzer.analyze(text, **opts)
    end

    # Humanization suggestions.
    def humanize(text, **opts)
      HumanizerEngine.humanize(text, **opts)
    end

    # Safe mechanical auto-fixes.
    def auto_fix(text)
      HumanizerEngine.auto_fix(text)
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: humanizer-rb
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Christian Genco
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-03-16 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description: Scores text 0-100 for AI writing patterns using 28 pattern detectors,
|
|
14
|
+
500+ vocabulary terms, and statistical text analysis. Ruby port of the humanizer
|
|
15
|
+
Node.js tool.
|
|
16
|
+
email:
|
|
17
|
+
- christian@gen.co
|
|
18
|
+
executables:
|
|
19
|
+
- humanizer
|
|
20
|
+
extensions: []
|
|
21
|
+
extra_rdoc_files: []
|
|
22
|
+
files:
|
|
23
|
+
- CHANGELOG.md
|
|
24
|
+
- LICENSE
|
|
25
|
+
- README.md
|
|
26
|
+
- bin/humanizer
|
|
27
|
+
- lib/humanizer.rb
|
|
28
|
+
- lib/humanizer/analyzer.rb
|
|
29
|
+
- lib/humanizer/humanizer_engine.rb
|
|
30
|
+
- lib/humanizer/patterns.rb
|
|
31
|
+
- lib/humanizer/stats.rb
|
|
32
|
+
- lib/humanizer/text_utils.rb
|
|
33
|
+
- lib/humanizer/version.rb
|
|
34
|
+
- lib/humanizer/vocabulary.rb
|
|
35
|
+
homepage: https://github.com/christiangenco/humanizer-rb
|
|
36
|
+
licenses:
|
|
37
|
+
- MIT
|
|
38
|
+
metadata:
|
|
39
|
+
homepage_uri: https://github.com/christiangenco/humanizer-rb
|
|
40
|
+
source_code_uri: https://github.com/christiangenco/humanizer-rb
|
|
41
|
+
changelog_uri: https://github.com/christiangenco/humanizer-rb/blob/main/CHANGELOG.md
|
|
42
|
+
post_install_message:
|
|
43
|
+
rdoc_options: []
|
|
44
|
+
require_paths:
|
|
45
|
+
- lib
|
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
47
|
+
requirements:
|
|
48
|
+
- - ">="
|
|
49
|
+
- !ruby/object:Gem::Version
|
|
50
|
+
version: '3.0'
|
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
52
|
+
requirements:
|
|
53
|
+
- - ">="
|
|
54
|
+
- !ruby/object:Gem::Version
|
|
55
|
+
version: '0'
|
|
56
|
+
requirements: []
|
|
57
|
+
rubygems_version: 3.5.22
|
|
58
|
+
signing_key:
|
|
59
|
+
specification_version: 4
|
|
60
|
+
summary: Detect AI-generated writing patterns
|
|
61
|
+
test_files: []
|