textstat 0.1.4 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/counter.rb +0 -0
- data/lib/dictionaries/ca.txt +0 -0
- data/lib/dictionaries/cs.txt +0 -0
- data/lib/dictionaries/nl.txt +0 -0
- data/lib/textstat/version.rb +1 -1
- data/lib/textstat.rb +79 -73
- data/spec/textstat_spec.rb +21 -6
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 61ef6dcf0e938af4c3c30ad4b45c3241f10ffc01e4d63e566a74d993b57309ac
|
4
|
+
data.tar.gz: 20f6412df8d5a8658d4113ddb48e808697b7e1489ba5acb4fae9d97346393acd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ef7dee598e3db4c26e2e305f464fbebf6b3757429a72ae66b887ab7f068a34a0d2b6c2e5c52771b2c342e8c06d2ec9a69fa1d60065eaa37b3ccf76eb232f647d
|
7
|
+
data.tar.gz: 5ca1f7c6dcb11a81457b87339ab153ac95125542378875fd451c9ba58659925fc018d38af4bbc61149cb5f8f53765f67363029d8eb698ad80effa969d7ecbbeb
|
data/lib/counter.rb
CHANGED
File without changes
|
data/lib/dictionaries/ca.txt
CHANGED
File without changes
|
data/lib/dictionaries/cs.txt
CHANGED
File without changes
|
data/lib/dictionaries/nl.txt
CHANGED
File without changes
|
data/lib/textstat/version.rb
CHANGED
data/lib/textstat.rb
CHANGED
@@ -23,7 +23,7 @@ class TextStat
|
|
23
23
|
count = 0
|
24
24
|
text.split(' ').each do |word|
|
25
25
|
word_hyphenated = dictionary.visualise(word)
|
26
|
-
count +=
|
26
|
+
count += word_hyphenated.count('-') + 1
|
27
27
|
end
|
28
28
|
count
|
29
29
|
end
|
@@ -33,109 +33,105 @@ class TextStat
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def self.avg_sentence_length(text)
|
36
|
-
asl = lexicon_count(text).to_f / sentence_count(text)
|
36
|
+
asl = lexicon_count(text).to_f / sentence_count(text)
|
37
37
|
asl.round(1)
|
38
38
|
rescue ZeroDivisionError
|
39
39
|
0.0
|
40
40
|
end
|
41
41
|
|
42
|
-
def self.avg_syllables_per_word(text)
|
43
|
-
syllable = syllable_count(text)
|
42
|
+
def self.avg_syllables_per_word(text, language = 'en_us')
|
43
|
+
syllable = syllable_count(text, language)
|
44
44
|
words = lexicon_count(text)
|
45
45
|
begin
|
46
|
-
syllables_per_word = syllable.to_f / words
|
47
|
-
|
46
|
+
syllables_per_word = syllable.to_f / words
|
47
|
+
syllables_per_word.round(1)
|
48
48
|
rescue ZeroDivisionError
|
49
|
-
|
49
|
+
0.0
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
53
|
def self.avg_letter_per_word(text)
|
54
|
-
letters_per_word = char_count(text).to_f / lexicon_count(text)
|
54
|
+
letters_per_word = char_count(text).to_f / lexicon_count(text)
|
55
55
|
letters_per_word.round(2)
|
56
56
|
rescue ZeroDivisionError
|
57
57
|
0.0
|
58
58
|
end
|
59
59
|
|
60
60
|
def self.avg_sentence_per_word(text)
|
61
|
-
sentence_per_word = sentence_count(text).to_f / lexicon_count(text)
|
61
|
+
sentence_per_word = sentence_count(text).to_f / lexicon_count(text)
|
62
62
|
sentence_per_word.round(2)
|
63
63
|
rescue ZeroDivisionError
|
64
64
|
0.0
|
65
65
|
end
|
66
66
|
|
67
|
-
def self.flesch_reading_ease(text)
|
67
|
+
def self.flesch_reading_ease(text, language = 'en_us')
|
68
68
|
sentence_length = avg_sentence_length(text)
|
69
|
-
syllables_per_word = avg_syllables_per_word(text)
|
70
|
-
flesch =
|
71
|
-
206.835 - (1.015 * sentence_length).to_f - (84.6 * syllables_per_word).to_f
|
72
|
-
)
|
69
|
+
syllables_per_word = avg_syllables_per_word(text, language)
|
70
|
+
flesch = 206.835 - 1.015 * sentence_length - 84.6 * syllables_per_word
|
73
71
|
flesch.round(2)
|
74
72
|
end
|
75
73
|
|
76
|
-
def self.flesch_kincaid_grade(text)
|
74
|
+
def self.flesch_kincaid_grade(text, language = 'en_us')
|
77
75
|
sentence_length = avg_sentence_length(text)
|
78
|
-
syllables_per_word = avg_syllables_per_word(text)
|
79
|
-
flesch =
|
76
|
+
syllables_per_word = avg_syllables_per_word(text, language)
|
77
|
+
flesch = 0.39 * sentence_length + 11.8 * syllables_per_word - 15.59
|
80
78
|
flesch.round(1)
|
81
79
|
end
|
82
80
|
|
83
|
-
def self.polysyllab_count(text)
|
81
|
+
def self.polysyllab_count(text, language = 'en_us')
|
84
82
|
count = 0
|
85
83
|
text.split(' ').each do |word|
|
86
|
-
w = syllable_count(word)
|
84
|
+
w = syllable_count(word, language)
|
87
85
|
count += 1 if w >= 3
|
88
86
|
end
|
89
87
|
count
|
90
88
|
end
|
91
89
|
|
92
|
-
def self.smog_index(text)
|
90
|
+
def self.smog_index(text, language = 'en_us')
|
93
91
|
sentences = sentence_count(text)
|
94
92
|
|
95
93
|
if sentences >= 3
|
96
94
|
begin
|
97
|
-
polysyllab = polysyllab_count(text)
|
98
|
-
smog = (
|
99
|
-
(1
|
100
|
-
return smog.round(1)
|
95
|
+
polysyllab = polysyllab_count(text, language)
|
96
|
+
smog = 1.043 * Math.sqrt(30.0 * polysyllab / sentences) + 3.1291
|
97
|
+
smog.round(1)
|
101
98
|
rescue ZeroDivisionError
|
102
|
-
|
99
|
+
0.0
|
103
100
|
end
|
104
101
|
else
|
105
|
-
|
102
|
+
0.0
|
106
103
|
end
|
107
104
|
end
|
108
105
|
|
109
106
|
def self.coleman_liau_index(text)
|
110
|
-
letters
|
107
|
+
letters = (avg_letter_per_word(text) * 100).round(2)
|
111
108
|
sentences = (avg_sentence_per_word(text) * 100).round(2)
|
112
|
-
coleman
|
109
|
+
coleman = 0.0588 * letters - 0.296 * sentences - 15.8
|
113
110
|
coleman.round(2)
|
114
111
|
end
|
115
112
|
|
116
113
|
def self.automated_readability_index(text)
|
117
|
-
chars
|
118
|
-
words
|
114
|
+
chars = char_count(text)
|
115
|
+
words = lexicon_count(text)
|
119
116
|
sentences = sentence_count(text)
|
120
117
|
begin
|
121
|
-
a = chars.to_f / words
|
122
|
-
b = words.to_f / sentences
|
118
|
+
a = chars.to_f / words
|
119
|
+
b = words.to_f / sentences
|
123
120
|
|
124
|
-
readability =
|
125
|
-
|
126
|
-
return readability.round(1)
|
121
|
+
readability = 4.71 * a + 0.5 * b - 21.43
|
122
|
+
readability.round(1)
|
127
123
|
rescue ZeroDivisionError
|
128
|
-
|
124
|
+
0.0
|
129
125
|
end
|
130
126
|
end
|
131
127
|
|
132
|
-
def self.linsear_write_formula(text)
|
128
|
+
def self.linsear_write_formula(text, language = 'en_us')
|
133
129
|
easy_word = 0
|
134
130
|
difficult_word = 0
|
135
131
|
text_list = text.split(' ')[0..100]
|
136
132
|
|
137
133
|
text_list.each do |word|
|
138
|
-
if syllable_count(word) < 3
|
134
|
+
if syllable_count(word, language) < 3
|
139
135
|
easy_word += 1
|
140
136
|
else
|
141
137
|
difficult_word += 1
|
@@ -144,11 +140,9 @@ class TextStat
|
|
144
140
|
|
145
141
|
text = text_list.join(' ')
|
146
142
|
|
147
|
-
number = (
|
148
|
-
if number <= 20
|
149
|
-
|
150
|
-
end
|
151
|
-
return number / 2
|
143
|
+
number = (easy_word * 1 + difficult_word * 3).to_f / sentence_count(text)
|
144
|
+
number -= 2 if number <= 20
|
145
|
+
number / 2
|
152
146
|
end
|
153
147
|
|
154
148
|
def self.difficult_words(text, language = 'en_us')
|
@@ -161,58 +155,70 @@ class TextStat
|
|
161
155
|
text_list = text.downcase.gsub(/[^0-9a-z ]/i, '').split(' ')
|
162
156
|
diff_words_set = Set.new
|
163
157
|
text_list.each do |value|
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
end
|
168
|
-
end
|
158
|
+
next if easy_words.include? value
|
159
|
+
|
160
|
+
diff_words_set.add(value) if syllable_count(value, language) > 1
|
169
161
|
end
|
170
|
-
|
162
|
+
diff_words_set.length
|
171
163
|
end
|
172
164
|
|
173
|
-
def self.dale_chall_readability_score(text)
|
165
|
+
def self.dale_chall_readability_score(text, language = 'en_us')
|
174
166
|
word_count = lexicon_count(text)
|
175
|
-
count = word_count - difficult_words(text)
|
167
|
+
count = word_count - difficult_words(text, language)
|
176
168
|
|
177
169
|
begin
|
178
|
-
per =
|
170
|
+
per = 100.0 * count / word_count
|
179
171
|
rescue ZeroDivisionError
|
180
172
|
return 0.0
|
181
173
|
end
|
182
174
|
|
183
175
|
difficult_words = 100 - per
|
184
|
-
score = (
|
185
|
-
|
186
|
-
+ (0.0496 * avg_sentence_length(text)))
|
176
|
+
score = 0.1579 * difficult_words + 0.0496 * avg_sentence_length(text)
|
177
|
+
score += 3.6365 if difficult_words > 5
|
187
178
|
|
188
|
-
|
189
|
-
score += 3.6365
|
190
|
-
end
|
191
|
-
return score.round(2)
|
179
|
+
score.round(2)
|
192
180
|
end
|
193
181
|
|
194
|
-
def self.gunning_fog(text)
|
195
|
-
|
196
|
-
|
197
|
-
(difficult_words(text) / lexicon_count(text) * 100) + 5)
|
182
|
+
def self.gunning_fog(text, language = 'en_us')
|
183
|
+
per_diff_words = 100.0 * difficult_words(text, language) / lexicon_count(text) + 5
|
184
|
+
grade = 0.4 * (avg_sentence_length(text) + per_diff_words)
|
198
185
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
return 0.0
|
203
|
-
end
|
186
|
+
grade.round(2)
|
187
|
+
rescue ZeroDivisionError
|
188
|
+
0.0
|
204
189
|
end
|
205
190
|
|
206
191
|
def self.lix(text)
|
207
192
|
words = text.split(' ')
|
208
193
|
words_length = words.length
|
209
|
-
long_words = words.
|
194
|
+
long_words = words.count { |word| word.length > 6 }
|
210
195
|
|
211
|
-
per_long_words =
|
196
|
+
per_long_words = 100.0 * long_words / words_length
|
212
197
|
asl = avg_sentence_length(text)
|
213
198
|
lix = asl + per_long_words
|
214
199
|
|
215
|
-
|
200
|
+
lix.round(2)
|
201
|
+
end
|
202
|
+
|
203
|
+
def self.forcast(text, language = 'en_us')
|
204
|
+
words = text.split(' ')[0..149]
|
205
|
+
words_with_one_syllabe = words.count {
|
206
|
+
|word| syllable_count(word, language) == 1
|
207
|
+
}
|
208
|
+
forcast = 20 - (words_with_one_syllabe / 10)
|
209
|
+
forcast
|
210
|
+
end
|
211
|
+
|
212
|
+
def self.powers_sumner_kearl(text, language = 'en_us')
|
213
|
+
grade = 0.0778 * avg_sentence_length(text) + 0.0455 * syllable_count(text, language) - 2.2029
|
214
|
+
grade.round(2)
|
215
|
+
end
|
216
|
+
|
217
|
+
def self.spache(text, language = 'en_us')
|
218
|
+
words = text.split(' ').count
|
219
|
+
unfamiliar_words = difficult_words(text, language) / words
|
220
|
+
grade = (0.141 * avg_sentence_length(text)) + (0.086 * unfamiliar_words) + 0.839
|
221
|
+
grade.round(2)
|
216
222
|
end
|
217
223
|
|
218
224
|
def self.text_standard(text, float_output=nil)
|
@@ -287,9 +293,9 @@ class TextStat
|
|
287
293
|
score = final_grade[0][0]
|
288
294
|
|
289
295
|
if float_output
|
290
|
-
|
296
|
+
score.to_f
|
291
297
|
else
|
292
|
-
|
298
|
+
"#{score.to_i - 1}th and #{score.to_i}th grade"
|
293
299
|
end
|
294
300
|
end
|
295
301
|
|
data/spec/textstat_spec.rb
CHANGED
@@ -116,22 +116,22 @@ describe TextStat do
|
|
116
116
|
|
117
117
|
it 'should return the correct smog index' do
|
118
118
|
index = TextStat.smog_index(@long_test)
|
119
|
-
expect(index).to eql
|
119
|
+
expect(index).to eql 12.5
|
120
120
|
end
|
121
121
|
|
122
122
|
it 'should return the correct Coleman–Liau index' do
|
123
123
|
index = TextStat.coleman_liau_index(@long_test)
|
124
|
-
expect(index).to eql 10.
|
124
|
+
expect(index).to eql 10.65
|
125
125
|
end
|
126
126
|
|
127
127
|
it 'should return the correct automated readability index' do
|
128
128
|
index = TextStat.automated_readability_index(@long_test)
|
129
|
-
expect(index).to eql 12.
|
129
|
+
expect(index).to eql 12.4
|
130
130
|
end
|
131
131
|
|
132
132
|
it 'should return the correct linsear write formula result' do
|
133
133
|
result = TextStat.linsear_write_formula(@long_test)
|
134
|
-
expect(result).to eql 14.
|
134
|
+
expect(result).to eql 14.875
|
135
135
|
end
|
136
136
|
|
137
137
|
it 'should return the correct difficult words result' do
|
@@ -141,12 +141,12 @@ describe TextStat do
|
|
141
141
|
|
142
142
|
it 'should return the correct Dale–Chall readability score' do
|
143
143
|
score = TextStat.dale_chall_readability_score(@long_test)
|
144
|
-
expect(score).to eql
|
144
|
+
expect(score).to eql 7.25
|
145
145
|
end
|
146
146
|
|
147
147
|
it 'should return the correct Gunning fog score' do
|
148
148
|
score = TextStat.gunning_fog(@long_test)
|
149
|
-
expect(score).to eql
|
149
|
+
expect(score).to eql 17.56
|
150
150
|
end
|
151
151
|
|
152
152
|
it 'should return the correct Lix readability test score' do
|
@@ -154,6 +154,21 @@ describe TextStat do
|
|
154
154
|
expect(score).to eql 45.11
|
155
155
|
end
|
156
156
|
|
157
|
+
it 'should return the correct FORCAST readability test score' do
|
158
|
+
score = TextStat.forcast(@long_test)
|
159
|
+
expect(score).to eql 10
|
160
|
+
end
|
161
|
+
|
162
|
+
it 'should return the correct Powers Sumner Kearl readability test score' do
|
163
|
+
score = TextStat.powers_sumner_kearl(@long_test)
|
164
|
+
expect(score).to eql 25.04
|
165
|
+
end
|
166
|
+
|
167
|
+
it 'should return the correct SPACHE readability test score' do
|
168
|
+
score = TextStat.spache(@long_test)
|
169
|
+
expect(score).to eql 4.12
|
170
|
+
end
|
171
|
+
|
157
172
|
it 'should return the readability consensus score' do
|
158
173
|
standard = TextStat.text_standard(@long_test)
|
159
174
|
expect(standard).to eql '10th and 11th grade'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textstat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jakub Polak
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: text-hyphen
|
@@ -50,14 +50,14 @@ dependencies:
|
|
50
50
|
requirements:
|
51
51
|
- - "~>"
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: '
|
53
|
+
version: '13.0'
|
54
54
|
type: :development
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
57
|
requirements:
|
58
58
|
- - "~>"
|
59
59
|
- !ruby/object:Gem::Version
|
60
|
-
version: '
|
60
|
+
version: '13.0'
|
61
61
|
- !ruby/object:Gem::Dependency
|
62
62
|
name: rspec
|
63
63
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
108
|
- !ruby/object:Gem::Version
|
109
109
|
version: '0'
|
110
110
|
requirements: []
|
111
|
-
rubygems_version: 3.
|
111
|
+
rubygems_version: 3.2.17
|
112
112
|
signing_key:
|
113
113
|
specification_version: 4
|
114
114
|
summary: Ruby gem to calculate readability statistics of a text object - paragraphs,
|