textstat 0.1.4 → 0.1.8

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 8d18721b77b630a224c0515f9618f9c1cf27192a9723c8b5eaad8f3e84ac4e7b
- data.tar.gz: 237f4f45b6eb32944bcc91542ec8eb2d16c81165855d6db51c287ccdb93b37fd
+ metadata.gz: 61ef6dcf0e938af4c3c30ad4b45c3241f10ffc01e4d63e566a74d993b57309ac
+ data.tar.gz: 20f6412df8d5a8658d4113ddb48e808697b7e1489ba5acb4fae9d97346393acd
  SHA512:
- metadata.gz: b8ec1aeb8e50394712caf603f947fff09924cc2beb133c26e3b38530a0b338e14dcf3587136746e8d060502ee8ba01f3ba921222ccfabe99c14dadc5b312e773
- data.tar.gz: 96e23a027c2fded6b414551ed4d9dabc4411f53bd6e9624770d38e49b30aaaac3ffe1f8e28189781b441cd19cb238455e48e5bb351f2aa34906ae5ee7abbcd4d
+ metadata.gz: ef7dee598e3db4c26e2e305f464fbebf6b3757429a72ae66b887ab7f068a34a0d2b6c2e5c52771b2c342e8c06d2ec9a69fa1d60065eaa37b3ccf76eb232f647d
+ data.tar.gz: 5ca1f7c6dcb11a81457b87339ab153ac95125542378875fd451c9ba58659925fc018d38af4bbc61149cb5f8f53765f67363029d8eb698ad80effa969d7ecbbeb
data/lib/counter.rb CHANGED
@@ -1,3 +1,3 @@
  class TextStat
- VERSION = "0.1.4"
+ VERSION = "0.1.8"
  end
data/lib/textstat.rb CHANGED
@@ -23,7 +23,7 @@ class TextStat
  count = 0
  text.split(' ').each do |word|
  word_hyphenated = dictionary.visualise(word)
- count += [1, word_hyphenated.count('-') + 1].max
+ count += word_hyphenated.count('-') + 1
  end
  count
  end
@@ -33,109 +33,105 @@ class TextStat
  end

  def self.avg_sentence_length(text)
- asl = lexicon_count(text).to_f / sentence_count(text).to_f
+ asl = lexicon_count(text).to_f / sentence_count(text)
  asl.round(1)
  rescue ZeroDivisionError
  0.0
  end

- def self.avg_syllables_per_word(text)
- syllable = syllable_count(text)
+ def self.avg_syllables_per_word(text, language = 'en_us')
+ syllable = syllable_count(text, language)
  words = lexicon_count(text)
  begin
- syllables_per_word = syllable.to_f / words.to_f
- return syllables_per_word.round(1)
+ syllables_per_word = syllable.to_f / words
+ syllables_per_word.round(1)
  rescue ZeroDivisionError
- return 0.0
+ 0.0
  end
  end

  def self.avg_letter_per_word(text)
- letters_per_word = char_count(text).to_f / lexicon_count(text).to_f
+ letters_per_word = char_count(text).to_f / lexicon_count(text)
  letters_per_word.round(2)
  rescue ZeroDivisionError
  0.0
  end

  def self.avg_sentence_per_word(text)
- sentence_per_word = sentence_count(text).to_f / lexicon_count(text).to_f
+ sentence_per_word = sentence_count(text).to_f / lexicon_count(text)
  sentence_per_word.round(2)
  rescue ZeroDivisionError
  0.0
  end

- def self.flesch_reading_ease(text)
+ def self.flesch_reading_ease(text, language = 'en_us')
  sentence_length = avg_sentence_length(text)
- syllables_per_word = avg_syllables_per_word(text)
- flesch = (
- 206.835 - (1.015 * sentence_length).to_f - (84.6 * syllables_per_word).to_f
- )
+ syllables_per_word = avg_syllables_per_word(text, language)
+ flesch = 206.835 - 1.015 * sentence_length - 84.6 * syllables_per_word
  flesch.round(2)
  end

- def self.flesch_kincaid_grade(text)
+ def self.flesch_kincaid_grade(text, language = 'en_us')
  sentence_length = avg_sentence_length(text)
- syllables_per_word = avg_syllables_per_word(text)
- flesch = (0.39 * sentence_length.to_f) + (11.8 * syllables_per_word.to_f) - 15.59
+ syllables_per_word = avg_syllables_per_word(text, language)
+ flesch = 0.39 * sentence_length + 11.8 * syllables_per_word - 15.59
  flesch.round(1)
  end

- def self.polysyllab_count(text)
+ def self.polysyllab_count(text, language = 'en_us')
  count = 0
  text.split(' ').each do |word|
- w = syllable_count(word)
+ w = syllable_count(word, language)
  count += 1 if w >= 3
  end
  count
  end

- def self.smog_index(text)
+ def self.smog_index(text, language = 'en_us')
  sentences = sentence_count(text)

  if sentences >= 3
  begin
- polysyllab = polysyllab_count(text)
- smog = (
- (1.043 * (30 * (polysyllab / sentences))**0.5) + 3.1291)
- return smog.round(1)
+ polysyllab = polysyllab_count(text, language)
+ smog = 1.043 * Math.sqrt(30.0 * polysyllab / sentences) + 3.1291
+ smog.round(1)
  rescue ZeroDivisionError
- return 0.0
+ 0.0
  end
  else
- return 0.0
+ 0.0
  end
  end

  def self.coleman_liau_index(text)
- letters = (avg_letter_per_word(text) * 100).round(2)
+ letters = (avg_letter_per_word(text) * 100).round(2)
  sentences = (avg_sentence_per_word(text) * 100).round(2)
- coleman = ((0.058 * letters) - (0.296 * sentences) - 15.8).to_f
+ coleman = 0.0588 * letters - 0.296 * sentences - 15.8
  coleman.round(2)
  end

  def self.automated_readability_index(text)
- chars = char_count(text)
- words = lexicon_count(text)
+ chars = char_count(text)
+ words = lexicon_count(text)
  sentences = sentence_count(text)
  begin
- a = chars.to_f / words.to_f
- b = words.to_f / sentences.to_f
+ a = chars.to_f / words
+ b = words.to_f / sentences

- readability = (
- (4.71 * a.round(2) + (0.5 * b.round(2))) - 21.43)
- return readability.round(1)
+ readability = 4.71 * a + 0.5 * b - 21.43
+ readability.round(1)
  rescue ZeroDivisionError
- return 0.0
+ 0.0
  end
  end

- def self.linsear_write_formula(text)
+ def self.linsear_write_formula(text, language = 'en_us')
  easy_word = 0
  difficult_word = 0
  text_list = text.split(' ')[0..100]

  text_list.each do |word|
- if syllable_count(word) < 3
+ if syllable_count(word, language) < 3
  easy_word += 1
  else
  difficult_word += 1
@@ -144,11 +140,9 @@ class TextStat

  text = text_list.join(' ')

- number = ((easy_word * 1 + difficult_word * 3) / sentence_count(text)).to_f
- if number <= 20
- number -= 2
- end
- return number / 2
+ number = (easy_word * 1 + difficult_word * 3).to_f / sentence_count(text)
+ number -= 2 if number <= 20
+ number / 2
  end

  def self.difficult_words(text, language = 'en_us')
@@ -161,58 +155,70 @@ class TextStat
  text_list = text.downcase.gsub(/[^0-9a-z ]/i, '').split(' ')
  diff_words_set = Set.new
  text_list.each do |value|
- unless easy_words.include? value
- if syllable_count(value) > 1
- diff_words_set.add(value)
- end
- end
+ next if easy_words.include? value
+
+ diff_words_set.add(value) if syllable_count(value, language) > 1
  end
- return diff_words_set.length
+ diff_words_set.length
  end

- def self.dale_chall_readability_score(text)
+ def self.dale_chall_readability_score(text, language = 'en_us')
  word_count = lexicon_count(text)
- count = word_count - difficult_words(text)
+ count = word_count - difficult_words(text, language)

  begin
- per = count.to_f / word_count.to_f * 100
+ per = 100.0 * count / word_count
  rescue ZeroDivisionError
  return 0.0
  end

  difficult_words = 100 - per
- score = (
- (0.1579 * difficult_words)
- + (0.0496 * avg_sentence_length(text)))
+ score = 0.1579 * difficult_words + 0.0496 * avg_sentence_length(text)
+ score += 3.6365 if difficult_words > 5

- if difficult_words > 5
- score += 3.6365
- end
- return score.round(2)
+ score.round(2)
  end

- def self.gunning_fog(text)
- begin
- per_diff_words = (
- (difficult_words(text) / lexicon_count(text) * 100) + 5)
+ def self.gunning_fog(text, language = 'en_us')
+ per_diff_words = 100.0 * difficult_words(text, language) / lexicon_count(text) + 5
+ grade = 0.4 * (avg_sentence_length(text) + per_diff_words)

- grade = 0.4 * (avg_sentence_length(text) + per_diff_words)
- return grade.round(2)
- rescue ZeroDivisionError
- return 0.0
- end
+ grade.round(2)
+ rescue ZeroDivisionError
+ 0.0
  end

  def self.lix(text)
  words = text.split(' ')
  words_length = words.length
- long_words = words.select { |word| word.length > 6 }.count
+ long_words = words.count { |word| word.length > 6 }

- per_long_words = (long_words * 100).to_f / words_length
+ per_long_words = 100.0 * long_words / words_length
  asl = avg_sentence_length(text)
  lix = asl + per_long_words

- return lix.round(2)
+ lix.round(2)
+ end
+
+ def self.forcast(text, language = 'en_us')
+ words = text.split(' ')[0..149]
+ words_with_one_syllabe = words.count {
+ |word| syllable_count(word, language) == 1
+ }
+ forcast = 20 - (words_with_one_syllabe / 10)
+ forcast
+ end
+
+ def self.powers_sumner_kearl(text, language = 'en_us')
+ grade = 0.0778 * avg_sentence_length(text) + 0.0455 * syllable_count(text, language) - 2.2029
+ grade.round(2)
+ end
+
+ def self.spache(text, language = 'en_us')
+ words = text.split(' ').count
+ unfamiliar_words = difficult_words(text, language) / words
+ grade = (0.141 * avg_sentence_length(text)) + (0.086 * unfamiliar_words) + 0.839
+ grade.round(2)
  end

  def self.text_standard(text, float_output=nil)
@@ -287,9 +293,9 @@ class TextStat
  score = final_grade[0][0]

  if float_output
- return score.to_f
+ score.to_f
  else
- return "#{score.to_i - 1}th and #{score.to_i}th grade"
+ "#{score.to_i - 1}th and #{score.to_i}th grade"
  end
  end

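Taken together, the library changes above thread an optional language argument (defaulting to 'en_us', a text-hyphen dictionary code) through every syllable-based metric and add three new measures: forcast, powers_sumner_kearl and spache. A minimal usage sketch of the updated API follows; the sample string and the 'en_uk' dictionary code are illustrative assumptions, not values taken from this diff.

    require 'textstat'

    # Illustrative sample text (not the spec fixture).
    text = 'Playing games has always been thought to be important to ' \
           'the development of well-balanced and creative children.'

    TextStat.flesch_reading_ease(text)    # uses the default 'en_us' dictionary
    TextStat.smog_index(text, 'en_uk')    # assumed text-hyphen dictionary code
    TextStat.forcast(text)                # added in this release range
    TextStat.powers_sumner_kearl(text)    # added in this release range
    TextStat.spache(text)                 # added in this release range
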
@@ -116,22 +116,22 @@ describe TextStat do

  it 'should return the correct smog index' do
  index = TextStat.smog_index(@long_test)
- expect(index).to eql 11.2
+ expect(index).to eql 12.5
  end

  it 'should return the correct Coleman–Liau index' do
  index = TextStat.coleman_liau_index(@long_test)
- expect(index).to eql 10.28
+ expect(index).to eql 10.65
  end

  it 'should return the correct automated readability index' do
  index = TextStat.automated_readability_index(@long_test)
- expect(index).to eql 12.3
+ expect(index).to eql 12.4
  end

  it 'should return the correct linsear write formula result' do
  result = TextStat.linsear_write_formula(@long_test)
- expect(result).to eql 14.5
+ expect(result).to eql 14.875
  end

  it 'should return the correct difficult words result' do
@@ -141,12 +141,12 @@ describe TextStat do

  it 'should return the correct Dale–Chall readability score' do
  score = TextStat.dale_chall_readability_score(@long_test)
- expect(score).to eql 4.79
+ expect(score).to eql 7.25
  end

  it 'should return the correct Gunning fog score' do
  score = TextStat.gunning_fog(@long_test)
- expect(score).to eql 11.32
+ expect(score).to eql 17.56
  end

  it 'should return the correct Lix readability test score' do
@@ -154,6 +154,21 @@ describe TextStat do
  expect(score).to eql 45.11
  end

+ it 'should return the correct FORCAST readability test score' do
+ score = TextStat.forcast(@long_test)
+ expect(score).to eql 10
+ end
+
+ it 'should return the correct Powers Sumner Kearl readability test score' do
+ score = TextStat.powers_sumner_kearl(@long_test)
+ expect(score).to eql 25.04
+ end
+
+ it 'should return the correct SPACHE readability test score' do
+ score = TextStat.spache(@long_test)
+ expect(score).to eql 4.12
+ end
+
  it 'should return the readability consensus score' do
  standard = TextStat.text_standard(@long_test)
  expect(standard).to eql '10th and 11th grade'
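
Several of the expected values above change because the reworked formulas avoid Ruby integer division and intermediate rounding; others shift with the corrected Coleman–Liau coefficient (0.058 to 0.0588). A worked sketch of the SMOG fix, using illustrative counts rather than the spec fixture:

    # Hypothetical counts, chosen only to show the integer-division effect.
    polysyllab = 20
    sentences  = 30

    # Old formula: polysyllab / sentences is integer division, so 20 / 30 == 0
    # and the whole root term collapses.
    old_smog = 1.043 * (30 * (polysyllab / sentences))**0.5 + 3.1291
    # => 3.1291

    # New formula: 30.0 * polysyllab / sentences is evaluated in floating point.
    new_smog = 1.043 * Math.sqrt(30.0 * polysyllab / sentences) + 3.1291
    # => 1.043 * Math.sqrt(20.0) + 3.1291, roughly 7.79
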
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: textstat
  version: !ruby/object:Gem::Version
- version: 0.1.4
+ version: 0.1.8
  platform: ruby
  authors:
  - Jakub Polak
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2020-02-11 00:00:00.000000000 Z
+ date: 2022-05-15 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: text-hyphen
@@ -50,14 +50,14 @@ dependencies:
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '10.0'
+ version: '13.0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - "~>"
  - !ruby/object:Gem::Version
- version: '10.0'
+ version: '13.0'
  - !ruby/object:Gem::Dependency
  name: rspec
  requirement: !ruby/object:Gem::Requirement
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.1.0.pre1
+ rubygems_version: 3.2.17
  signing_key:
  specification_version: 4
  summary: Ruby gem to calculate readability statistics of a text object - paragraphs,