textstat 0.1.4 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8d18721b77b630a224c0515f9618f9c1cf27192a9723c8b5eaad8f3e84ac4e7b
-  data.tar.gz: 237f4f45b6eb32944bcc91542ec8eb2d16c81165855d6db51c287ccdb93b37fd
+  metadata.gz: 61ef6dcf0e938af4c3c30ad4b45c3241f10ffc01e4d63e566a74d993b57309ac
+  data.tar.gz: 20f6412df8d5a8658d4113ddb48e808697b7e1489ba5acb4fae9d97346393acd
 SHA512:
-  metadata.gz: b8ec1aeb8e50394712caf603f947fff09924cc2beb133c26e3b38530a0b338e14dcf3587136746e8d060502ee8ba01f3ba921222ccfabe99c14dadc5b312e773
-  data.tar.gz: 96e23a027c2fded6b414551ed4d9dabc4411f53bd6e9624770d38e49b30aaaac3ffe1f8e28189781b441cd19cb238455e48e5bb351f2aa34906ae5ee7abbcd4d
+  metadata.gz: ef7dee598e3db4c26e2e305f464fbebf6b3757429a72ae66b887ab7f068a34a0d2b6c2e5c52771b2c342e8c06d2ec9a69fa1d60065eaa37b3ccf76eb232f647d
+  data.tar.gz: 5ca1f7c6dcb11a81457b87339ab153ac95125542378875fd451c9ba58659925fc018d38af4bbc61149cb5f8f53765f67363029d8eb698ad80effa969d7ecbbeb
data/lib/counter.rb CHANGED
@@ -1,3 +1,3 @@
 class TextStat
-  VERSION = "0.1.4"
+  VERSION = "0.1.8"
 end
data/lib/textstat.rb CHANGED
@@ -23,7 +23,7 @@ class TextStat
     count = 0
     text.split(' ').each do |word|
       word_hyphenated = dictionary.visualise(word)
-      count += [1, word_hyphenated.count('-') + 1].max
+      count += word_hyphenated.count('-') + 1
     end
     count
   end
@@ -33,109 +33,105 @@ class TextStat
   end
 
   def self.avg_sentence_length(text)
-    asl = lexicon_count(text).to_f / sentence_count(text).to_f
+    asl = lexicon_count(text).to_f / sentence_count(text)
     asl.round(1)
   rescue ZeroDivisionError
     0.0
   end
 
-  def self.avg_syllables_per_word(text)
-    syllable = syllable_count(text)
+  def self.avg_syllables_per_word(text, language = 'en_us')
+    syllable = syllable_count(text, language)
     words = lexicon_count(text)
     begin
-      syllables_per_word = syllable.to_f / words.to_f
-      return syllables_per_word.round(1)
+      syllables_per_word = syllable.to_f / words
+      syllables_per_word.round(1)
     rescue ZeroDivisionError
-      return 0.0
+      0.0
     end
   end
 
   def self.avg_letter_per_word(text)
-    letters_per_word = char_count(text).to_f / lexicon_count(text).to_f
+    letters_per_word = char_count(text).to_f / lexicon_count(text)
    letters_per_word.round(2)
   rescue ZeroDivisionError
     0.0
   end
 
   def self.avg_sentence_per_word(text)
-    sentence_per_word = sentence_count(text).to_f / lexicon_count(text).to_f
+    sentence_per_word = sentence_count(text).to_f / lexicon_count(text)
     sentence_per_word.round(2)
   rescue ZeroDivisionError
     0.0
   end
 
-  def self.flesch_reading_ease(text)
+  def self.flesch_reading_ease(text, language = 'en_us')
     sentence_length = avg_sentence_length(text)
-    syllables_per_word = avg_syllables_per_word(text)
-    flesch = (
-      206.835 - (1.015 * sentence_length).to_f - (84.6 * syllables_per_word).to_f
-    )
+    syllables_per_word = avg_syllables_per_word(text, language)
+    flesch = 206.835 - 1.015 * sentence_length - 84.6 * syllables_per_word
     flesch.round(2)
   end
 
-  def self.flesch_kincaid_grade(text)
+  def self.flesch_kincaid_grade(text, language = 'en_us')
     sentence_length = avg_sentence_length(text)
-    syllables_per_word = avg_syllables_per_word(text)
-    flesch = (0.39 * sentence_length.to_f) + (11.8 * syllables_per_word.to_f) - 15.59
+    syllables_per_word = avg_syllables_per_word(text, language)
+    flesch = 0.39 * sentence_length + 11.8 * syllables_per_word - 15.59
     flesch.round(1)
   end
 
-  def self.polysyllab_count(text)
+  def self.polysyllab_count(text, language = 'en_us')
     count = 0
     text.split(' ').each do |word|
-      w = syllable_count(word)
+      w = syllable_count(word, language)
       count += 1 if w >= 3
     end
     count
   end
 
-  def self.smog_index(text)
+  def self.smog_index(text, language = 'en_us')
     sentences = sentence_count(text)
 
     if sentences >= 3
       begin
-        polysyllab = polysyllab_count(text)
-        smog = (
-          (1.043 * (30 * (polysyllab / sentences))**0.5) + 3.1291)
-        return smog.round(1)
+        polysyllab = polysyllab_count(text, language)
+        smog = 1.043 * Math.sqrt(30.0 * polysyllab / sentences) + 3.1291
+        smog.round(1)
       rescue ZeroDivisionError
-        return 0.0
+        0.0
       end
     else
-      return 0.0
+      0.0
     end
   end
 
   def self.coleman_liau_index(text)
-    letters = (avg_letter_per_word(text) * 100).round(2)
+    letters = (avg_letter_per_word(text) * 100).round(2)
     sentences = (avg_sentence_per_word(text) * 100).round(2)
-    coleman = ((0.058 * letters) - (0.296 * sentences) - 15.8).to_f
+    coleman = 0.0588 * letters - 0.296 * sentences - 15.8
     coleman.round(2)
   end
 
   def self.automated_readability_index(text)
-    chars = char_count(text)
-    words = lexicon_count(text)
+    chars = char_count(text)
+    words = lexicon_count(text)
     sentences = sentence_count(text)
     begin
-      a = chars.to_f / words.to_f
-      b = words.to_f / sentences.to_f
+      a = chars.to_f / words
+      b = words.to_f / sentences
 
-      readability = (
-        (4.71 * a.round(2) + (0.5 * b.round(2))) - 21.43)
-      return readability.round(1)
+      readability = 4.71 * a + 0.5 * b - 21.43
+      readability.round(1)
     rescue ZeroDivisionError
-      return 0.0
+      0.0
     end
   end
 
-  def self.linsear_write_formula(text)
+  def self.linsear_write_formula(text, language = 'en_us')
     easy_word = 0
     difficult_word = 0
     text_list = text.split(' ')[0..100]
 
     text_list.each do |word|
-      if syllable_count(word) < 3
+      if syllable_count(word, language) < 3
         easy_word += 1
       else
         difficult_word += 1
@@ -144,11 +140,9 @@ class TextStat
 
     text = text_list.join(' ')
 
-    number = ((easy_word * 1 + difficult_word * 3) / sentence_count(text)).to_f
-    if number <= 20
-      number -= 2
-    end
-    return number / 2
+    number = (easy_word * 1 + difficult_word * 3).to_f / sentence_count(text)
+    number -= 2 if number <= 20
+    number / 2
   end
 
   def self.difficult_words(text, language = 'en_us')
@@ -161,58 +155,70 @@ class TextStat
     text_list = text.downcase.gsub(/[^0-9a-z ]/i, '').split(' ')
     diff_words_set = Set.new
     text_list.each do |value|
-      unless easy_words.include? value
-        if syllable_count(value) > 1
-          diff_words_set.add(value)
-        end
-      end
+      next if easy_words.include? value
+
+      diff_words_set.add(value) if syllable_count(value, language) > 1
     end
-    return diff_words_set.length
+    diff_words_set.length
   end
 
-  def self.dale_chall_readability_score(text)
+  def self.dale_chall_readability_score(text, language = 'en_us')
     word_count = lexicon_count(text)
-    count = word_count - difficult_words(text)
+    count = word_count - difficult_words(text, language)
 
     begin
-      per = count.to_f / word_count.to_f * 100
+      per = 100.0 * count / word_count
     rescue ZeroDivisionError
       return 0.0
     end
 
     difficult_words = 100 - per
-    score = (
-      (0.1579 * difficult_words)
-      + (0.0496 * avg_sentence_length(text)))
+    score = 0.1579 * difficult_words + 0.0496 * avg_sentence_length(text)
+    score += 3.6365 if difficult_words > 5
 
-    if difficult_words > 5
-      score += 3.6365
-    end
-    return score.round(2)
+    score.round(2)
   end
 
-  def self.gunning_fog(text)
-    begin
-      per_diff_words = (
-        (difficult_words(text) / lexicon_count(text) * 100) + 5)
+  def self.gunning_fog(text, language = 'en_us')
+    per_diff_words = 100.0 * difficult_words(text, language) / lexicon_count(text) + 5
+    grade = 0.4 * (avg_sentence_length(text) + per_diff_words)
 
-      grade = 0.4 * (avg_sentence_length(text) + per_diff_words)
-      return grade.round(2)
-    rescue ZeroDivisionError
-      return 0.0
-    end
+    grade.round(2)
+  rescue ZeroDivisionError
+    0.0
   end
 
   def self.lix(text)
     words = text.split(' ')
     words_length = words.length
-    long_words = words.select { |word| word.length > 6 }.count
+    long_words = words.count { |word| word.length > 6 }
 
-    per_long_words = (long_words * 100).to_f / words_length
+    per_long_words = 100.0 * long_words / words_length
     asl = avg_sentence_length(text)
     lix = asl + per_long_words
 
-    return lix.round(2)
+    lix.round(2)
+  end
+
+  def self.forcast(text, language = 'en_us')
+    words = text.split(' ')[0..149]
+    words_with_one_syllabe = words.count {
+      |word| syllable_count(word, language) == 1
+    }
+    forcast = 20 - (words_with_one_syllabe / 10)
+    forcast
+  end
+
+  def self.powers_sumner_kearl(text, language = 'en_us')
+    grade = 0.0778 * avg_sentence_length(text) + 0.0455 * syllable_count(text, language) - 2.2029
+    grade.round(2)
+  end
+
+  def self.spache(text, language = 'en_us')
+    words = text.split(' ').count
+    unfamiliar_words = difficult_words(text, language) / words
+    grade = (0.141 * avg_sentence_length(text)) + (0.086 * unfamiliar_words) + 0.839
+    grade.round(2)
   end
 
   def self.text_standard(text, float_output=nil)
@@ -287,9 +293,9 @@ class TextStat
     score = final_grade[0][0]
 
     if float_output
-      return score.to_f
+      score.to_f
     else
-      return "#{score.to_i - 1}th and #{score.to_i}th grade"
+      "#{score.to_i - 1}th and #{score.to_i}th grade"
     end
   end
 
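Note on the change above: every syllable-based metric now takes an optional language argument (default 'en_us'), which is passed through to the text-hyphen dictionary, and three new measures are added (forcast, powers_sumner_kearl, spache). A minimal usage sketch of the 0.1.8 interface follows; the sample string and the 'en_uk' dictionary id are illustrative assumptions, not taken from the package.

require 'textstat'

sample = 'Playing games has always been thought to be an important part ' \
         'of growing up, and well-designed games exercise the imagination.'

# Defaults to the 'en_us' hyphenation dictionary.
TextStat.flesch_reading_ease(sample)
TextStat.flesch_kincaid_grade(sample)

# Any dictionary shipped with text-hyphen can be named explicitly,
# e.g. 'en_uk' (assumed to be available here).
TextStat.smog_index(sample, 'en_uk')

# New in 0.1.8: FORCAST, Powers-Sumner-Kearl and Spache grades.
TextStat.forcast(sample)
TextStat.powers_sumner_kearl(sample)
TextStat.spache(sample)

The corresponding spec expectations are updated in the hunks below.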
@@ -116,22 +116,22 @@ describe TextStat do
 
   it 'should return the correct smog index' do
     index = TextStat.smog_index(@long_test)
-    expect(index).to eql 11.2
+    expect(index).to eql 12.5
   end
 
   it 'should return the correct Coleman–Liau index' do
     index = TextStat.coleman_liau_index(@long_test)
-    expect(index).to eql 10.28
+    expect(index).to eql 10.65
   end
 
   it 'should return the correct automated readability index' do
     index = TextStat.automated_readability_index(@long_test)
-    expect(index).to eql 12.3
+    expect(index).to eql 12.4
   end
 
   it 'should return the correct linsear write formula result' do
     result = TextStat.linsear_write_formula(@long_test)
-    expect(result).to eql 14.5
+    expect(result).to eql 14.875
   end
 
   it 'should return the correct difficult words result' do
@@ -141,12 +141,12 @@ describe TextStat do
 
   it 'should return the correct Dale–Chall readability score' do
     score = TextStat.dale_chall_readability_score(@long_test)
-    expect(score).to eql 4.79
+    expect(score).to eql 7.25
   end
 
   it 'should return the correct Gunning fog score' do
     score = TextStat.gunning_fog(@long_test)
-    expect(score).to eql 11.32
+    expect(score).to eql 17.56
   end
 
   it 'should return the correct Lix readability test score' do
@@ -154,6 +154,21 @@ describe TextStat do
     expect(score).to eql 45.11
   end
 
+  it 'should return the correct FORCAST readability test score' do
+    score = TextStat.forcast(@long_test)
+    expect(score).to eql 10
+  end
+
+  it 'should return the correct Powers Sumner Kearl readability test score' do
+    score = TextStat.powers_sumner_kearl(@long_test)
+    expect(score).to eql 25.04
+  end
+
+  it 'should return the correct SPACHE readability test score' do
+    score = TextStat.spache(@long_test)
+    expect(score).to eql 4.12
+  end
+
   it 'should return the readability consensus score' do
     standard = TextStat.text_standard(@long_test)
     expect(standard).to eql '10th and 11th grade'
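Note on the updated expectations above: the new values largely track arithmetic and rounding fixes in lib/textstat.rb rather than new behaviour. For example, smog_index previously truncated polysyllab / sentences with integer division before taking the square root, and gunning_fog computed its difficult-word percentage with integer division. A small sketch of the SMOG fix; the counts below (78 polysyllables, 21 sentences) are made up for illustration, not taken from the spec fixture.

polysyllab = 78  # hypothetical polysyllable count
sentences  = 21  # hypothetical sentence count

# 0.1.4: polysyllab / sentences is Integer division (78 / 21 => 3),
# so the value under the root is truncated to 30 * 3 = 90.
smog_old = 1.043 * (30 * (polysyllab / sentences))**0.5 + 3.1291

# 0.1.8: 30.0 promotes the expression to Float (30.0 * 78 / 21 => 111.43).
smog_new = 1.043 * Math.sqrt(30.0 * polysyllab / sentences) + 3.1291

smog_old.round(1)  # => 13.0
smog_new.round(1)  # => 14.1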
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: textstat
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.8
 platform: ruby
 authors:
 - Jakub Polak
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-02-11 00:00:00.000000000 Z
+date: 2022-05-15 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: text-hyphen
@@ -50,14 +50,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: '13.0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: '13.0'
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.1.0.pre1
+rubygems_version: 3.2.17
 signing_key:
 specification_version: 4
 summary: Ruby gem to calculate readability statistics of a text object - paragraphs,