lingua-it-readability 1.1.7 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/lib/lingua/it/readability.rb +14 -1
- data/lib/lingua/it/readability/version.rb +1 -1
- data/lib/lingua/it/sentence.rb +2 -2
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a882ae84c810b0133e6ef76d2bac16205b411954
|
|
4
|
+
data.tar.gz: c9f681c724fbb40f91b068dd30c1fee500f37c6d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9df4807ecb56e41cf2c700fc603c4b0263d19ec29d13e6261c948f29cea85262732bdf43572d4d2703879958a375383cc95e7b0d7c627f7c5c361bbab657fe91
|
|
7
|
+
data.tar.gz: 48518468c89a5e3d29a67fb5d28fbe8845f6b5633f982b86b27ed437a10bc62acf8d0f0931ca2324d9dac049de0db54e54d2cad6f91b13858c2e4930124cb341
|
data/CHANGELOG.md
CHANGED
(changelog hunk not present in this extract)
data/lib/lingua/it/readability.rb
CHANGED
|
@@ -60,6 +60,18 @@ module Lingua
|
|
|
60
60
|
count_words
|
|
61
61
|
end
|
|
62
62
|
|
|
63
|
+
# Analyze file content with optional delimiters
|
|
64
|
+
def analyze_file(file_name, *delimiters)
|
|
65
|
+
# check that file exists
|
|
66
|
+
if !File.exists?(file_name)
|
|
67
|
+
raise "An error has occured"
|
|
68
|
+
return
|
|
69
|
+
end
|
|
70
|
+
# slurp file into string and pass it to analyze method
|
|
71
|
+
text = File.open(file_name) { |f| f.read }.strip
|
|
72
|
+
analyze(text, delimiters)
|
|
73
|
+
end
|
|
74
|
+
|
|
63
75
|
# Reset Lingua::IT::Sentence symbols delimiter cache
|
|
64
76
|
def reset_delimiter!
|
|
65
77
|
@sentence.reset_delimiter!
|
|
@@ -149,6 +161,7 @@ module Lingua
|
|
|
149
161
|
sprintf "Sentence delimiters %s \n" <<
|
|
150
162
|
"Number of paragraphs %d \n" <<
|
|
151
163
|
"Number of sentences %d \n" <<
|
|
164
|
+
"Number of syllables %d \n" <<
|
|
152
165
|
"Number of words %d \n" <<
|
|
153
166
|
"Number of characters %d \n\n" <<
|
|
154
167
|
"Average words per sentence %.2f \n" <<
|
|
@@ -156,7 +169,7 @@ module Lingua
|
|
|
156
169
|
"Gulpease score %d \n" <<
|
|
157
170
|
"Flesch score %2.2f \n",
|
|
158
171
|
sentence.delim_regex.gsub(/\\/,''), num_paragraphs, num_sentences,
|
|
159
|
-
num_words, num_characters, words_per_sentence,
|
|
172
|
+
num_syllables, num_words, num_characters, words_per_sentence,
|
|
160
173
|
syllables_per_word, gulpease, flesch
|
|
161
174
|
end
|
|
162
175
|
|
data/lib/lingua/it/sentence.rb
CHANGED
|
@@ -29,7 +29,7 @@ module Lingua
|
|
|
29
29
|
def self.sentences(text)
|
|
30
30
|
txt = text.dup
|
|
31
31
|
txt.gsub!(/\b(#{@abbr_regex})(\.)\B/i, '\10002')
|
|
32
|
-
txt.gsub!(/["']?[A-Z][
|
|
32
|
+
txt.gsub!(/["']?[A-Z][^\Q#{@delim_regex}\E]+((?![\Q#{@delim_regex}\E]['"]?\s["']?[A-Z][^\Q#{@delim_regex}\E]).)+[\Q#{@delim_regex}\E'"]+/, '\2\001')
|
|
33
33
|
txt.gsub!(/\b(#{@abbr_regex})(0002)/i, '\1.')
|
|
34
34
|
txt.split(/01/).map { |sentence| sentence.strip }
|
|
35
35
|
end
|
|
@@ -72,7 +72,7 @@ module Lingua
|
|
|
72
72
|
# Utility method, join all elements of the delimiters arrays
|
|
73
73
|
# without a separator, making suitable for a regex.
|
|
74
74
|
def self.set_delim_regex!
|
|
75
|
-
@delim_regex = "#{@delimiters.join('
|
|
75
|
+
@delim_regex = "#{@delimiters.join('')}"
|
|
76
76
|
end
|
|
77
77
|
|
|
78
78
|
initialize_abbreviations!
|