text_parser 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/text_parser.rb +2 -1
- data/test/text_parser_test.rb +12 -0
- data/text_parser.gemspec +1 -1
- metadata +2 -2
data/lib/text_parser.rb
CHANGED
@@ -16,7 +16,8 @@ module TextParser
|
|
16
16
|
options[:dictionary] = text.split(" ") unless options[:dictionary]
|
17
17
|
return [] if options[:dictionary].count < 1
|
18
18
|
regex = Regexp.new(options[:dictionary].join('\\b|\\b'), Regexp::IGNORECASE)
|
19
|
-
match_result = text.scan(regex).map{|i| i.downcase}
|
19
|
+
match_result = text.scan(regex).map{|i| i.downcase}
|
20
|
+
match_result.select!{|i| i.size >= options[:minimum_length]} if options[:minimum_length]
|
20
21
|
match_result.each do |w|
|
21
22
|
result << {:hits => match_result.count(w), :word => w} unless result.select{|r| r[:word] == w}.shift || options[:negative_dictionary].map{|i| i.downcase}.include?(w)
|
22
23
|
end
|
data/test/text_parser_test.rb
CHANGED
@@ -100,6 +100,18 @@ class TextParserTest < Test::Unit::TestCase
|
|
100
100
|
{:word => "pão", :hits => 1}], "Pão açúcar".parse
|
101
101
|
assert_equal [{:word => "ãéç", :hits => 1}], "ãéç".parse
|
102
102
|
end
|
103
|
+
|
104
|
+
def test_minimum_length
|
105
|
+
text = "a ab abc "
|
106
|
+
assert_equal [{:word => "a", :hits => 1},
|
107
|
+
{:word => "ab", :hits => 1},
|
108
|
+
{:word => "abc", :hits => 1}], text.parse(:minimum_length => 1)
|
109
|
+
assert_equal [{:word => "ab", :hits => 1},
|
110
|
+
{:word => "abc", :hits => 1}], text.parse(:minimum_length => 2)
|
111
|
+
assert_equal [{:word => "abc", :hits => 1}], text.parse(:minimum_length => 3)
|
112
|
+
assert_equal [{:word => "abc", :hits => 1}], text.parse(:minimum_length => 2, :negative_dictionary => ["ab"])
|
113
|
+
assert_equal [], text.parse(:minimum_length => 3, :dictionary => ["a"])
|
114
|
+
end
|
103
115
|
end
|
104
116
|
|
105
117
|
|
data/text_parser.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-12-
|
12
|
+
date: 2011-12-19 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: Using method parse in the String object you can parse any text.
|
15
15
|
email: fpaula@gmail.com
|