text_parser 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,7 +2,7 @@ module TextParser
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- PATCH = 3
5
+ PATCH = 4
6
6
  STRING = "#{MAJOR}.#{MINOR}.#{PATCH}"
7
7
  end
8
8
  end
data/lib/text_parser.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'iconv'
1
2
  module TextParser
2
3
  # Returns the parsed text as a list of words and their occurrences.
3
4
  # @param [Hash] args
@@ -14,10 +15,10 @@ module TextParser
14
15
  text = process_text
15
16
  options[:dictionary] = text.split(" ") unless options[:dictionary]
16
17
  return [] if options[:dictionary].count < 1
17
- regex = Regexp.new(options[:dictionary].join("|"), Regexp::IGNORECASE)
18
- match_result = text.scan(regex).map{|i| i.downcase}
18
+ regex = Regexp.new("(\\b#{options[:dictionary].join('\\b|\\b')}\\b)", Regexp::IGNORECASE)
19
+ match_result = text.scan(regex).map{|i| i.shift.downcase}
19
20
  match_result.each do |w|
20
- result << {:hits => match_result.count(w), :word => w} unless result.select{|r| r[:word] == w}.shift unless options[:negative_dictionary].map{|i| i.downcase}.include?(w)
21
+ result << {:hits => match_result.count(w), :word => w} unless result.select{|r| r[:word] == w}.shift || options[:negative_dictionary].map{|i| i.downcase}.include?(w)
21
22
  end
22
23
  result = result.sort_by{|i| i[options[:order]]}
23
24
  result.reverse! if options[:order_direction] == :desc
@@ -27,7 +28,8 @@ module TextParser
27
28
  private
28
29
 
29
30
  def process_text
30
- self.gsub(/[^\w\s\-]/, "")
31
+ text = self.gsub(/\s{2,}/," ")
32
+ text = text.gsub(/[^\w\s\-]/, "")
31
33
  end
32
34
  end
33
35
 
@@ -2,7 +2,6 @@ require "test/unit"
2
2
  require "text_parser"
3
3
 
4
4
  class TextParserTest < Test::Unit::TestCase
5
-
6
5
  def test_should_have_method_parse
7
6
  assert "some text".methods.select{|a| a == "parse"}.count > 0
8
7
  end
@@ -79,4 +78,28 @@ class TextParserTest < Test::Unit::TestCase
79
78
  assert_equal "text".parse(args), [{:word => "text", :hits => 1}]
80
79
  end
81
80
 
82
- end
81
+ def test_should_work_with_many_spaces
82
+ text = "e se eu encher de espacos"
83
+ assert_equal [{:word => "de", :hits => 1},
84
+ {:word => "e", :hits => 1},
85
+ {:word => "encher", :hits => 1},
86
+ {:word => "espacos", :hits => 1},
87
+ {:word => "eu", :hits => 1},
88
+ {:word => "se", :hits => 1}], text.parse
89
+ end
90
+ end
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
data/text_parser.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "text_parser"
3
- s.version = "0.1.3"
3
+ s.version = "0.1.4"
4
4
  s.author = "Frederico de Paula"
5
5
  s.email = "fpaula@gmail.com"
6
6
  s.summary = "A easy way to parse text."
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_parser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 19
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 3
10
- version: 0.1.3
9
+ - 4
10
+ version: 0.1.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Frederico de Paula
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-12-07 00:00:00 -02:00
18
+ date: 2011-12-08 00:00:00 -02:00
19
19
  default_executable:
20
20
  dependencies: []
21
21