text_parser 0.1.3 → 0.1.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ module TextParser
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- PATCH = 3
5
+ PATCH = 4
6
6
  STRING = "#{MAJOR}.#{MINOR}.#{PATCH}"
7
7
  end
8
8
  end
data/lib/text_parser.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'iconv'
1
2
  module TextParser
2
3
  # Returns a parsed text with the words and its occurrences.
3
4
  # @param [Hash] [args]
@@ -14,10 +15,10 @@ module TextParser
14
15
  text = process_text
15
16
  options[:dictionary] = text.split(" ") unless options[:dictionary]
16
17
  return [] if options[:dictionary].count < 1
17
- regex = Regexp.new(options[:dictionary].join("|"), Regexp::IGNORECASE)
18
- match_result = text.scan(regex).map{|i| i.downcase}
18
+ regex = Regexp.new("(\\b#{options[:dictionary].join('\\b|\\b')}\\b)", Regexp::IGNORECASE)
19
+ match_result = text.scan(regex).map{|i| i.shift.downcase}
19
20
  match_result.each do |w|
20
- result << {:hits => match_result.count(w), :word => w} unless result.select{|r| r[:word] == w}.shift unless options[:negative_dictionary].map{|i| i.downcase}.include?(w)
21
+ result << {:hits => match_result.count(w), :word => w} unless result.select{|r| r[:word] == w}.shift || options[:negative_dictionary].map{|i| i.downcase}.include?(w)
21
22
  end
22
23
  result = result.sort_by{|i| i[options[:order]]}
23
24
  result.reverse! if options[:order_direction] == :desc
@@ -27,7 +28,8 @@ module TextParser
27
28
  private
28
29
 
29
30
  def process_text
30
- self.gsub(/[^\w\s\-]/, "")
31
+ text = self.gsub(/\s{2,}/," ")
32
+ text = text.gsub(/[^\w\s\-]/, "")
31
33
  end
32
34
  end
33
35
 
@@ -2,7 +2,6 @@ require "test/unit"
2
2
  require "text_parser"
3
3
 
4
4
  class TextParserTest < Test::Unit::TestCase
5
-
6
5
  def test_should_have_method_parse
7
6
  assert "some text".methods.select{|a| a == "parse"}.count > 0
8
7
  end
@@ -79,4 +78,28 @@ class TextParserTest < Test::Unit::TestCase
79
78
  assert_equal "text".parse(args), [{:word => "text", :hits => 1}]
80
79
  end
81
80
 
82
- end
81
+ def test_should_work_with_many_spaces
82
+ text = "e se eu encher de espacos"
83
+ assert_equal [{:word => "de", :hits => 1},
84
+ {:word => "e", :hits => 1},
85
+ {:word => "encher", :hits => 1},
86
+ {:word => "espacos", :hits => 1},
87
+ {:word => "eu", :hits => 1},
88
+ {:word => "se", :hits => 1}], text.parse
89
+ end
90
+ end
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
data/text_parser.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "text_parser"
3
- s.version = "0.1.3"
3
+ s.version = "0.1.4"
4
4
  s.author = "Frederico de Paula"
5
5
  s.email = "fpaula@gmail.com"
6
6
  s.summary = "A easy way to parse text."
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_parser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 19
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 3
10
- version: 0.1.3
9
+ - 4
10
+ version: 0.1.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Frederico de Paula
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-12-07 00:00:00 -02:00
18
+ date: 2011-12-08 00:00:00 -02:00
19
19
  default_executable:
20
20
  dependencies: []
21
21