text_parser 0.1.5 → 0.1.6

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/lib/text_parser.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # -*- encoding : utf-8 -*-
1
2
  module TextParser
2
3
  # Returns a parsed text with the words and its occurrences.
3
4
  # @param [Hash] [args]
@@ -11,11 +12,11 @@ module TextParser
11
12
  :negative_dictionary => []
12
13
  }.merge(args)
13
14
  result = []
14
- text = process_text
15
+ text = self.gsub(/[^A-Za-zÀ-ú0-9\-]/u," ").strip
15
16
  options[:dictionary] = text.split(" ") unless options[:dictionary]
16
17
  return [] if options[:dictionary].count < 1
17
- regex = Regexp.new("(\\b#{options[:dictionary].join('\\b|\\b')}\\b)", Regexp::IGNORECASE)
18
- match_result = text.scan(regex).map{|i| i.shift.downcase}
18
+ regex = Regexp.new(options[:dictionary].join('\\b|\\b'), Regexp::IGNORECASE)
19
+ match_result = text.scan(regex).map{|i| i.downcase}
19
20
  match_result.each do |w|
20
21
  result << {:hits => match_result.count(w), :word => w} unless result.select{|r| r[:word] == w}.shift || options[:negative_dictionary].map{|i| i.downcase}.include?(w)
21
22
  end
@@ -23,16 +24,9 @@ module TextParser
23
24
  result.reverse! if options[:order_direction] == :desc
24
25
  result
25
26
  end
26
-
27
- private
28
-
29
- def process_text
30
- text = self.gsub(/\s{2,}/," ")
31
- text = text.gsub(/[^\w\s\-]/u, "")
32
- end
33
27
  end
34
28
 
35
29
  # Includes module TextParser in the String object
36
30
  class String
37
31
  include TextParser
38
- end
32
+ end
@@ -1,10 +1,11 @@
1
+ # -*- encoding : utf-8 -*-
1
2
  require "test/unit"
2
3
  require "text_parser"
3
4
 
4
5
  class TextParserTest < Test::Unit::TestCase
5
6
 
6
7
  def test_should_have_method_parse
7
- assert "some text".methods.select{|a| a == "parse"}.count > 0
8
+ assert "some text".methods.select{|a| a.to_s == "parse"}.count > 0
8
9
  end
9
10
 
10
11
  def test_should_parse
@@ -13,11 +14,11 @@ class TextParserTest < Test::Unit::TestCase
13
14
  {:word => "dolor", :hits => 1}],
14
15
  text.parse(:dictionary => ["dolor", "consectetur"])
15
16
  end
16
-
17
+
17
18
  def test_should_parse_without_dictionary
18
19
  assert_equal [{:word => "test", :hits => 2}], "test test".parse
19
20
  end
20
-
21
+
21
22
  def test_should_remove_some_characters
22
23
  text = "Test? Test. Yes, test!"
23
24
  assert_equal [{:word => "test", :hits => 3}, {:word => "yes", :hits => 1}], text.parse
@@ -65,7 +66,7 @@ class TextParserTest < Test::Unit::TestCase
65
66
  end
66
67
 
67
68
  def test_should_works_with_special_characters
68
- assert_equal [], "'/[.](\")".parse
69
+ assert_equal [], "*&%?!$#%$@\\'///[.](\")".parse
69
70
  end
70
71
 
71
72
  def test_should_works_hifen
@@ -90,20 +91,18 @@ class TextParserTest < Test::Unit::TestCase
90
91
  {:word => "espacos",:hits => 1},
91
92
  {:word => "eu", :hits => 1},
92
93
  {:word => "se", :hits => 1}], text.parse
93
- end
94
-
94
+ end
95
+
95
96
  def test_should_keep_some_special_character
96
97
  assert_equal [{:word => "espaço", :hits => 1},
97
98
  {:word => "sideral",:hits => 1}], "Espaço sideral".parse
98
99
  assert_equal [{:word => "açúcar", :hits => 1},
99
- {:word => "bom", :hits => 1},
100
- {:word => "de", :hits => 1},
101
- {:word => "pão", :hits => 1}], "Pão de açúcar é bom.".parse
100
+ {:word => "pão", :hits => 1}], "Pão açúcar".parse
101
+ assert_equal [{:word => "ãéç", :hits => 1}], "ãéç".parse
102
102
  end
103
103
  end
104
104
 
105
-
106
-
105
+
107
106
 
108
107
 
109
108
 
data/text_parser.gemspec CHANGED
@@ -1,12 +1,10 @@
1
- require "lib/text_parser/version"
2
-
3
1
  Gem::Specification.new do |s|
4
2
  s.name = "text_parser"
5
- s.version = TextParser::Version.const_get("STRING")
3
+ s.version = "0.1.6"
6
4
  s.author = "Frederico de Paula"
7
5
  s.email = "fpaula@gmail.com"
8
6
  s.summary = "A easy way to parse a text."
9
- s.description = "Using method parse in the String object you can parse any text"
7
+ s.description = "Using method parse in the String object you can parse any text."
10
8
  s.files = Dir["{lib/**/*.rb,README.rdoc,test/**/*.rb,Rakefile,*.gemspec,doc/**/*}"]
11
9
  s.homepage = "http://textparser.heroku.com/"
12
10
  end
metadata CHANGED
@@ -1,38 +1,25 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: text_parser
3
- version: !ruby/object:Gem::Version
4
- hash: 17
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.6
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 1
9
- - 5
10
- version: 0.1.5
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Frederico de Paula
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2011-12-09 00:00:00 -02:00
19
- default_executable:
12
+ date: 2011-12-15 00:00:00.000000000Z
20
13
  dependencies: []
21
-
22
- description: Using method parse in the String object you can parse any text
14
+ description: Using method parse in the String object you can parse any text.
23
15
  email: fpaula@gmail.com
24
16
  executables: []
25
-
26
17
  extensions: []
27
-
28
18
  extra_rdoc_files: []
29
-
30
- files:
31
- - lib/text_parser/version.rb
19
+ files:
32
20
  - lib/text_parser.rb
33
21
  - README.rdoc
34
22
  - test/text_parser_test.rb
35
- - test/version_test.rb
36
23
  - Rakefile
37
24
  - text_parser.gemspec
38
25
  - doc/_index.html
@@ -52,39 +39,28 @@ files:
52
39
  - doc/TextParser/Version.html
53
40
  - doc/TextParser.html
54
41
  - doc/top-level-namespace.html
55
- has_rdoc: true
56
42
  homepage: http://textparser.heroku.com/
57
43
  licenses: []
58
-
59
44
  post_install_message:
60
45
  rdoc_options: []
61
-
62
- require_paths:
46
+ require_paths:
63
47
  - lib
64
- required_ruby_version: !ruby/object:Gem::Requirement
48
+ required_ruby_version: !ruby/object:Gem::Requirement
65
49
  none: false
66
- requirements:
67
- - - ">="
68
- - !ruby/object:Gem::Version
69
- hash: 3
70
- segments:
71
- - 0
72
- version: "0"
73
- required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
55
  none: false
75
- requirements:
76
- - - ">="
77
- - !ruby/object:Gem::Version
78
- hash: 3
79
- segments:
80
- - 0
81
- version: "0"
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
82
60
  requirements: []
83
-
84
61
  rubyforge_project:
85
- rubygems_version: 1.4.2
62
+ rubygems_version: 1.8.10
86
63
  signing_key:
87
64
  specification_version: 3
88
65
  summary: A easy way to parse a text.
89
66
  test_files: []
90
-
@@ -1,8 +0,0 @@
1
- module TextParser
2
- module Version
3
- MAJOR = 0
4
- MINOR = 1
5
- PATCH = 5
6
- STRING = "#{MAJOR}.#{MINOR}.#{PATCH}"
7
- end
8
- end
data/test/version_test.rb DELETED
@@ -1,23 +0,0 @@
1
- require "test/unit"
2
- require "text_parser/version"
3
-
4
- class TextParserTest < Test::Unit::TestCase
5
- def test_version
6
- assert_equal TextParser::Version.const_get("STRING"), "0.1.5"
7
- end
8
- end
9
-
10
-
11
-
12
-
13
-
14
-
15
-
16
-
17
-
18
-
19
-
20
-
21
-
22
-
23
-